<a href="https://colab.research.google.com/github/EricSiq/Benchmarking_Indian_Ports_EDA/blob/main/Deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install gradio


Collecting gradio
  Downloading gradio-5.5.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Col

In [None]:
# Colab-specific helper module; this cell only works inside Google Colab.
from google.colab import files

# Prompt for the CSV inputs and keep whatever files.upload() returns.
# The cell output shows each selected file being saved under its own name;
# presumably that is /content, where the loading code reads them — TODO confirm.
uploaded = files.upload()


Saving capacity.csv to capacity.csv
Saving Output-per-Ship-Berth-Day.csv to Output-per-Ship-Berth-Day.csv
Saving pre-berthing detention.csv to pre-berthing detention.csv
Saving traffic.csv to traffic.csv
Saving TRT.csv to TRT.csv
Saving utilization.csv to utilization.csv


In [None]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Read each uploaded CSV into its own module-level DataFrame.
# The files are expected under /content (the Colab working directory).
trt_df = pd.read_csv(filepath_or_buffer='/content/TRT.csv')
traffic_df = pd.read_csv(filepath_or_buffer='/content/traffic.csv')
capacity_df = pd.read_csv(filepath_or_buffer='/content/capacity.csv')
utilization_df = pd.read_csv(filepath_or_buffer='/content/utilization.csv')
pre_berthing_df = pd.read_csv(filepath_or_buffer='/content/pre-berthing detention.csv')
output_df = pd.read_csv(filepath_or_buffer='/content/Output-per-Ship-Berth-Day.csv')

# Function to plot metric trends
def plot_metric_trends(metric_df, metric_name):
    """Draw one line per port showing how a metric evolves over the years.

    Args:
        metric_df: DataFrame with a 'Year' column plus one column per port.
            An 'All Ports' column, if present, is not plotted.
        metric_name: Label used in the chart title and on the y-axis.

    Returns:
        The matplotlib Figure containing the chart.
    """
    port_columns = [col for col in metric_df.columns if col not in ('Year', 'All Ports')]

    # Use an explicit figure/axes pair rather than pyplot's implicit "current
    # figure": this function runs inside a long-lived Gradio server, where
    # shared pyplot state can be touched by several requests at once.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for port in port_columns:
            ax.plot(metric_df['Year'], metric_df[port], label=port)
        ax.set_title(f'{metric_name} Trends Across Ports')
        ax.set_xlabel('Year')
        ax.set_ylabel(metric_name)
        ax.legend(loc='best')
        ax.grid(True)
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so figures do not pile up in memory
        # with every request; the Figure object itself stays valid for rendering.
        plt.close(fig)
    return fig

# Functions to generate plots for each metric
def show_capacity_trends():
    """Return the per-port trend figure for the capacity dataset."""
    return plot_metric_trends(metric_df=capacity_df, metric_name="Capacity")

def show_utilization_trends():
    """Return the per-port trend figure for the utilization dataset."""
    return plot_metric_trends(metric_df=utilization_df, metric_name="Utilization")

def show_trt_trends():
    """Return the per-port trend figure for the TRT dataset."""
    return plot_metric_trends(metric_df=trt_df, metric_name="TRT")

def show_output_trends():
    """Return the per-port trend figure for the output dataset."""
    return plot_metric_trends(metric_df=output_df, metric_name="Output")

def show_pre_berthing_trends():
    """Return the per-port trend figure for the pre-berthing detention dataset."""
    return plot_metric_trends(metric_df=pre_berthing_df, metric_name="Pre-Berthing")

# Mapping of metric names to functions
# Dropdown label -> zero-argument callback that builds that metric's trend figure.
metric_to_function = dict([
    ("Capacity", show_capacity_trends),
    ("Utilization", show_utilization_trends),
    ("TRT", show_trt_trends),
    ("Output", show_output_trends),
    ("Pre-Berthing", show_pre_berthing_trends),
])

# Function to analyze correlations for a specific port
def analyze_port_correlations(port_name):
    """Correlate one port's capacity, traffic, utilization, TRT and output.

    Args:
        port_name: Column name of the port; it must exist in ``capacity_df``.

    Returns:
        A 5x5 correlation DataFrame on success, otherwise a human-readable
        message (str) describing why the correlation could not be computed.
    """
    if port_name not in capacity_df.columns:
        return f"Port {port_name} not found in data."

    try:
        sources = (
            ('Capacity', capacity_df),
            ('Traffic', traffic_df),
            ('Utilization', utilization_df),
            ('TRT', trt_df),
            ('Output', output_df),
        )
        # DataFrame.get yields None when a dataset has no column for this port.
        metrics = {label: frame.get(port_name) for label, frame in sources}

        for label, column in metrics.items():
            if column is None or column.isna().all():
                return f"Data for {label} is missing or not valid for {port_name}."

        # Coerce to numbers; anything unparseable becomes NaN and is dropped below.
        numeric = {
            label: pd.to_numeric(column, errors='coerce')
            for label, column in metrics.items()
        }

        # Keep only the rows where every metric has a value.
        complete_rows = pd.DataFrame(numeric).dropna()
        if len(complete_rows) < 2:
            return "Not enough valid data for correlation analysis."

        return complete_rows.corr()
    except Exception as e:
        return f"Error calculating correlations: {str(e)}"

def get_correlation_data(port_name):
    """Gradio callback: return the correlation matrix for the selected port.

    Args:
        port_name: Port chosen in the dropdown.

    Returns:
        The correlation DataFrame produced by ``analyze_port_correlations``.

    Raises:
        gr.Error: If the analysis reported a problem (it signals problems by
            returning a message string instead of a DataFrame).
    """
    result = analyze_port_correlations(port_name)
    if isinstance(result, str):
        # The output component is a gr.Dataframe, which interprets a plain
        # string as a path to a CSV file. Returning the message would fail
        # with an unrelated file error, so surface it as a Gradio error instead.
        raise gr.Error(result)
    return result

# Choices for the correlation tab's port dropdown, taken from the capacity data.
# 'Year' is the time axis rather than a port, so it is left out; any other
# column (including an aggregate such as 'All Ports', if present) stays selectable.
port_names = [col for col in capacity_df.columns if col != 'Year']

# Creating the Gradio interfaces
iface_trends = gr.Interface(
    title="Port Performance Analysis",
    description="Select a metric to view trends across ports over the years.",
    # Look up the plotting callback registered for the chosen metric and call it.
    fn=lambda metric: metric_to_function[metric](),
    inputs=gr.Dropdown(
        label="Select Metric",
        choices=["Capacity", "Utilization", "TRT", "Output", "Pre-Berthing"],
    ),
    outputs=gr.Plot(label="Metric Trends"),
)

# Tab that shows the metric-to-metric correlation matrix for one selected port.
iface_correlation = gr.Interface(
    title="Port Correlation Analysis",
    description="Select a port to view the correlation matrix of capacity, traffic, utilization, TRT, and output.",
    fn=get_correlation_data,
    inputs=gr.Dropdown(label="Select Port", choices=port_names),
    outputs=gr.Dataframe(label="Correlation Matrix"),
)

def plot_port_comparison(metric_df, year, metric_name):
    """Bar chart comparing every port's value of one metric in a single year.

    Args:
        metric_df: DataFrame with a 'Year' column plus one column per port.
            The frame is only read; it is never modified.
        year: Year to show. Compared as text against the 'Year' column.
        metric_name: Label used in the chart title and on the y-axis.

    Returns:
        The matplotlib Figure on success, or an error message (str) if the
        chart could not be produced.
    """
    fig = None
    try:
        # Columns that are not individual ports: the time axis, the aggregate,
        # and derived totals that may have been added to a shared frame.
        non_port_columns = {'Year', 'All Ports', 'Total Traffic', 'Traffic Growth'}
        port_columns = [col for col in metric_df.columns if col not in non_port_columns]

        # Compare on a temporary string view of 'Year'. Writing the converted
        # column back would permanently change the dtype of the shared frame
        # that the other tabs also read.
        selected_rows = metric_df['Year'].astype(str) == str(year)
        year_data = metric_df.loc[selected_rows].melt(
            id_vars=['Year'],
            value_vars=port_columns
        )

        # Explicit figure/axes: avoids pyplot's shared "current figure" state.
        fig, ax = plt.subplots(figsize=(15, 6))
        sns.barplot(x='variable', y='value', data=year_data, ax=ax)
        ax.set_title(f'{metric_name} Comparison Across Ports ({year})')
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel('Ports')
        ax.set_ylabel(metric_name)
        fig.tight_layout()
        return fig
    except Exception as e:
        return f"Error generating plot for {metric_name}: {str(e)}"
    finally:
        # Unregister from pyplot on success and on failure so figures do not
        # accumulate across requests; a returned Figure remains usable.
        if fig is not None:
            plt.close(fig)

def show_comparison_plots(year):
    """Gradio callback: build the six per-metric comparison charts for a year.

    Args:
        year: Year chosen in the dropdown; converted to text before use.

    Returns:
        A list of exactly six items, in the order Capacity, Traffic,
        Utilization, TRT, Output, Pre-Berthing. Each item is a matplotlib
        Figure, or None when that metric's chart could not be produced.
    """
    year = str(year)
    datasets = [
        (capacity_df, 'Capacity'),
        (traffic_df, 'Traffic'),
        (utilization_df, 'Utilization'),
        (trt_df, 'TRT'),
        (output_df, 'Output'),
        (pre_berthing_df, 'Pre-Berthing'),
    ]

    figures = []
    for frame, metric_name in datasets:
        result = plot_port_comparison(frame, year, metric_name)
        if isinstance(result, str):
            # plot_port_comparison reports failures as a message string, which
            # a gr.Plot output cannot render. Show the message as a warning and
            # leave that plot empty so the other five charts still appear.
            gr.Warning(result)
            result = None
        figures.append(result)

    # Always one entry per output component, as the interface expects.
    return figures

# Distinct values of the capacity data's 'Year' column, used as dropdown choices.
years = pd.unique(capacity_df['Year'])

# Tab that compares all ports, metric by metric, for one selected year.
iface_comparison = gr.Interface(
    title="Port Comparison Analysis",
    description="Select a year to compare different metrics across ports.",
    fn=show_comparison_plots,
    inputs=gr.Dropdown(label="Select Year", choices=list(map(str, years))),
    # One plot per metric, in the same order show_comparison_plots returns them.
    outputs=[
        gr.Plot(label=metric_label)
        for metric_label in ("Capacity", "Traffic", "Utilization", "TRT", "Output", "Pre-Berthing")
    ],
)

# Function for TRT performance analysis
def analyze_trt_performance():
    """Bar chart of each port's average Turn Round Time, lowest average first.

    Reads the module-level ``trt_df``; the 'Year' and 'All Ports' columns are
    excluded from the chart.

    Returns:
        The matplotlib Figure containing the chart.
    """
    port_cols = [col for col in trt_df.columns if col not in ['Year', 'All Ports']]
    avg_trt = trt_df[port_cols].mean().sort_values()

    # The per-port regression slope and summary table that used to be computed
    # here were never displayed or returned. They were dropped: they added work
    # to every request and could raise on very short series.

    # Explicit figure/axes instead of pyplot's shared "current figure" state.
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        sns.barplot(x=avg_trt.index, y=avg_trt.values, ax=ax)
        ax.set_title('Average Turn Round Time by Port')
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_ylabel('Average TRT (days)')
        ax.set_xlabel('Port')
        fig.tight_layout()
    finally:
        # Unregister from pyplot so figures do not accumulate across requests.
        plt.close(fig)
    return fig

# Output-only tab (inputs=None): shows the average TRT bar chart.
iface_trt_performance = gr.Interface(
    fn=analyze_trt_performance,
    inputs=None,
    outputs=gr.Plot(label="TRT Performance Graph"),
    title="TRT Performance Analysis",
    # The chart shows averages only, so the text no longer promises a trend analysis.
    description="Displays the average Turn Round Time (TRT) for each port, sorted from lowest to highest."
)

# Function for Output Efficiency Analysis
def analyze_output_efficiency():
    """Scatter each port's average output against its average capacity.

    Reads the module-level ``output_df`` and ``capacity_df``. Ports are the
    columns of ``output_df`` other than 'Year' and 'All Ports'; each one must
    also exist in ``capacity_df``.

    Returns:
        The matplotlib Figure, with one labelled point per port.
    """
    port_cols = [col for col in output_df.columns if col not in ['Year', 'All Ports']]
    avg_output = output_df[port_cols].mean()
    avg_capacity = capacity_df[port_cols].mean()

    # The output/capacity ratio that used to be computed here was never used,
    # so it is no longer calculated.

    # Explicit figure/axes instead of pyplot's shared "current figure" state.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.scatter(avg_capacity, avg_output)
        for port in port_cols:
            ax.annotate(port, (avg_capacity[port], avg_output[port]))
        ax.set_xlabel('Average Capacity')
        ax.set_ylabel('Average Output per Ship Berth Day')
        ax.set_title('Port Output Efficiency vs Capacity')
        fig.tight_layout()
    finally:
        # Unregister from pyplot so figures do not accumulate across requests.
        plt.close(fig)
    return fig

# Output-only tab (inputs=None): capacity-versus-output scatter plot.
iface_output_efficiency = gr.Interface(
    title="Output Efficiency Analysis",
    description="Scatter plot of average capacity vs. average output per ship berth day, with port labels.",
    fn=analyze_output_efficiency,
    inputs=None,
    outputs=gr.Plot(label="Output Efficiency Scatter Plot"),
)

# Function for Capacity Utilization Analysis
def analyze_capacity_utilization():
    """Plot each port's utilization range (min to max) with its mean marked.

    Reads the module-level ``utilization_df``; 'Year' and 'All Ports' are
    excluded. Ports are ordered by mean utilization, highest first.

    Returns:
        The matplotlib Figure containing the chart.
    """
    port_cols = [col for col in utilization_df.columns if col not in ['Year', 'All Ports']]
    port_values = utilization_df[port_cols]

    # Per-port summary statistics (the standard deviation is computed but not drawn).
    util_stats = pd.DataFrame({
        'Mean_Utilization': port_values.mean(),
        'Std_Utilization': port_values.std(),
        'Max_Utilization': port_values.max(),
        'Min_Utilization': port_values.min()
    }).sort_values('Mean_Utilization', ascending=False)

    # Explicit figure/axes instead of pyplot's shared "current figure" state.
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        for port in util_stats.index:
            # Grey vertical bar spans min..max; the blue dot marks the mean.
            ax.vlines(x=port,
                      ymin=util_stats.loc[port, 'Min_Utilization'],
                      ymax=util_stats.loc[port, 'Max_Utilization'],
                      color='gray', alpha=0.5)
            ax.plot(port, util_stats.loc[port, 'Mean_Utilization'], 'bo')

        ax.tick_params(axis='x', labelrotation=45)
        ax.set_ylabel('Utilization Rate (%)')
        ax.set_title('Port Capacity Utilization Ranges')
        fig.tight_layout()
    finally:
        # Unregister from pyplot so figures do not accumulate across requests.
        plt.close(fig)
    return fig

# Output-only tab (inputs=None): utilization range chart.
iface_capacity_utilization = gr.Interface(
    title="Capacity Utilization Analysis",
    description="Displays the range of utilization rates for different ports, with port names on the x-axis.",
    fn=analyze_capacity_utilization,
    inputs=None,
    outputs=gr.Plot(label="Capacity Utilization Graph"),
)

# Function for Geographical Performance Analysis
def analyze_geographical_performance1():
    """Stacked bar chart comparing average output of East and West Coast ports.

    Reads the module-level ``output_df``. Every port named below must be a
    column of that frame.

    Returns:
        The matplotlib Figure with one stacked bar per coast, one segment per port.
    """
    east_coast = ['Kolkata', 'Haldia', 'Paradip', 'Vishakhapatnam', 'Ennore', 'Chennai']
    west_coast = ['Kandla', 'Mumbai', 'J.L.Nehru', 'Mormugoa', 'New Mangalore', 'Cochin']

    east_output = output_df[east_coast].mean()
    west_output = output_df[west_coast].mean()

    # Transpose so that "East Coast" and "West Coast" are the rows (one bar
    # each) and the ports are the stacked segments.
    output_comparison = pd.DataFrame({
        'East Coast': east_output,
        'West Coast': west_output
    }).T

    # Explicit figure/axes instead of pyplot's shared "current figure" state.
    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        output_comparison.plot(kind='bar', stacked=True, ax=ax, colormap='tab20', rot=0)
        ax.set_title('East Coast vs West Coast Port Performance (Output Only)')
        ax.set_ylabel('Average Output Value')
        ax.legend(title="Ports", bbox_to_anchor=(1.05, 1), loc='upper left')
        # Run the layout only after the legend exists: it sits outside the
        # axes, and laying out first leaves no room for it in the image.
        fig.tight_layout()
    finally:
        # Unregister from pyplot so figures do not accumulate across requests.
        plt.close(fig)
    return fig

# Output-only tab (inputs=None): East-versus-West coast output comparison.
iface_geographical_performance = gr.Interface(
    title="Geographical Performance Analysis",
    description="Displays a stacked bar graph comparing the average output of East and West Coast ports.",
    fn=analyze_geographical_performance1,
    inputs=None,
    outputs=gr.Plot(label="Geographical Performance Graph"),
)

# Function for Overall Traffic Growth Analysis
def analyze_overall_traffic_growth():
    """Line chart of year-over-year growth (%) in total traffic across ports.

    Reads the module-level ``traffic_df`` without modifying it, so repeated
    calls give the same result and other views of the traffic data are not
    affected.

    Returns:
        The matplotlib Figure; each point is annotated with its growth value.
    """
    # Sum individual ports only. The aggregate column would double-count, and
    # the two derived names are skipped in case they were added to the frame earlier.
    non_port_columns = {'Year', 'All Ports', 'Total Traffic', 'Traffic Growth'}
    port_cols = [col for col in traffic_df.columns if col not in non_port_columns]

    years = traffic_df['Year']
    total_traffic = traffic_df[port_cols].sum(axis=1)
    # Percentage change relative to the previous row; the first row is NaN.
    traffic_growth = total_traffic.pct_change() * 100

    # Explicit figure/axes instead of pyplot's shared "current figure" state.
    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        ax.plot(years, traffic_growth, marker='o', color='teal', linewidth=2)

        ax.set_xlabel('Year', fontsize=12)
        ax.set_ylabel('Traffic Growth (%)', fontsize=12)
        ax.set_title('Overall Traffic Growth Across All Ports per Year', fontsize=14)
        ax.grid(True, linestyle='--', alpha=0.7)
        ax.tick_params(axis='x', labelrotation=45)

        # Label every point that has a growth value (skips the leading NaN).
        for x, y in zip(years, traffic_growth):
            if not pd.isna(y):
                ax.annotate(f'{y:.1f}%',
                            (x, y),
                            textcoords="offset points",
                            xytext=(0, 10),
                            ha='center')

        fig.tight_layout()
    finally:
        # Unregister from pyplot so figures do not accumulate across requests.
        plt.close(fig)
    return fig

# Output-only tab (inputs=None): overall traffic growth line chart.
iface_traffic_growth = gr.Interface(
    title="Overall Traffic Growth Analysis",
    description="Displays the overall traffic growth of all major ports across the years.",
    fn=analyze_overall_traffic_growth,
    inputs=None,
    outputs=gr.Plot(label="Traffic Growth Graph"),
)

# Assemble every analysis view into one tabbed application. The two lists are
# positional pairs: the n-th interface is shown under the n-th tab name.
app = gr.TabbedInterface(
    interface_list=[
        iface_trends,
        iface_correlation,
        iface_comparison,
        iface_trt_performance,
        iface_output_efficiency,
        iface_capacity_utilization,
        iface_geographical_performance,
        iface_traffic_growth,
    ],
    tab_names=[
        "Metric Trends",
        "Correlation Analysis",
        "Port Comparison",
        "TRT Performance",
        "Output Efficiency",
        "Capacity Utilization",
        "Geographical Performance",
        "Traffic Growth",
    ],
)

# Start serving the combined app.
app.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://872fd430db1e0c7260.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


