In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px



In [28]:
# Load the ship performance dataset from a CSV file located relative to the
# notebook's working directory.
df = pd.read_csv("Ship_Performance_Dataset.csv")
# Bare last expression: the notebook renders the frame as this cell's output.
# NOTE(review): the rendered rows show empty Route_Type / Weather_Condition
# entries, so downstream groupby calls presumably skip those rows — confirm.
df 

Unnamed: 0,Date,Ship_Type,Route_Type,Engine_Type,Maintenance_Status,Speed_Over_Ground_knots,Engine_Power_kW,Distance_Traveled_nm,Draft_meters,Weather_Condition,Cargo_Weight_tons,Operational_Cost_USD,Revenue_per_Voyage_USD,Turnaround_Time_hours,Efficiency_nm_per_kWh,Seasonal_Impact_Score,Weekly_Voyage_Count,Average_Load_Percentage
0,2023-06-04,Container Ship,,Heavy Fuel Oil (HFO),Critical,12.597558,2062.983982,1030.943616,14.132284,Moderate,1959.017882,483832.354540,292183.273104,25.867077,1.455179,1.415653,1,93.769249
1,2023-06-11,Fish Carrier,Short-haul,Steam Turbine,Good,10.387580,1796.057415,1060.486382,14.653083,Rough,162.394712,483388.000509,883765.787360,63.248196,0.290361,0.885648,6,93.895372
2,2023-06-18,Container Ship,Long-haul,Diesel,Fair,20.749747,1648.556685,658.874144,7.199261,Moderate,178.040917,448543.404044,394018.746904,49.418150,0.499595,1.405813,9,96.218244
3,2023-06-25,Bulk Carrier,Transoceanic,Steam Turbine,Fair,21.055102,915.261795,1126.822519,11.789063,Moderate,1737.385346,261349.605449,87551.375175,22.409110,0.702906,1.370704,1,66.193698
4,2023-07-02,Fish Carrier,Transoceanic,Diesel,Fair,13.742777,1089.721803,1445.281159,9.727833,Moderate,260.595103,287718.375160,676121.459632,64.158231,1.331343,0.583383,8,80.008581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2731,2024-06-02,Tanker,Short-haul,Heavy Fuel Oil (HFO),Good,11.607997,2918.395972,239.990359,13.700906,Moderate,318.111891,237975.067292,731584.322921,47.152337,1.000265,1.284895,3,74.813114
2732,2024-06-09,Bulk Carrier,Short-haul,Heavy Fuel Oil (HFO),Good,13.852798,2161.282358,831.355653,14.612775,,218.309002,21029.021721,374365.370930,64.325916,0.653474,0.891085,2,84.595155
2733,2024-06-16,Container Ship,Short-haul,Steam Turbine,Critical,16.813713,1343.608006,1376.460622,9.306518,,1630.646419,78883.312529,234120.365052,53.551090,0.594169,0.725404,6,80.975269
2734,2024-06-23,Tanker,Transoceanic,Heavy Fuel Oil (HFO),Good,23.132643,2028.143572,619.236340,6.623856,Moderate,153.441965,25241.550250,799713.737211,14.335517,0.895670,0.902960,2,92.853622


In [35]:
# Maintenance status vs. average speed and average operational cost.
#
# The two metrics live on very different scales (speed is in the tens of
# knots, cost in the tens-to-hundreds of thousands of USD), so plotting them
# on one shared y-axis makes the speed bars effectively invisible. Each metric
# therefore gets its own facet with an independent y-axis.

# Mean of each metric per maintenance status, one row per status.
avg_data = (
    df.groupby('Maintenance_Status')
    .agg({'Speed_Over_Ground_knots': 'mean', 'Operational_Cost_USD': 'mean'})
    .reset_index()
)

# Long format: one row per (status, metric) pair, as plotly express expects.
melted_data = avg_data.melt(
    id_vars='Maintenance_Status',
    value_vars=['Speed_Over_Ground_knots', 'Operational_Cost_USD'],
    var_name='Metric',
    value_name='Value',
)

fig = px.bar(
    melted_data,
    x='Maintenance_Status',
    y='Value',
    color='Metric',
    facet_col='Metric',
    title='Maintenance Status vs Average Speed and Operational Cost',
    # Keys must be the actual column names for plotly to apply the labels.
    labels={'Value': 'Average Value', 'Maintenance_Status': 'Maintenance Status', 'Metric': 'Metric'},
    color_discrete_sequence=px.colors.qualitative.Set2
)

# Decouple the facets' y-axes so each metric is scaled to its own range, and
# show tick labels on every facet (they are hidden on non-first facets by default).
fig.update_yaxes(matches=None, showticklabels=True)
# Facet titles default to "Metric=<name>"; keep only the metric name.
fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
# update_xaxes applies the tick angle to the x-axis of every facet.
fig.update_xaxes(tickangle=-45)
fig.show()

In [39]:
# Weather condition vs. average efficiency (nautical miles per kWh).

# Mean efficiency per weather condition, one row per condition.
# NOTE(review): the variable name is misspelled ("avgerage"); it is kept as-is
# because other cells may reference it — rename notebook-wide if not.
avgerage_efficiency = df.groupby('Weather_Condition')['Efficiency_nm_per_kWh'].mean().reset_index()

fig = px.bar(
    avgerage_efficiency,
    x='Weather_Condition',
    y='Efficiency_nm_per_kWh',
    title='Average Efficiency by Weather Condition',
    # Keys must be the actual column names for plotly to apply the labels;
    # the efficiency label is also used for the colour-bar title.
    labels={'Efficiency_nm_per_kWh': 'Average Efficiency (nm/kWh)', 'Weather_Condition': 'Weather Condition'},
    color='Efficiency_nm_per_kWh',
    color_continuous_scale='Sunset'
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()

In [41]:
# Distribution of speed over ground under each weather condition.
fig = px.box(
    df,
    x='Weather_Condition',
    y='Speed_Over_Ground_knots',
    title='Speed Distribution under Different Weather Conditions',
    # Keys must be the actual column names for plotly to apply the labels.
    labels={'Weather_Condition': 'Weather Condition', 'Speed_Over_Ground_knots': 'Speed Over Ground (knots)'},
    color='Weather_Condition',
    color_discrete_sequence=px.colors.qualitative.Pastel
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()


In [42]:
# Average operational cost by maintenance status and weather condition.

# Mean efficiency per ship type.
# NOTE(review): not used by the heatmap below; kept in case other cells rely
# on `avg_efficiency` — remove if nothing else references it.
avg_efficiency = df.groupby('Ship_Type')['Efficiency_nm_per_kWh'].mean().reset_index()

# One row per (maintenance status, weather condition) pair with its mean cost.
grouped_data = df.groupby(['Maintenance_Status', 'Weather_Condition'])[['Operational_Cost_USD']].mean().reset_index()
fig = px.density_heatmap(
    grouped_data,
    x='Maintenance_Status',
    y='Weather_Condition',
    z='Operational_Cost_USD',
    # Each bin holds exactly one pre-averaged row, so 'avg' yields the same
    # values as the default 'sum' but titles the colour bar "avg of ..."
    # instead of the misleading "sum of ...".
    histfunc='avg',
    color_continuous_scale='Inferno',
    title='Heatmap of Operational Cost vs Maintenance and Weather',
    # Keys must be the actual column names for plotly to apply the labels.
    labels={
        'Operational_Cost_USD': 'Operational Cost (USD)',
        'Maintenance_Status': 'Maintenance Status',
        'Weather_Condition': 'Weather Condition',
    }
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()
