In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px



In [3]:
# Read the ship performance records from the CSV file (path is relative to the
# notebook's working directory) and show the resulting frame as the cell output.
df = pd.read_csv(filepath_or_buffer="Ship_Performance_Dataset.csv")
df

Unnamed: 0,Date,Ship_Type,Route_Type,Engine_Type,Maintenance_Status,Speed_Over_Ground_knots,Engine_Power_kW,Distance_Traveled_nm,Draft_meters,Weather_Condition,Cargo_Weight_tons,Operational_Cost_USD,Revenue_per_Voyage_USD,Turnaround_Time_hours,Efficiency_nm_per_kWh,Seasonal_Impact_Score,Weekly_Voyage_Count,Average_Load_Percentage
0,2023-06-04,Container Ship,,Heavy Fuel Oil (HFO),Critical,12.597558,2062.983982,1030.943616,14.132284,Moderate,1959.017882,483832.354540,292183.273104,25.867077,1.455179,1.415653,1,93.769249
1,2023-06-11,Fish Carrier,Short-haul,Steam Turbine,Good,10.387580,1796.057415,1060.486382,14.653083,Rough,162.394712,483388.000509,883765.787360,63.248196,0.290361,0.885648,6,93.895372
2,2023-06-18,Container Ship,Long-haul,Diesel,Fair,20.749747,1648.556685,658.874144,7.199261,Moderate,178.040917,448543.404044,394018.746904,49.418150,0.499595,1.405813,9,96.218244
3,2023-06-25,Bulk Carrier,Transoceanic,Steam Turbine,Fair,21.055102,915.261795,1126.822519,11.789063,Moderate,1737.385346,261349.605449,87551.375175,22.409110,0.702906,1.370704,1,66.193698
4,2023-07-02,Fish Carrier,Transoceanic,Diesel,Fair,13.742777,1089.721803,1445.281159,9.727833,Moderate,260.595103,287718.375160,676121.459632,64.158231,1.331343,0.583383,8,80.008581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2731,2024-06-02,Tanker,Short-haul,Heavy Fuel Oil (HFO),Good,11.607997,2918.395972,239.990359,13.700906,Moderate,318.111891,237975.067292,731584.322921,47.152337,1.000265,1.284895,3,74.813114
2732,2024-06-09,Bulk Carrier,Short-haul,Heavy Fuel Oil (HFO),Good,13.852798,2161.282358,831.355653,14.612775,,218.309002,21029.021721,374365.370930,64.325916,0.653474,0.891085,2,84.595155
2733,2024-06-16,Container Ship,Short-haul,Steam Turbine,Critical,16.813713,1343.608006,1376.460622,9.306518,,1630.646419,78883.312529,234120.365052,53.551090,0.594169,0.725404,6,80.975269
2734,2024-06-23,Tanker,Transoceanic,Heavy Fuel Oil (HFO),Good,23.132643,2028.143572,619.236340,6.623856,Moderate,153.441965,25241.550250,799713.737211,14.335517,0.895670,0.902960,2,92.853622


In [5]:
# Speed over ground vs distance travelled.
# Rows are sorted by distance first so the line is drawn left-to-right along
# the x-axis instead of following the original row order.
data_sorted = df.sort_values(by='Distance_Traveled_nm')

fig = px.line(
    data_sorted,
    x='Distance_Traveled_nm',
    y='Speed_Over_Ground_knots',
    title='Speed over Ground vs Distance Travelled',
    markers=True,
    # Label keys must match the plotted column names exactly; the previous key
    # 'Speed_over_Ground_knot' matched no column, so the y-axis label was
    # never applied.
    labels={
        'Distance_Traveled_nm': 'Distance Travelled (nm)',
        'Speed_Over_Ground_knots': 'Speed over Ground (knot)',
    },
)
fig.update_layout(xaxis_tickangle=0)
fig.show()

In [6]:
# Mean speed over ground per engine type, drawn as a bar chart.
# as_index=False keeps 'Engine_Type' as a regular column so it can be used
# directly as the x-axis field.
avg_speed = (
    df.groupby('Engine_Type', as_index=False)['Speed_Over_Ground_knots']
    .mean()
)

fig = px.bar(
    avg_speed,
    x='Engine_Type',
    y='Speed_Over_Ground_knots',
    # Bars are also shaded by their height (the mean speed).
    color='Speed_Over_Ground_knots',
    color_continuous_scale='tealgrn',
    labels={
        'Engine_Type': 'Engine Type',
        'Speed_Over_Ground_knots': 'Average Speed (knot)',
    },
    title='Average Speed by Engine Type',
)
# Tilt the category labels on the x-axis.
fig.update_layout(xaxis={'tickangle': -45})
fig.show()

In [7]:
# Mean speed over ground for each distinct engine power value, as a scatter plot.
# NOTE(review): Engine_Power_kW looks continuous (the displayed rows carry
# six-decimal values), so each group probably holds a single row and the
# "average" is effectively the raw speed. Consider binning the power
# (e.g. with pd.cut) before averaging. Confirm against the full data.
average_speed_power = (
    df.groupby('Engine_Power_kW', as_index=False)['Speed_Over_Ground_knots']
    .mean()
)

fig = px.scatter(
    average_speed_power,
    x='Engine_Power_kW',
    y='Speed_Over_Ground_knots',
    # Points are coloured by the same value that is plotted on the y-axis.
    color='Speed_Over_Ground_knots',
    color_continuous_scale='viridis',
    labels={
        'Engine_Power_kW': 'Engine Power (kW)',
        'Speed_Over_Ground_knots': 'Average Speed (knot)',
    },
    title='Average Speed by Engine Power',
)
fig.show()

In [8]:
# Distribution of speed over ground across all rows of the dataset.
fig = px.histogram(
    df,
    x='Speed_Over_Ground_knots',
    title='Speed Distribution',
    labels={'Speed_Over_Ground_knots': 'Speed over Ground (knot)'},
    nbins=30,  # upper bound on the number of bins plotly will use
    color_discrete_sequence=['skyblue'],
)
fig.show()