Data source: https://www.kaggle.com/datasets/piterfm/russian-navy/data?select=black_sea_fleet.csv

# Set up

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

from plotly.subplots import make_subplots

# Use the 'notebook' renderer as plotly's default for every figure shown below
pio.renderers.default = "notebook"

In [None]:
def import_data(filepath: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame and print a short overview of it.

    The overview consists of the file path, the frame's shape, its column
    names and a rendering of the first two rows.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The DataFrame parsed from ``filepath``.
    """
    frame = pd.read_csv(filepath)

    overview = (
        ("File:", filepath),
        ("Shape:", frame.shape),
        ("Columns:", list(frame.columns)),
    )
    for label, value in overview:
        print(label, value)

    # `display` is not imported in this file; it is expected to be provided by
    # the notebook environment.
    print("Head:")
    display(frame.head(2))

    return frame

# Get data
df_fleet = import_data("russian-navy/black_sea_fleet.csv")  # All ships
df_fleet_losses = import_data("russian-navy/black_sea_losses.csv")  # Identified losses
df_fleet_wiki_en = import_data("russian-navy/black_sea_fleet_wiki_en.csv")  # War ships
df_fleet_wiki_ru = import_data("russian-navy/black_sea_fleet_wiki_ru.csv")  # War ships

# Normalise 'df_fleet' DataFrame
# "Length (m)" is handled as text here (presumably it uses a decimal comma - confirm against
# the CSV): swap the comma for a point, then convert to float.
# The column is assigned directly instead of through `.loc[:, col] = ...`, because on
# pandas >= 2.0 a `.loc` write goes into the existing object column and the dtype would
# never become float.
df_fleet["Length (m)"] = (
    df_fleet["Length (m)"]
    .str.replace(",", ".", regex=False)  # Literal replacement, no regex involved
    .astype(float)
)

# Normalise 'df_fleet_losses' DataFrame
df_fleet_losses = df_fleet_losses.fillna("unknown")  # Replace NaN values with name "unknown"
# Collapse misspelled / padded / verbose status labels into one canonical spelling each
df_fleet_losses["Status"] = df_fleet_losses["Status"].replace(
    {
        "dameged": "damaged",
        "destroyed ": "destroyed",
        "destroyed and sunk": "sunk",
        "damaged beyond economical repair": "damaged beyond repair",
    }
)

In [None]:
# Get all the ships and their length, longest first
df = df_fleet[["Name", "Length (m)"]]
df = df.sort_values("Length (m)", ascending=False)
# Optional: uncomment to restrict the chart to the 100 longest ships
# df = df.head(100)

# Map to the losses table: ships whose name also appears in 'df_fleet_losses'.
# The result is bound to a name so it can be inspected; previously this expression
# was evaluated and thrown away. It is not used by the chart below.
df_in_losses = df[df["Name"].isin(df_fleet_losses["Name"])].reset_index()

# Bar chart of every ship's length (last expression, so the notebook renders it)
px.bar(df, x="Name", y="Length (m)")

In [None]:
# Show the fleet row(s) whose name is exactly "BDK-43"
df_fleet.loc[df_fleet["Name"].eq("BDK-43")]

In [None]:
# Losses indexed by ship name, keeping only the old name and the status
df = df_fleet_losses.set_index("Name")[["NameOld", "Status"]]
# Attach each ship's length from the fleet table (left join on the name index),
# then order the rows by that length
df.join(df_fleet.set_index("Name")[["Length (m)"]], how="left").sort_values(by="Length (m)")

# Ship losses grouped by type (sunburst chart)
You can click on the inner circle to expand it.

In [194]:
# Transform data
# Rows whose own status is "destroyed" or "sunk"
has_final_status = df_fleet_losses["Status"].isin(["destroyed", "sunk"])
df_destroyed = df_fleet_losses[has_final_status]  # Destroyed
# Rows for ships that have a destroyed/sunk entry somewhere in the table
name_is_destroyed = df_fleet_losses["Name"].isin(df_destroyed["Name"])
df_damaged = df_fleet_losses[~(has_final_status | name_is_destroyed)]  # Damaged but not destroyed

# Create figures
fig1 = px.sunburst(
    df_damaged,
    path=["Status", "Type", "Name"],
    color="Status",
    color_discrete_map={"damaged": "#B47C08", "damaged beyond repair": "#835A06"},
)
fig2 = px.sunburst(
    df_destroyed,
    path=["Status", "Type", "Name"],
    color="Status",
    color_discrete_map={"destroyed": "#9C0D07", "sunk": "#6B0905"},
)

# Create subplots: one row, damaged on the left and destroyed on the right
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Damaged", "Destroyed"),
    specs=[[{"type": "sunburst"}, {"type": "sunburst"}]],
)

# Copy the traces of each standalone figure into its subplot column
for column, source_fig in enumerate((fig1, fig2), start=1):
    for trace in source_fig.data:
        fig.add_trace(trace, row=1, col=column)

# Set the figure height and the size of the subplot titles, then render
fig.update_layout(height=1000).update_annotations(font_size=36).show()

In [195]:
# Standalone sunburst of the damaged (not destroyed) ships: Status -> Type -> Name
px.sunburst(
    df_damaged,
    path=["Status", "Type", "Name"],
    color="Status",
    color_discrete_map={
        "damaged": "#B47C08",
        "damaged beyond repair": "#835A06",
    },
)