In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from PIL import Image

### 1. Introduction: Human Development & Economy vs. CO2 Emissions & Climate Change

In [None]:
# Read the four indicator datasets (one CSV each) into DataFrames.
# Order of the paths matches the order of the names on the left.
gdp_df, co2_df, hdi_df, temp_anomaly_df = map(
    pd.read_csv,
    [
        'data/gdp-per-capita-worldbank.csv',
        'data/annual-co2-emissions-per-country.csv',
        'data/human-development-index.csv',
        'data/temperature-anomaly.csv',
    ],
)

In [None]:
# Restrict every dataset to 1990..2022 (both bounds inclusive)
gdp_df = gdp_df[gdp_df['Year'].between(1990, 2022)]
co2_df = co2_df[co2_df['Year'].between(1990, 2022)]
hdi_df = hdi_df[hdi_df['Year'].between(1990, 2022)]
temp_anomaly_df = temp_anomaly_df[temp_anomaly_df['Year'].between(1990, 2022)]

# Per-year unweighted mean over every row of that year.
# NOTE(review): if the CSVs contain aggregate entities (e.g. "World" or
# continents) they are averaged in as well -- confirm this is intended.
avg_gdp = gdp_df.groupby('Year')['GDP per capita, PPP (constant 2017 international $)'].mean()
avg_co2 = co2_df.groupby('Year')['Annual CO₂ emissions'].mean()
avg_hdi = hdi_df.groupby('Year')['Human Development Index'].mean()
avg_temp_anomaly = temp_anomaly_df.groupby('Year')['Global average temperature anomaly relative to 1961-1990'].mean()

# Six evenly spaced tick values from each series' minimum to its maximum
gdp_ticks, hdi_ticks, co2_ticks, temp_ticks = (
    np.linspace(series.min(), series.max(), 6)
    for series in (avg_gdp, avg_hdi, avg_co2, avg_temp_anomaly)
)

# One row per plotted series: (data, legend name, draw mode, line style, y-axis).
# Each series is bound to its own y-axis (y1..y4) so the four scales stay independent.
series_specs = [
    (avg_gdp, 'Global Average GDP per Capita', 'lines+markers',
     dict(width=2, color="#1f77b4"), "y1"),
    (avg_hdi, 'Global Average HDI', 'lines+markers',
     dict(dash='solid', width=2, color="#d62728"), "y2"),
    (avg_co2, 'Global Average CO₂ Emissions', 'lines',
     dict(dash='dot', width=2, color="#2ca02c"), "y3"),
    (avg_temp_anomaly, 'Global Average Temperature Anomaly', 'lines+markers',
     dict(dash='dot', width=2, color="#ff7f0e"), "y4"),
]

fig = go.Figure()
for series, legend_name, draw_mode, line_style, axis_ref in series_specs:
    # The series index holds the years; the values are the yearly means
    fig.add_trace(go.Scatter(
        x=series.index,
        y=series,
        mode=draw_mode,
        name=legend_name,
        line=line_style,
        yaxis=axis_ref,
    ))

# Lay out four independent y-axes around one shared x-axis.
# The x-axis only occupies the middle 60% of the figure ([0.2, 0.8]), which
# leaves room for a second axis on each side:
#   y  (GDP)  -> anchored to x, drawn at the left edge of the plot (0.2)
#   y2 (HDI)  -> free axis at 0.1, i.e. further left, outside the plot
#   y3 (CO₂)  -> anchored to x, drawn at the right edge of the plot (0.8)
#   y4 (temp) -> free axis at 0.9, i.e. further right, outside the plot
# `position` only takes effect on axes with anchor="free", so it is set on
# y2 and y4 only.
fig.update_layout(
    title=dict(
        text="Human Development Economics vs. Climate Change Indicators (1990 - Present)",
        x=0.5,  # Center title horizontally
        xanchor="center"
    ),
    xaxis=dict(
        title="Year",
        domain=[0.2, 0.8],  # Shrink the plot area to make room for the outer axes
        showgrid=False
    ),

    # Primary y-axis for GDP per capita (left edge of the plot), 6 fixed ticks
    yaxis=dict(
        title=dict(text="GDP per capita", font=dict(color="#1f77b4")),
        tickfont=dict(color="#1f77b4"),
        side="left",
        tickmode="array",
        tickvals=gdp_ticks,
        showgrid=False
    ),

    # Secondary y-axis for HDI (left side, outside the plot), 6 fixed ticks
    yaxis2=dict(
        title=dict(text="Human Development Index", font=dict(color="#d62728")),
        tickfont=dict(color="#d62728"),
        anchor="free",
        overlaying="y",
        side="left",
        position=0.1,
        tickmode="array",
        tickvals=hdi_ticks,
        showgrid=False
    ),

    # Tertiary y-axis for CO₂ emissions (right edge of the plot), 6 fixed ticks
    yaxis3=dict(
        title=dict(text="CO₂ Emissions (in tons)", font=dict(color="#2ca02c")),
        tickfont=dict(color="#2ca02c"),
        anchor="x",
        overlaying="y",
        side="right",
        tickmode="array",
        tickvals=co2_ticks,
        showgrid=False
    ),

    # Fourth y-axis for temperature anomaly (right side, outside the plot), 6 fixed ticks
    yaxis4=dict(
        title=dict(text="Temperature Anomaly (°C)", font=dict(color="#ff7f0e")),
        tickfont=dict(color="#ff7f0e"),
        anchor="free",
        overlaying="y",
        side="right",
        position=0.9,
        tickmode="array",
        tickvals=temp_ticks,
        showgrid=False
    ),

    # Horizontal legend centered below the plot
    legend=dict(
        orientation="h",
        yanchor="top",
        y=-0.15,
        xanchor="center",
        x=0.5
    ),
    template="plotly_white",
    height=800,
    width=1400
)

# Display the chart
fig.show()


### 2. Global CO2 Emissions by Sector

In [None]:
import plotly.graph_objects as go

# Node labels; a node's position in this list is the index the Sankey links use.
node_labels = [
    "Total",
    "Energy", "Direct Industrial Processes", "Waste", "Agriculture, Forestry, and Land Use",
    "Industry", "Transport", "Energy use in buildings",
    "Iron and Steel", "Non-ferrous metals", "Chemicals & petrochemicals", "Food and tobacco", "Paper & pulp",
    "Machinery", "Other industry",
    "Road transport", "Aviation", "Shipping", "Rail", "Pipeline",
    "Residential buildings", "Commercial buildings",
    "Chemicals", "Cement",
    "Landfills", "Wastewater",
    "Grassland", "Cropland", "Deforestation", "Crop burning", "Rice cultivation", "Agricultural soils",
    "Livestock & manure",
    "Unallocated fuel combustion", "Fugitive emissions", "Energy in Agriculture & Fishing"
]

# Flows keyed by parent node: parent -> [(child, value), ...].
# NOTE(review): the four top-level values sum to 100, so these look like
# percentage shares of total emissions -- confirm against the data source.
emission_flows = {
    "Total": [
        ("Energy", 73.2), ("Direct Industrial Processes", 5.2), ("Waste", 3.2),
        ("Agriculture, Forestry, and Land Use", 18.4),
    ],
    "Energy": [
        ("Industry", 24.2), ("Transport", 16.2), ("Energy use in buildings", 17.5),
        ("Unallocated fuel combustion", 7.8), ("Fugitive emissions", 5.8),
        ("Energy in Agriculture & Fishing", 1.7),
    ],
    "Direct Industrial Processes": [("Chemicals", 2.2), ("Cement", 3)],
    "Waste": [("Landfills", 1.9), ("Wastewater", 1.3)],
    "Agriculture, Forestry, and Land Use": [
        ("Grassland", 0.1), ("Cropland", 1.4), ("Deforestation", 2.2), ("Crop burning", 3.5),
        ("Rice cultivation", 1.3), ("Agricultural soils", 4.1), ("Livestock & manure", 5.8),
    ],
    "Industry": [
        ("Iron and Steel", 7.2), ("Non-ferrous metals", 0.7), ("Chemicals & petrochemicals", 3.6),
        ("Food and tobacco", 1), ("Paper & pulp", 0.6), ("Machinery", 0.5), ("Other industry", 10.6),
    ],
    "Transport": [
        ("Road transport", 11.9), ("Aviation", 1.9), ("Shipping", 1.7), ("Rail", 0.4), ("Pipeline", 0.3),
    ],
    "Energy use in buildings": [("Residential buildings", 10.9), ("Commercial buildings", 6.6)],
}

# Translate the label-based table into the parallel index/value lists Sankey expects
label_to_index = {label: position for position, label in enumerate(node_labels)}
link_source, link_target, link_value = [], [], []
for parent, children in emission_flows.items():
    for child, amount in children:
        link_source.append(label_to_index[parent])
        link_target.append(label_to_index[child])
        link_value.append(amount)

# Every link gets the same translucent black so overlapping flows stay readable
link_colors = ['rgba(0,0,0,0.2)'] * len(link_source)

# Node and link styling, assembled separately from the trace for readability
node_style = {
    "pad": 40,          # vertical gap between nodes
    "thickness": 50,    # node bar thickness
    "line": {"color": "black", "width": 0.7},
    "label": node_labels,
}
link_style = {
    "source": link_source,
    "target": link_target,
    "value": link_value,
    "color": link_colors,
    "hoverlabel": {"bgcolor": "lightblue"},
}
sankey_trace = go.Sankey(arrangement="perpendicular", node=node_style, link=link_style)
fig = go.Figure(data=[sankey_trace])

# Tall canvas with generous top/bottom margins; large fonts for the labels
fig.update_layout(
    title=dict(
        text="Global Greenhouse Gas Emissions Breakdown by Sector",
        x=0.5,
        y=0.95,
        xanchor='center',
        yanchor='top',
        font=dict(size=30),
    ),
    font=dict(size=24),
    width=1400,
    height=1800,
    margin=dict(t=200, b=200),
)

fig.show()


### 3. Temperature Anomaly over the World

In [None]:
# Load the per-country annual temperature anomalies.
# Note: this rebinds `temp_anomaly_df`, replacing the global series loaded
# for section 1 with a different dataset.
temp_anomaly_df = pd.read_csv('data/annual-temperature-anomalies.csv')

# Preview the first rows; leaving the frame as the cell's last expression
# uses the notebook's rich table display instead of plain-text print output.
temp_anomaly_df.head()

In [None]:
# Anomaly bands paired with their display colour: blues below zero, reds
# above, darkest at the open-ended extremes.
range_styles = [
    ("< -2°C", "#1f3b73"),
    ("-2 to -1.5°C", "#3a5a9a"),
    ("-1.5 to -1°C", "#6580b2"),
    ("-1 to -0.5°C", "#90a6ca"),
    ("-0.5 to 0°C", "#c3d0e5"),
    ("0 to 0.5°C", "#e5c3c3"),
    ("0.5 to 1°C", "#d69494"),
    ("1 to 1.5°C", "#c76a6a"),
    ("1.5 to 2°C", "#b34040"),
    ("> 2°C", "#8a2b2b"),
]
labels = [band for band, _ in range_styles]
color_scale = [shade for _, shade in range_styles]

# Band edges in °C; 11 edges delimit the 10 bands above
bins = [-np.inf, -2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2, np.inf]

# right=False makes every band left-closed: [lower, upper)
anomaly_bands = pd.cut(
    temp_anomaly_df['Temperature anomaly'],
    bins=bins,
    labels=labels,
    right=False,
)
temp_anomaly_df['Temperature Range'] = anomaly_bands

# Animated world map: one frame per year, countries coloured by band.
# category_orders pins the band order so band i always gets color_scale[i].
# (Antarctica is cropped later via the latitude range, not by `scope`.)
fig = px.choropleth(
    temp_anomaly_df,
    locations="Code",
    scope="world",
    animation_frame="Year",
    color="Temperature Range",
    category_orders={"Temperature Range": labels},
    color_discrete_sequence=color_scale,
    hover_name="Entity",
    hover_data={"Code": False, "Year": True, "Temperature anomaly": True},
)

# Title, canvas size, margins and legend placement.
# The map is coloured by a categorical column ("Temperature Range"), so
# plotly draws a discrete legend rather than a continuous colour bar.
# Colour-axis/colour-bar settings therefore have no effect here, which is
# why the title and position are applied to the legend instead.
fig.update_layout(
    title="Annual Temperature Anomalies by Country (°C)",
    title_x=0.5,
    title_font_size=24,
    width=1200,
    height=800,
    margin=dict(t=80, b=10, l=20, r=20),  # Small bottom margin keeps the year slider close to the map
    legend=dict(
        title=dict(text="Temperature Anomaly (°C)"),
        x=1.02,  # Just right of the map
        y=0.4,  # Slightly below the vertical centre
        yanchor="middle"
    )
)

# Crop the latitude range at 59°S so Antarctica is not shown
fig.update_geos(lataxis_range=[-59, 90])

# Show the figure
fig.show()


### 4. Sea Level Rise + Future Projection

In [None]:
# Load the global sea-level series
sea_level_df = pd.read_csv('data/sea-level.csv')

# Preview the first rows; the bare expression uses the notebook's rich
# table display instead of plain-text print output.
sea_level_df.head()

In [None]:
# Column holding the measured sea level
sea_level_col = 'Global sea level as an average of Church and White (2011) and UHSLC data'

# Parse the day strings into timestamps and order the series chronologically
sea_level_df = (
    sea_level_df
    .assign(Date=lambda frame: pd.to_datetime(frame['Day']))
    .sort_values(by='Date')
)

# The earliest observation defines the baseline (year and level)
first_year = sea_level_df['Date'].dt.year.iloc[0]
base_level = sea_level_df[sea_level_col].iloc[0]

# Express every reading as a change relative to that first value,
# so the series starts at exactly 0
sea_level_df['Relative Sea Level'] = sea_level_df[sea_level_col] - base_level

# Water level shown before the animation starts (first observation)
initial_level = sea_level_df['Relative Sea Level'].iloc[0]

fig = go.Figure()

# Trace 0: filled polygon for the water, from y=-1000 up to the current level
fig.add_trace(go.Scatter(
    x=[-1, -1, 2, 2],
    y=[-1000, initial_level, initial_level, -1000],
    fill='toself',
    fillcolor='lightblue',
    mode='none',
    name="Water Level",
))

# Trace 1: dotted red line marking the current sea level
fig.add_trace(go.Scatter(
    x=[-1, 2],
    y=[initial_level] * 2,
    mode="lines",
    line=dict(color="firebrick", width=2, dash="dot"),
    showlegend=True,
    name="Current Sea Level",
))

# Trace 2: dashed blue line at 0, i.e. the level of the first observation
fig.add_trace(go.Scatter(
    x=[-1, 2],
    y=[0, 0],
    mode="lines",
    line=dict(color="steelblue", width=2, dash="dash"),
    showlegend=True,
    name="Baseline",
))

# Curved beach outline (SVG path in data coordinates) passing through y=0
beach_path = "M -2 -200 C -1 200, -1 100, 0.4 0 S 0.5 -200, 2 -1000 L 2 -1000 L 2 -2000 L -1 -2000 Z"
fig.add_shape(
    type="path",
    path=beach_path,
    fillcolor="burlywood",
    line_color="sandybrown",
)

# Person image with its bottom edge on y=0, horizontally centred at x=0.2
fig.add_layout_image(
    source="images/person.svg",
    xref="x", yref="y",
    x=0.2, y=0,
    sizex=1.6,
    sizey=2240,
    xanchor="center", yanchor="bottom",
)

def _sea_level_frame(level, when):
    """Build the animation frame for one observation.

    level: relative sea level for this observation.
    when:  timestamp of the observation; its date becomes the frame name.

    The frame carries two traces (water polygon and red level line), which
    update the first two traces of the figure, plus the two annotations to
    show while this frame is active.
    """
    water = go.Scatter(
        x=[-1, -1, 2, 2],
        y=[-1000, level, level, -1000],
        fill='toself',
        fillcolor='lightblue',
        mode='none',
    )
    level_line = go.Scatter(
        x=[-1, 2],
        y=[level] * 2,
        mode="lines",
        line=dict(color="firebrick", width=2, dash="dot"),
        showlegend=True,
        name="Current Sea Level",
    )
    frame_annotations = [
        # Red label floating 50 units above the current level
        dict(
            x=1, y=level + 50,
            text=f"Sea Level in {when.year}",
            showarrow=False,
            font=dict(size=14, color="firebrick"),
            xanchor="center",
        ),
        # Baseline label, repeated in every frame so it stays visible
        dict(
            x=1.8, y=-50,
            text=f"Sea Level in {first_year}",
            showarrow=False,
            font=dict(color="steelblue", size=12),
        ),
    ]
    return go.Frame(
        data=[water, level_line],
        name=str(when.date()),
        layout=go.Layout(annotations=frame_annotations),
    )


# One frame per row, in chronological order
frames = [
    _sea_level_frame(level, when)
    for level, when in zip(sea_level_df['Relative Sea Level'], sea_level_df['Date'])
]

# Label for the baseline, shown before any frame has been played
baseline_label = dict(
    x=1.8, y=-50,
    text=f"Sea Level in {first_year}",
    showarrow=False,
    font=dict(color="steelblue", size=12),
)

# "Play" button: steps through all frames (100 ms each) from the current one
play_button = dict(
    label="Play",
    method="animate",
    args=[None, dict(frame=dict(duration=100, redraw=True), fromcurrent=True)],
)

# Portrait canvas, hidden x-axis, fixed y-range so the scene does not rescale
fig.update_layout(
    title=dict(text="Sea Level Rise Visualization", x=0.5, font=dict(size=20)),
    width=900,
    height=1200,
    margin=dict(l=100, r=100, t=150, b=150),
    xaxis=dict(visible=False, range=[0.07, 2]),
    yaxis=dict(
        title=dict(text="Relative Sea Level (mm)"),
        range=[-1000, 2000],
        zeroline=False,
    ),
    annotations=[baseline_label],
    updatemenus=[dict(
        type="buttons",
        showactive=False,
        x=0.5, y=-0.15,
        xanchor="center", yanchor="top",
        buttons=[play_button],
    )],
)

# Attach the animation frames and render
fig.frames = frames

fig.show()


In [None]:
import pandas as pd
import plotly.graph_objects as go

# Column holding the measured sea level
sea_level_col = 'Global sea level as an average of Church and White (2011) and UHSLC data'

# Reload the CSV, parse the day strings and order chronologically
sea_level_df = (
    pd.read_csv('data/sea-level.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Day']))
    .sort_values(by='Date')
)

# The earliest observation defines the baseline; every reading is then
# expressed as a change relative to it
first_year = sea_level_df['Date'].dt.year.iloc[0]
base_level = sea_level_df[sea_level_col].iloc[0]
sea_level_df['Relative Sea Level'] = sea_level_df[sea_level_col] - base_level

# First and last observation dates, reused for the baseline and its label
first_date = sea_level_df['Date'].min()
last_date = sea_level_df['Date'].max()

# Figure with a single line+marker trace of the relative sea level
fig = go.Figure(go.Scatter(
    x=sea_level_df['Date'],
    y=sea_level_df['Relative Sea Level'],
    mode='lines+markers',
    name="Relative Sea Level",
    line=dict(color='deepskyblue', width=3),
    marker=dict(size=5, color='deepskyblue', line=dict(width=0.5, color='darkblue')),
))

# Dashed baseline at y=0. It extends 120 years beyond the data on both
# sides so it always spans the full visible x-range.
overshoot = pd.DateOffset(years=120)
fig.add_shape(
    type="line",
    x0=first_date - overshoot, y0=0,
    x1=last_date + overshoot, y1=0,
    line=dict(color="gray", width=2, dash="dash"),
)

# Boxed baseline label, right-aligned at the last date, just above the line
fig.add_annotation(
    text=f"Baseline Level ({first_year})",
    x=last_date,
    y=10,
    xanchor="right",
    yanchor="bottom",
    align="right",
    showarrow=False,
    font=dict(size=14, color="gray"),
    bgcolor="white",
    bordercolor="gray",
    borderwidth=1,
)

# Call out two reference dates with an arrow and a "Mon YYYY" label
highlight_dates = ["1992-01-01", "2020-01-01"]
for wanted in highlight_dates:
    # Position of the observation nearest in time to the requested date
    # (the requested date itself need not exist in the data)
    nearest_pos = (sea_level_df['Date'] - pd.Timestamp(wanted)).abs().argmin()
    nearest_date = sea_level_df['Date'].iloc[nearest_pos]
    nearest_level = sea_level_df['Relative Sea Level'].iloc[nearest_pos]
    fig.add_annotation(
        x=nearest_date,
        y=nearest_level,
        text=nearest_date.strftime('%b %Y'),
        font=dict(size=13, color="darkblue"),
        showarrow=True,
        arrowhead=2,
        arrowcolor="deepskyblue",
        ax=0, ay=-40,  # label sits 40 px straight above the point
    )

# Customize layout for readability and aesthetics.
# Axis titles use the nested title=dict(text=..., font=...) form (as in the
# section 1 chart) rather than the deprecated `titlefont` attribute.
fig.update_layout(
    title="Global Sea Level Rise (Relative to Initial Base Level)",
    title_font=dict(size=26, color='darkslategray'),
    title_x=0.5,
    xaxis=dict(
        title=dict(text="Year", font=dict(size=18)),
        tickfont=dict(size=12),
        showgrid=False,  # Remove grid for x-axis
        showline=True,   # Add axis line
        linecolor="black",
        ticks="outside",
        # Pad the visible range: 7 months before the first and 5 months after the last observation
        range=[sea_level_df['Date'].min() - pd.DateOffset(months=7), sea_level_df['Date'].max() + pd.DateOffset(months=5)],
        ticklen=5,
    ),
    yaxis=dict(
        title=dict(text="Relative Sea Level (mm)", font=dict(size=18)),
        tickfont=dict(size=12),
        showgrid=True,
        gridcolor="lightgray",
        showline=True,   # Add axis line
        linecolor="black",
        ticks="outside",
        ticklen=5,
        range=[-50, 300]  # Fixed y-axis range from -50 to 300 mm
    ),
    plot_bgcolor="white",
    width=1200,
    height=800,
    margin=dict(l=100, r=100, t=100, b=100)
)

# Display the plot
fig.show()


### 5. Melting Polar Ice Caps

In [None]:
# Load the ice-sheet mass balance series
ice_df = pd.read_csv('data/ice-sheet-mass-balance.csv')

# Preview the first rows; the bare expression uses the notebook's rich
# table display instead of plain-text print output.
ice_df.head()

In [None]:
import pandas as pd
import plotly.graph_objects as go
from PIL import Image, ImageDraw
import numpy as np

# Column holding the cumulative mass change
mass_change_col = 'Cumulative change in mass in the ice sheets, according to NASA/JPL'

# Load, parse the day strings, order chronologically and tag each row with its year
ice_df = (
    pd.read_csv('data/ice-sheet-mass-balance.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Day']))
    .sort_values(by='Date')
    .assign(Year=lambda frame: frame['Date'].dt.year)
)

# Mean cumulative change per calendar year
yearly_avg_df = ice_df.groupby('Year')[mass_change_col].mean().reset_index()

# Reference mass used to turn cumulative change into a remaining mass.
# NOTE(review): the value and its unit ("billion metric tons") come from
# the original author -- confirm the magnitude against a source.
initial_mass = 24380
scaling_factor = 1 / initial_mass  # reciprocal of the reference mass
yearly_avg_df['Relative Mass'] = initial_mass + yearly_avg_df[mass_change_col]

# Artwork used by the animation
ice_cube_image = Image.open("images/ice-cube.png")
table_image = Image.open("images/table.png")

# Draw a small raster water-droplet icon
def create_detailed_droplet():
    """Return a 20x30 RGBA image of a stylised water droplet.

    The canvas is fully transparent; a blue ellipse with a lighter-blue
    outline forms the droplet and a smaller light ellipse is drawn on top
    of it as a highlight.
    """
    canvas = Image.new("RGBA", (20, 30), (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    body_box = (5, 2, 15, 28)        # bounding box of the droplet body
    highlight_box = (7, 10, 13, 20)  # bounding box of the lighter centre
    pen.ellipse(body_box, fill="dodgerblue", outline="deepskyblue", width=1)
    pen.ellipse(highlight_box, fill="lightblue")
    return canvas

# Generate random positions for droplets, simulating melting effect
def generate_droplets(num_droplets, rng=None):
    """Return ``num_droplets`` random (x, y) positions for droplet images.

    x is drawn uniformly from 1.5 ± 0.15 and y uniformly from (0.6, 1.1],
    in the data coordinates of the figure.

    Parameters
    ----------
    num_droplets : int
        Number of positions to generate; zero or a negative number yields
        an empty list.
    rng : numpy.random.Generator, optional
        Source of randomness. Pass a seeded generator to make the layout
        reproducible across runs. When omitted, NumPy's global random
        state is used, exactly as before (so ``np.random.seed`` still
        applies and the draw order is unchanged).

    Returns
    -------
    list[tuple[float, float]]
    """
    draw = np.random.uniform if rng is None else rng.uniform
    # Tuple elements are evaluated left to right, so each droplet draws its
    # x first and then its y -- the same order as the original loop.
    return [
        (1.5 + draw(-0.15, 0.15), 1.1 - draw(0, 0.5))
        for _ in range(num_droplets)
    ]

# Static scene shown before the animation starts
fig = go.Figure()

# Dashed reference rectangle (x 1.065..1.935, y 1..2) marking the starting
# outline of the ice cube
fig.add_shape(
    type="rect",
    x0=1.065, x1=1.935,
    y0=1, y1=2,
    line=dict(color="gray", width=2, dash="dash"),
)

# Initial images, both placed in data coordinates and centred on x=1.5:
# the full-size ice cube (centred at y=1.5) and the table (bottom edge at y=0.55)
initial_images = (
    (ice_cube_image, dict(x=1.5, y=1.5, sizex=1.2, sizey=1.2, yanchor="middle")),
    (table_image, dict(x=1.5, y=0.55, sizex=4, sizey=0.6, yanchor="bottom")),
)
for picture, placement in initial_images:
    fig.add_layout_image(
        source=picture,
        xref="x", yref="y",
        xanchor="center",
        **placement,
    )

# Create one animation frame per year: a shrinking ice cube, the table,
# a growing number of droplets, and two text annotations.

# Every droplet uses the same picture, so draw it once instead of once per
# droplet per frame.
droplet_image = create_detailed_droplet()

frames = []
for i in range(len(yearly_avg_df)):
    year = yearly_avg_df['Year'].iloc[i]
    relative_mass = yearly_avg_df['Relative Mass'].iloc[i]

    # Side length scales with the square root of the remaining mass
    # fraction, so the drawn area is proportional to the mass.
    cube_size = np.sqrt(relative_mass / initial_mass) * 1.2

    cube = dict(
        source=ice_cube_image,
        # Lower the centre by half of the lost height so the bottom edge
        # of the cube stays where it started
        x=1.5, y=1.5 - (1.2 - cube_size) / 2,
        xref="x", yref="y",
        sizex=cube_size,
        sizey=cube_size,
        xanchor="center",
        yanchor="middle"
    )
    # Table below the ice cube, repeated in every frame because the frame's
    # image list replaces the figure's
    table = dict(
        source=table_image,
        x=1.5, y=0.55,
        xref="x", yref="y",
        sizex=4,
        sizey=0.6,
        xanchor="center",
        yanchor="bottom"
    )
    # Randomly positioned droplets; one more droplet with every frame
    droplets = [
        dict(
            source=droplet_image,
            x=pos[0], y=pos[1],
            xref="x", yref="y",
            sizex=0.05,
            sizey=0.1,
            xanchor="center",
            yanchor="middle"
        ) for pos in generate_droplets(i + 1)
    ]

    frames.append(go.Frame(
        name=str(year),
        layout=go.Layout(
            images=[cube, table, *droplets],
            annotations=[
                # Boxed header above the cube: reference vs. current mass
                dict(
                    x=1.5, y=2.3,
                    text=f"Initial Mass: {initial_mass} billion tons<br>Current Mass: {relative_mass:.2f} billion tons",
                    showarrow=False,
                    font=dict(size=20, color="cornflowerblue"),
                    xanchor="center",
                    bgcolor="ghostwhite",
                    bordercolor="cornflowerblue",
                    borderwidth=2
                ),
                # Footer below the table: year and change since the reference
                dict(
                    x=1.5, y=0.3,
                    text=f"Year: {year}<br>Mass Change: {relative_mass - initial_mass:.2f} billion tons",
                    showarrow=False,
                    font=dict(size=20, color="black"),
                    xanchor="center"
                )
            ]
        )
    ))

# "Play" button: steps through all frames (500 ms each) from the current one
play_button = dict(
    label="Play",
    method="animate",
    args=[None, dict(frame=dict(duration=500, redraw=True), fromcurrent=True)],
)

# Hidden axes with identical ranges for x and y
hidden_axis = dict(range=[0.25, 2.75], visible=False)

# Square white canvas
fig.update_layout(
    title=dict(
        text="Antarctic Ice Mass Change Visualization (Annual Average)",
        x=0.5,
        font=dict(size=30),
    ),
    width=1100,
    height=1100,
    margin=dict(l=50, r=50, t=100, b=100),
    paper_bgcolor="white",
    plot_bgcolor="white",
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis),
    updatemenus=[dict(
        type="buttons",
        showactive=False,
        x=0.5, y=-0.1,
        xanchor="center", yanchor="top",
        buttons=[play_button],
    )],
)

# Attach the animation frames and render the interactive plot
fig.frames = frames

fig.show()


In [None]:
import pandas as pd
import plotly.graph_objects as go

# Column holding the cumulative mass change
mass_change_col = 'Cumulative change in mass in the ice sheets, according to NASA/JPL'

# Reload, parse the day strings, order chronologically and tag rows with their year
ice_df = (
    pd.read_csv('data/ice-sheet-mass-balance.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Day']))
    .sort_values(by='Date')
    .assign(Year=lambda frame: frame['Date'].dt.year)
)

# Mean cumulative change per calendar year
yearly_avg_df = ice_df.groupby('Year')[mass_change_col].mean().reset_index()

# Remaining mass = reference mass + cumulative change.
# NOTE(review): reference value and unit (billion metric tons) are taken
# from the original author -- confirm against a source.
initial_mass = 24380
yearly_avg_df['Relative Mass'] = initial_mass + yearly_avg_df[mass_change_col]

# Figure with a single line+marker trace of the yearly remaining mass
fig = go.Figure(go.Scatter(
    x=yearly_avg_df['Year'],
    y=yearly_avg_df['Relative Mass'],
    mode='lines+markers',
    name="Cumulative Ice Mass",
    line=dict(color='cornflowerblue', width=3),
    marker=dict(size=6, color='cornflowerblue', line=dict(width=1, color='white')),
))

# Update layout with improved aesthetics.
# Axis titles use the nested title=dict(text=..., font=...) form (as in the
# section 1 chart) rather than the deprecated `titlefont` attribute.
fig.update_layout(
    title=dict(
        text="Antarctic Ice Mass Change (Billion Metric Tons)",
        font=dict(size=26, color='darkslategray'),
        x=0.5,  # Center title
        xanchor='center'
    ),
    xaxis=dict(
        title=dict(text="Year", font=dict(size=18, color='darkslategray')),
        tickfont=dict(size=14),
        dtick=1,  # Show all years on the x-axis
        mirror=True,
        range=[yearly_avg_df['Year'].min() - 0.5, yearly_avg_df['Year'].max() + 0.5],  # Half a year of padding on each side
        ticks="outside",
        ticklen=6,
        tickcolor="darkgray",
    ),
    yaxis=dict(
        title=dict(text="Relative Mass (Billion Tons)", font=dict(size=18, color='darkslategray')),
        tickfont=dict(size=14),
        showgrid=True,
        gridcolor="lightgray",
        gridwidth=0.5,
        linecolor="darkgray",
        mirror=True,
        range=[0, 25000 + 100],  # Fixed range from 0 to 25,100
        ticks="outside",
        ticklen=6,
        tickcolor="darkgray"
    ),
    plot_bgcolor="white",
    width=1200,
    height=800,
    margin=dict(l=100, r=100, t=100, b=100)  # Add margins for a better layout
)

# Dashed horizontal reference line at the initial mass, spanning the
# first to the last year in the data
first_plotted_year = yearly_avg_df['Year'].min()
last_plotted_year = yearly_avg_df['Year'].max()
fig.add_shape(
    type="line",
    x0=first_plotted_year, x1=last_plotted_year,
    y0=initial_mass, y1=initial_mass,
    line=dict(color="gray", width=1, dash="dash"),
)

# Show plot
fig.show()


### 6. Fossil Fuel Consumption by Country

In [None]:
# Load per-capita fossil fuel use and per-capita primary energy use
fossil_fuel_df = pd.read_csv('data/fossil-fuels-per-capita.csv')
energy_country_df = pd.read_csv('data/per-capita-energy-use.csv')

# Print a heading followed by the first rows of each dataset
previews = (
    ("Fossil Fuel Data:", fossil_fuel_df),
    ("\nEnergy Consumption Data:", energy_country_df),
)
for heading, frame in previews:
    print(heading)
    print(frame.head())

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.patches import Wedge
import numpy as np
from geopy.geocoders import Nominatim
from tqdm import tqdm

# Most recent year present in each dataset
latest_year_fossil, latest_year_energy = (
    frame['Year'].max() for frame in (fossil_fuel_df, energy_country_df)
)

# Use the earlier of the two so the chosen year does not exceed either dataset's range
latest_year = min(latest_year_fossil, latest_year_energy)

# Keep only that year's rows in each dataset
fossil_fuel_df = fossil_fuel_df.loc[fossil_fuel_df['Year'].eq(latest_year)]
energy_country_df = energy_country_df.loc[energy_country_df['Year'].eq(latest_year)]

# Inner join on entity and year; any other column shared by both frames
# gets the '_fossil' / '_energy' suffix
merged_df = fossil_fuel_df.merge(
    energy_country_df,
    on=['Entity', 'Year'],
    suffixes=('_fossil', '_energy'),
)

# Look up a latitude/longitude for every entity via the Nominatim geocoder.
# NOTE(review): this issues one network request per row with no caching or
# rate limiting -- check the service's usage policy before re-running often.
geolocator = Nominatim(user_agent="geoapiExercises")

# Start with missing coordinates; rows that cannot be geocoded stay NaN
merged_df['Latitude'] = np.nan
merged_df['Longitude'] = np.nan

# Iterate over (index label, entity name) so the assignment below targets
# the right row even if the frame's index is not 0..n-1
for row_label, country in tqdm(merged_df['Entity'].items(), total=len(merged_df)):
    try:
        location = geolocator.geocode(country)
    except Exception as exc:
        # Best effort: report the failure (with its cause) and move on.
        # Catching Exception rather than using a bare except keeps
        # Ctrl-C / kernel interrupts working.
        print(f"Geocode failed for {country}: {exc}")
        continue
    if location:
        merged_df.at[row_label, 'Latitude'] = location.latitude
        merged_df.at[row_label, 'Longitude'] = location.longitude

# Drop entities with missing coordinates
merged_df = merged_df.dropna(subset=['Latitude', 'Longitude'])

# Load the world map shapefile downloaded manually
world = gpd.read_file('110m_cultural/ne_110m_admin_0_countries.shp')


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.patches import Wedge, Patch
import numpy as np
from geopy.geocoders import Nominatim
from tqdm import tqdm

# Fill colours for the two halves of each country marker:
# fossil fuel use (left half) and total energy use (right half)
fossil_fuel_color, energy_color = "royalblue", "orange"

# Plotting function to reduce redundancy
def plot_region(df, title, xlim=None, ylim=None, scale_factor=0.015):
    """Draw the world base map and overlay one split circle per row of ``df``.

    Each row becomes two half-circles centred on (Longitude, Latitude): the
    left half for fossil fuel use and the right half for primary energy
    use. Each radius is ``sqrt(value) * scale_factor``, so a half's area
    grows linearly with its value.

    Parameters
    ----------
    df : DataFrame
        Must provide 'Longitude', 'Latitude', 'Fossil fuels per capita (kWh)'
        and 'Primary energy consumption per capita (kWh/person)'.
    title : str
        Plot title.
    xlim, ylim : tuple, optional
        Axis limits used to zoom into a region; the axis is left at its
        default extent when omitted.
    scale_factor : float
        Multiplier applied to the square root of each value.

    Reads the module-level ``world`` GeoDataFrame and the two colour
    constants. Shows the figure and returns nothing.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    world.plot(ax=ax, color="whitesmoke", edgecolor="gray")

    # Optional zoom
    if xlim:
        ax.set_xlim(xlim)
    if ylim:
        ax.set_ylim(ylim)

    # (value column, start angle, end angle, fill colour) for each half:
    # 90 -> 270 degrees is the left half, 270 -> 90 the right half
    half_specs = (
        ('Fossil fuels per capita (kWh)', 90, 270, fossil_fuel_color),
        ('Primary energy consumption per capita (kWh/person)', 270, 90, energy_color),
    )

    for _, record in df.iterrows():
        centre = (record['Longitude'], record['Latitude'])
        for value_column, start_deg, end_deg, fill in half_specs:
            ax.add_patch(Wedge(
                center=centre,
                r=np.sqrt(record[value_column]) * scale_factor,
                theta1=start_deg,
                theta2=end_deg,
                color=fill,
                alpha=0.7,
                # NOTE(review): `_b` is a private member of the data
                # transform -- kept as in the original; confirm it is needed
                transform=ax.transData._b,
            ))

    # Legend explaining the two colours
    ax.legend(
        handles=[
            Patch(facecolor=fossil_fuel_color, edgecolor='black', label='Fossil Fuel Usage'),
            Patch(facecolor=energy_color, edgecolor='black', label='Energy Consumption'),
        ],
        loc="lower left",
        title="Legend",
        fontsize='medium',
        title_fontsize='large',
    )

    # Title, no longitude/latitude ticks, then render
    plt.title(title, fontsize=18)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()

# Whole world: no axis limits, so the map's full extent is shown
plot_region(merged_df, f"Global Fossil Fuel vs Energy Consumption per Capita in {latest_year}")

# Europe.
# NOTE(review): names must match the 'Entity' column exactly; entries spelled
# differently in the data (e.g. 'Czech Republic' vs 'Czechia') are silently
# left out -- confirm against the dataset.
europe_countries = [
    'Norway', 'Sweden', 'Finland', 'Denmark', 'Iceland', 'United Kingdom', 'Ireland', 'France', 'Belgium',
    'Netherlands', 'Luxembourg', 'Poland', 'Czech Republic', 'Slovakia', 'Hungary', 'Romania', 'Bulgaria',
    'Italy', 'Spain', 'Portugal', 'Greece'
]
europe_df = merged_df.loc[merged_df['Entity'].isin(europe_countries)]
plot_region(
    europe_df,
    f"Europe Fossil Fuel vs Energy Consumption per Capita in {latest_year}",
    xlim=(-25, 45),
    ylim=(35, 70),
)

# Middle East
middle_east_countries = ['Turkey', 'Iran', 'Iraq', 'Israel', 'Jordan', 'Lebanon', 'Saudi Arabia', 'Yemen', 'Oman']
middle_east_df = merged_df.loc[merged_df['Entity'].isin(middle_east_countries)]
plot_region(
    middle_east_df,
    f"Middle East Fossil Fuel vs Energy Consumption per Capita in {latest_year}",
    xlim=(30, 60),
    ylim=(10, 40),
)


### 7. Investment in Renewable Energy

In [None]:
# Load investment in renewable energy, broken down by technology
renewable_investment_df = pd.read_csv('data/investment-in-renewable-energy-by-technology.csv')

# Preview the first rows; the bare expression uses the notebook's rich
# table display instead of plain-text print output.
renewable_investment_df.head()

In [None]:
import pandas as pd
import plotly.graph_objects as go

# Load the data
renewable_investment_df = pd.read_csv('data/investment-in-renewable-energy-by-technology.csv')

# Year window shown in the animation (inclusive on both ends)
first_year, last_year = 2004, 2019

# Define the investment categories (one treemap tile per column)
investment_categories = [
    'Marine energy', 'Small hydropower', 'Geothermal energy', 'Biofuels',
    'Biomass and waste-to-energy', 'Wind energy', 'Solar energy'
]

# Keep only the 'World' rows inside the year window and only the columns needed.
# .copy() detaches the result from renewable_investment_df, so the unit
# conversion below is an ordinary assignment rather than a write into a
# filtered slice (which pandas reports as SettingWithCopyWarning).
world_rows = (
    (renewable_investment_df['Entity'] == 'World')
    & (renewable_investment_df['Year'] >= first_year)
    & (renewable_investment_df['Year'] <= last_year)
)
world_investment_df = renewable_investment_df.loc[world_rows, ['Year'] + investment_categories].copy()

# Convert investment values to billions for readability
world_investment_df[investment_categories] = world_investment_df[investment_categories] / 1e9

if world_investment_df.empty:
    raise ValueError(f"No 'World' investment rows found between {first_year} and {last_year}")

# One row per year (first occurrence wins), indexed by year and in
# chronological order so the animation always plays forward in time
yearly_investment = (
    world_investment_df
    .drop_duplicates(subset='Year')
    .set_index('Year')
    .sort_index()
)


def _investment_treemap(amounts):
    """Return the treemap trace for one year's per-category amounts (in billions).

    `amounts` must follow the order of `investment_categories`. The same
    values drive tile size, tile colour and the "<amount>B" label.
    """
    amounts = list(amounts)
    return go.Treemap(
        labels=investment_categories,
        parents=[""] * len(investment_categories),  # flat treemap: every tile sits at the root
        values=amounts,
        text=[f"{amount:.2f}B" for amount in amounts],
        textinfo="label+text",
        texttemplate="%{label}<br>%{text}",
        textfont=dict(size=16),
        marker=dict(colors=amounts, colorscale="RdYlGn"),
        tiling=dict(squarifyratio=1)  # Adjust for more squared cells
    )


def _year_annotation(year):
    """Return the layout annotation that names the year currently displayed."""
    return dict(
        text=f"<b>Investment Distribution in Renewable Technologies for the Year {year}</b>",
        x=0.5,
        y=1.15,  # above the plotting area, in paper coordinates
        xref="paper",
        yref="paper",
        showarrow=False,
        font=dict(size=20)
    )


# Create one animation frame per year
frames = [
    go.Frame(
        data=[_investment_treemap(row[investment_categories])],
        name=str(year),
        layout=go.Layout(annotations=[_year_annotation(year)])
    )
    for year, row in yearly_investment.iterrows()
]

# The figure starts on the earliest available year instead of a hard-coded 2004,
# so it still renders if the first year of the window is missing from the data
initial_year = yearly_investment.index[0]

# Create the initial figure
fig = go.Figure(
    data=_investment_treemap(yearly_investment.loc[initial_year, investment_categories]),
    layout=go.Layout(
        title=dict(
            text=f"Global Investment in Renewable Energy by Technology ({first_year} - {last_year})",
            font=dict(size=24),
            x=0.5,
            xanchor='center',
            y=0.95  # Move title down slightly
        ),
        annotations=[_year_annotation(initial_year)],
        width=1200,
        height=900,  # Increase height for additional spacing
        margin=dict(t=200, b=100),  # Increase top and bottom margins for more space
        # NOTE(review): the treemap traces set marker.colorscale directly and do not
        # reference layout.coloraxis, so this colour-bar styling probably has no
        # visible effect — confirm whether a colour bar is wanted.
        coloraxis_colorbar=dict(
            title="Investment (Billion USD)",
            tickprefix="$",
            ticks="outside"
        ),
        updatemenus=[dict(
            type="buttons",
            showactive=False,
            x=0.5,
            xanchor="center",
            y=-0.3,  # Move play button further below
            buttons=[dict(label="Play",
                          method="animate",
                          args=[None, dict(frame=dict(duration=1000, redraw=True), fromcurrent=True)])]
        )]
    ),
    frames=frames
)

# Show plot
fig.show()


### 8. Natural Disasters Timeline

In [None]:
# Read the natural-disasters CSV into a DataFrame
natural_disasters_df = pd.read_csv(
    filepath_or_buffer='data/natural-disasters.csv'
)

In [None]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Load the data
natural_disasters_df = pd.read_csv('data/natural-disasters.csv')

# Filter for global totals (Country name = "World") and years after 1960
natural_disasters_df = natural_disasters_df[(natural_disasters_df["Country name"] == "World") & (natural_disasters_df["Year"] > 1960)]

# Display label -> source column holding the number of people affected
disaster_categories = {
    "Drought": "Number of total people affected by drought",
    "Flood": "Number of total people affected by floods",
    "Storm": "Number of total people affected by storms",
    "Wildfire": "Number of total people affected by wildfires",
    "Extreme Temperature": "Number of total people affected by extreme temperatures"
}
affected_columns = list(disaster_categories.values())

# Keep only the year and the affected-people columns, treating missing values as 0
filtered_df = natural_disasters_df[["Year"] + affected_columns].fillna(0)

# Convert the affected-people columns to numeric in case they were read as strings;
# anything unparseable becomes 0
filtered_df[affected_columns] = (
    filtered_df[affected_columns]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)

# Melt to long format: one row per (year, disaster type)
melted_df = filtered_df.melt(id_vars="Year",
                             value_vars=affected_columns,
                             var_name="Disaster Type",
                             value_name="People Affected")

# Replace source column names with their display labels. The mapping is derived
# from disaster_categories so the two can never drift apart.
column_to_label = {column: label for label, column in disaster_categories.items()}
melted_df["Disaster Type"] = melted_df["Disaster Type"].map(column_to_label)

# Bubble sizing: the marker size is sqrt(people) * max_bubble_diameter and sizeref
# is sqrt(largest value), so the largest bubble is max_bubble_diameter pixels wide
# and bubble *area* grows linearly with the number of people affected.
max_bubble_diameter = 170
global_max_people_affected = melted_df["People Affected"].max()
sizeref = np.sqrt(global_max_people_affected)

# Create a bubble chart (timeline) with color intensity and size based on affected population
fig = go.Figure()

# All disaster types share one trace; the categorical y value separates them into rows
fig.add_trace(
    go.Scatter(
        x=melted_df["Year"],
        y=melted_df["Disaster Type"],
        mode="markers",
        marker=dict(
            size=np.sqrt(melted_df["People Affected"]) * max_bubble_diameter,
            sizemode="diameter",
            sizeref=sizeref,
            color=melted_df["People Affected"],  # Continuous color scale
            colorscale="Viridis",
            # title.font replaces the deprecated `titlefont` attribute
            colorbar=dict(
                title=dict(text="People Affected", font=dict(size=20)),
                tickfont=dict(size=16)
            ),
            opacity=0.5
        ),
        text=melted_df["People Affected"].apply(lambda x: f"{int(x):,} people affected"),  # Hover info
        hoverinfo="text+x"
    )
)

# Update layout for readability with increased font sizes
# (axis titles use title.font instead of the deprecated `titlefont`)
fig.update_layout(
    title=dict(
        text="Timeline of People Affected by Natural Disasters (Drought, Flood, Storm, Wildfire, Extreme Temperature)",
        font=dict(size=24)
    ),
    xaxis=dict(
        title=dict(text="Year", font=dict(size=20)),
        tickfont=dict(size=16),
        tickformat="d"
    ),
    yaxis=dict(
        title=dict(text="Disaster Type", font=dict(size=20)),
        tickfont=dict(size=16),
        categoryorder="total descending"
    ),
    width=2400,
    height=1300,
    showlegend=False  # Remove legend for disaster types
)

# Events to call out: each entry points at the bubble for (year, disaster type)
annotations = [
    {"year": 2019, "disaster": "Wildfire", "text": "2019 Wildfire Season"},
    {"year": 2008, "disaster": "Extreme Temperature", "text": "2008 heatwaves in USA and Australia, severe winter in China"},
    {"year": 1998, "disaster": "Flood", "text": "1998 China's Yangtze River Floods"},
    {"year": 2002, "disaster": "Drought", "text": "2002 India worst drought"},
    {"year": 1987, "disaster": "Drought", "text": "Severe droughts in USA, China and Africa"},
    {"year": 2016, "disaster": "Storm", "text": "Hurricane Matthew"},
    {"year": 2002, "disaster": "Storm", "text": "2002 European windstorm + Hurricane Lili + Typhoon Rusa"},
]

# Add each annotation to the figure
for ann in annotations:
    fig.add_annotation(
        x=ann["year"],
        y=ann["disaster"],
        xref="x",
        yref="y",
        text=ann["text"],
        showarrow=True,
        arrowhead=2,
        arrowcolor="firebrick",
        font=dict(size=18, color="firebrick"),  # Same color as the arrow for consistency
        ax=40,  # Offset of the text from the arrow head, in pixels
        ay=-100
    )

# Show the plot
fig.show()


### 9. Ocean Depth Temperature

In [None]:
import glob
import os

import pandas as pd

# Column layout of each file: latitude, longitude, then one column per depth level (metres)
depths = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 125, 150, 175, 200, 225,
          250, 275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000,
          1050, 1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600, 1650, 1700, 1750, 1800, 1850, 1900,
          1950, 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600,
          3700, 3800, 3900, 4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 5100, 5200, 5300, 5400,
          5500]
column_names = ['Latitude', 'Longitude'] + [f'Depth_{depth}' for depth in depths]

# Create a mapping of filenames to decade ranges
filename_to_decade = {
    "woa23_5564_t00an01.csv": "1955-1964",
    "woa23_6574_t00an01.csv": "1965-1974",
    "woa23_7584_t00an01.csv": "1975-1984",
    "woa23_8594_t00an01.csv": "1985-1994",
    "woa23_95A4_t00an01.csv": "1995-2004",
    "woa23_A5B4_t00an01.csv": "2005-2014",
    "woa23_B5C2_t00an01.csv": "2015-2022"
}

# glob returns matches in arbitrary order; sorting makes the row order of
# combined_df reproducible from run to run
ocean_temp_files = sorted(glob.glob("data/ocean_temp/woa23_*.csv"))
if not ocean_temp_files:
    # Fail here with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError("No files matching data/ocean_temp/woa23_*.csv were found")

# Combine all files into a single DataFrame
all_data = []

for file in ocean_temp_files:
    # os.path.basename handles both '/' and the '\\' separators glob yields on
    # Windows; splitting on '/' alone would leave the directory attached there
    # and every file would be tagged "Unknown Decade"
    filename = os.path.basename(file)

    # Use the map to get the decade
    decade = filename_to_decade.get(filename, "Unknown Decade")

    # Read the CSV file, skipping the first two lines and supplying the column names
    df = pd.read_csv(
        file,
        skiprows=2,
        names=column_names,
        na_values=['']  # Handle empty fields as NaN
    )

    # Add the decade column
    df['Decade'] = decade

    # Append to the list
    all_data.append(df)

# Concatenate all dataframes into one
combined_df = pd.concat(all_data, ignore_index=True)

# Display the combined DataFrame
print(combined_df.head())


In [None]:
import pandas as pd
import plotly.express as px

# Relies on `combined_df` built by the ocean-temperature loading cell

# Depth column to map (e.g. 'Depth_0' for the 0-metre level)
depth_level = 'Depth_0'

# Chronological decade labels, used as the category order for the animation slider
decade_order = [
    "1955-1964", "1965-1974", "1975-1984", "1985-1994",
    "1995-2004", "2005-2014", "2015-2022"
]

# Keep the coordinates, the chosen depth column and the decade; drop rows with a
# missing value in any of them, then expose the depth column as 'Temperature'.
# No unit conversion is applied: values are plotted as read.
filtered_df = (
    combined_df[['Latitude', 'Longitude', depth_level, 'Decade']]
    .dropna()
    .rename(columns={depth_level: 'Temperature'})
)
filtered_df['Decade'] = pd.Categorical(
    filtered_df['Decade'],
    categories=decade_order,
    ordered=True,
)

# Colour range: observed minimum and maximum widened by 10 on each side
temp_min = filtered_df["Temperature"].min() - 10
temp_max = filtered_df["Temperature"].max() + 10

# Animated point map, one frame per decade; rows are sorted by the ordered
# 'Decade' categorical so the slider steps through the decades in order
fig = px.scatter_geo(
    filtered_df.sort_values(by="Decade"),
    lat="Latitude",
    lon="Longitude",
    color="Temperature",
    animation_frame="Decade",
    color_continuous_scale="RdBu_r",
    range_color=(temp_min, temp_max),
    title=f"Global Ocean Temperature at {depth_level.replace('_', ' - ')} Meters (by Decade)",
    projection="natural earth",
)

# Figure size, colour-bar caption and centred title
fig.update_layout(
    width=1200,
    height=800,
    coloraxis_colorbar={"title": "Temperature (°C)"},
    title_x=0.5,
)

# Show the plot
fig.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

# Relies on `combined_df` built by the ocean-temperature loading cell

# Depth columns present in combined_df and the chronological decade order
depth_levels = [col for col in combined_df.columns if col.startswith("Depth_")]
decade_order = [
    "1955-1964", "1965-1974", "1975-1984", "1985-1994",
    "1995-2004", "2005-2014", "2015-2022"
]

# Average temperature per decade at every depth level.
# NOTE(review): dropna() keeps only rows that have a value at *every* depth
# level — confirm that restricting the average to those rows is intended.
filtered_df = combined_df[['Decade'] + depth_levels].dropna()
filtered_df['Decade'] = pd.Categorical(filtered_df['Decade'], categories=decade_order, ordered=True)
# observed=False is spelled out: it keeps one row per decade in decade_order
# (NaN if a decade has no data) and avoids relying on the deprecated implicit default
avg_temp_df = filtered_df.groupby('Decade', observed=False)[depth_levels].mean().reset_index()

# Convert the data to long format for easier plotting
avg_temp_melted = avg_temp_df.melt(id_vars="Decade", var_name="Depth", value_name="Temperature")
avg_temp_melted["Depth"] = avg_temp_melted["Depth"].str.replace("Depth_", "").astype(int)  # Convert depth to numeric

# Plot settings
fig = plt.figure(figsize=(14, 10))
ax = fig.add_subplot(111, projection='3d')
ax.set_box_aspect([1, 1, 2])  # Set 3D aspect ratio

# Sort depth levels to ensure correct ordering
depth_intervals = sorted(avg_temp_melted['Depth'].unique())

# Loop-invariant values, computed once instead of once per slab:
# the colour-scale bounds and a (decade, depth) -> temperature lookup
column_temp_min = avg_temp_melted['Temperature'].min()
column_temp_max = avg_temp_melted['Temperature'].max()
column_temp_span = column_temp_max - column_temp_min
temperature_at = {
    (decade, depth): temp
    for decade, depth, temp in zip(
        avg_temp_melted['Decade'], avg_temp_melted['Depth'], avg_temp_melted['Temperature']
    )
}

for i, decade in enumerate(decade_order):
    for j, depth in enumerate(depth_intervals):
        temp = temperature_at[(decade, depth)]
        color = plt.cm.RdBu_r((temp - column_temp_min) / column_temp_span)

        # Each slab ends at its own depth and starts at the previous level, so
        # slabs tile the column without overlapping where the spacing changes
        # (a forward difference made e.g. the 100 m slab span 75-100 m, on top
        # of the 80-100 m slabs). The first level has no predecessor and keeps
        # the thickness of the gap to the next level.
        z_base = depth
        if j > 0:
            z_top = depth_intervals[j - 1]
        elif len(depth_intervals) > 1:
            z_top = depth - (depth_intervals[1] - depth)
        else:
            z_top = depth

        # Footprint of the decade's column: unit square starting at x = i
        x0, x1 = i, i + 1
        y0, y1 = 0, 1

        # Create faces for the rectangular prism
        faces = [
            [(x0, y0, z_base), (x1, y0, z_base), (x1, y0, z_top), (x0, y0, z_top)],  # Front
            [(x1, y0, z_base), (x1, y1, z_base), (x1, y1, z_top), (x1, y0, z_top)],  # Right
            [(x1, y1, z_base), (x0, y1, z_base), (x0, y1, z_top), (x1, y1, z_top)],  # Back
            [(x0, y1, z_base), (x0, y0, z_base), (x0, y0, z_top), (x0, y1, z_top)],  # Left
            [(x0, y0, z_top), (x1, y0, z_top), (x1, y1, z_top), (x0, y1, z_top)],    # Ceiling (top)
            [(x0, y0, z_base), (x1, y0, z_base), (x1, y1, z_base), (x0, y1, z_base)]  # Floor (bottom)
        ]

        # Draw each face of the rectangular prism with its color
        ax.add_collection3d(Poly3DCollection(faces, color=color, edgecolor="k", linewidths=0.2, alpha=0.8))

# Set axis labels and title, and move labels and ticks further from the plot
ax.set_xticks(range(len(decade_order)))
ax.set_xticklabels(decade_order, rotation=45, ha='right', fontsize=12)
ax.set_xlabel("Decade", fontsize=15, labelpad=50)  # Increased label padding

ax.set_yticks([])  # Remove y-axis ticks
ax.set_yticklabels([])  # Hide y-axis labels

# Manually added collections are not guaranteed to update the axis limits
# (depends on the Matplotlib version), so pin x/y to the extent of the columns;
# otherwise the last decade's column can fall outside the axes box
ax.set_xlim(0, len(decade_order))
ax.set_ylim(0, 1)

ax.set_zlabel("Depth (m)", fontsize=15, labelpad=50)  # Increased label padding for depth
ax.set_zlim([max(depth_intervals), 0])  # Reverse depth axis to have 0 at the top
ax.zaxis.set_tick_params(pad=20)  # Add padding to move depth ticks further from plot

# Color bar for temperature, using the same bounds as the slab colours
mappable = plt.cm.ScalarMappable(cmap="RdBu_r", norm=plt.Normalize(vmin=column_temp_min, vmax=column_temp_max))
mappable.set_array(avg_temp_melted['Temperature'])
fig.colorbar(mappable, ax=ax, shrink=0.5, aspect=10, pad=0.1, label="Temperature (°C)")

plt.title("3D Water Column Temperature by Depth and Decade", fontsize=18, pad=40, loc="center")
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

# Relies on `combined_df` built by the ocean-temperature loading cell

# Depth columns present in combined_df and the chronological decade order
depth_levels = [col for col in combined_df.columns if col.startswith("Depth_")]
decade_order = [
    "1955-1964", "1965-1974", "1975-1984", "1985-1994",
    "1995-2004", "2005-2014", "2015-2022"
]

# Average temperature per decade at every depth level.
# NOTE(review): dropna() keeps only rows that have a value at *every* depth
# level — confirm that restricting the average to those rows is intended.
filtered_df = combined_df[['Decade'] + depth_levels].dropna()
filtered_df['Decade'] = pd.Categorical(filtered_df['Decade'], categories=decade_order, ordered=True)
# observed=False is spelled out: it keeps one row per decade in decade_order
# (NaN if a decade has no data) and avoids relying on the deprecated implicit default
avg_temp_df = filtered_df.groupby('Decade', observed=False)[depth_levels].mean().reset_index()

# Temperature change relative to the first decade in decade_order, per depth level
baseline = avg_temp_df[avg_temp_df['Decade'] == decade_order[0]][depth_levels].iloc[0]
temp_change_df = avg_temp_df.copy()
temp_change_df[depth_levels] = avg_temp_df[depth_levels] - baseline

# Convert the data to long format for easier plotting
temp_change_melted = temp_change_df.melt(id_vars="Decade", var_name="Depth", value_name="TemperatureChange")
temp_change_melted["Depth"] = temp_change_melted["Depth"].str.replace("Depth_", "").astype(int)  # Convert depth to numeric

# Plot settings
fig = plt.figure(figsize=(14, 10))
ax = fig.add_subplot(111, projection='3d')
ax.set_box_aspect([1, 1, 2])  # Set 3D aspect ratio

# Sort depth levels to ensure correct ordering
depth_intervals = sorted(temp_change_melted['Depth'].unique())

# Symmetric colour range in °C: -change_limit maps to blue, +change_limit to red.
# A single constant feeds both the slab colours and the colour bar so the two
# cannot drift apart.
change_limit = 2

# (decade, depth) -> temperature change, built once instead of filtering the
# long table for every slab
change_at = {
    (decade, depth): change
    for decade, depth, change in zip(
        temp_change_melted['Decade'], temp_change_melted['Depth'], temp_change_melted['TemperatureChange']
    )
}

for i, decade in enumerate(decade_order):
    for j, depth in enumerate(depth_intervals):
        temp_change = change_at[(decade, depth)]

        # Rescale [-change_limit, +change_limit] to [0, 1] for the colormap
        color = plt.cm.RdBu_r((temp_change + change_limit) / (2 * change_limit))

        # Each slab ends at its own depth and starts at the previous level, so
        # slabs tile the column without overlapping where the spacing changes
        # (a forward difference made e.g. the 100 m slab span 75-100 m, on top
        # of the 80-100 m slabs). The first level has no predecessor and keeps
        # the thickness of the gap to the next level.
        z_base = depth
        if j > 0:
            z_top = depth_intervals[j - 1]
        elif len(depth_intervals) > 1:
            z_top = depth - (depth_intervals[1] - depth)
        else:
            z_top = depth

        # Footprint of the decade's column: unit square starting at x = i
        x0, x1 = i, i + 1
        y0, y1 = 0, 1

        # Create faces for the rectangular prism
        faces = [
            [(x0, y0, z_base), (x1, y0, z_base), (x1, y0, z_top), (x0, y0, z_top)],  # Front
            [(x1, y0, z_base), (x1, y1, z_base), (x1, y1, z_top), (x1, y0, z_top)],  # Right
            [(x1, y1, z_base), (x0, y1, z_base), (x0, y1, z_top), (x1, y1, z_top)],  # Back
            [(x0, y1, z_base), (x0, y0, z_base), (x0, y0, z_top), (x0, y1, z_top)],  # Left
            [(x0, y0, z_top), (x1, y0, z_top), (x1, y1, z_top), (x0, y1, z_top)],    # Ceiling (top)
            [(x0, y0, z_base), (x1, y0, z_base), (x1, y1, z_base), (x0, y1, z_base)]  # Floor (bottom)
        ]

        # Draw each face of the rectangular prism with its color
        ax.add_collection3d(Poly3DCollection(faces, color=color, edgecolor="k", linewidths=0.2, alpha=0.8))

# Set axis labels and title, and move labels and ticks further from the plot
ax.set_xticks(range(len(decade_order)))
ax.set_xticklabels(decade_order, rotation=45, ha='right', fontsize=12)
ax.set_xlabel("Decade", fontsize=15, labelpad=50)  # Increased label padding

ax.set_yticks([])  # Remove y-axis ticks
ax.set_yticklabels([])  # Hide y-axis labels

# Manually added collections are not guaranteed to update the axis limits
# (depends on the Matplotlib version), so pin x/y to the extent of the columns;
# otherwise the last decade's column can fall outside the axes box
ax.set_xlim(0, len(decade_order))
ax.set_ylim(0, 1)

ax.set_zlabel("Depth (m)", fontsize=15, labelpad=50)  # Increased label padding for depth
ax.set_zlim([max(depth_intervals), 0])  # Reverse depth axis to have 0 at the top
ax.zaxis.set_tick_params(pad=20)  # Add padding to move depth ticks further from plot

# Color bar for temperature change, spanning -change_limit to +change_limit
mappable = plt.cm.ScalarMappable(cmap="RdBu_r", norm=plt.Normalize(vmin=-change_limit, vmax=change_limit))
mappable.set_array(temp_change_melted['TemperatureChange'])
fig.colorbar(mappable, ax=ax, shrink=0.5, aspect=10, pad=0.1, label="Temperature Change (°C)")

plt.title("3D Water Column Temperature Change by Depth and Decade", fontsize=18, pad=40, loc="center")
plt.show()


### 10. Renewable Energy Generation by Source

In [None]:
# Read the modern-renewables CSV and print its first five rows
renewable_generation_df = pd.read_csv(
    filepath_or_buffer='data/modern-renewable-energy-consumption.csv'
)

print(renewable_generation_df.head(n=5))

In [None]:
import pandas as pd
import plotly.graph_objects as go

# Load the data
renewable_generation_df = pd.read_csv('data/modern-renewable-energy-consumption.csv')

# Source column -> legend label; the dict order is the stacking order (bottom to top)
energy_sources = {
    'Other renewables (including geothermal and biomass) electricity generation - TWh': 'Other Renewables',
    'Solar generation - TWh': 'Solar',
    'Wind generation - TWh': 'Wind',
    'Hydro generation - TWh': 'Hydro'
}
generation_columns = list(energy_sources)

# Filter for the "World" entity
world_df = renewable_generation_df[renewable_generation_df['Entity'] == 'World']

# Drop rows where all generation values are NaN. how='all' is required for that:
# the dropna default (how='any') would also discard years in which only some
# sources are missing, and the fillna(0) below would then never have any effect.
world_df = world_df.dropna(subset=generation_columns, how='all')

# Create the figure and add one stacked-area trace per energy source
fig = go.Figure()

for column, label in energy_sources.items():
    fig.add_trace(go.Scatter(
        x=world_df['Year'],
        y=world_df[column].fillna(0),  # a source missing in a given year contributes 0
        mode='lines',
        name=label,
        stackgroup='one'  # same group name on every trace -> traces are stacked
    ))

# Update layout for readability and aesthetics
# (axis titles use title.font instead of the deprecated `titlefont`)
fig.update_layout(
    title="Global Renewable Energy Generation by Source (TWh)",
    title_font=dict(size=24),
    title_x=0.5,
    xaxis=dict(
        title=dict(text="Year", font=dict(size=18)),
        showgrid=True,
        gridcolor="lightgray",
        tickfont=dict(size=12)
    ),
    yaxis=dict(
        title=dict(text="Electricity Generation (TWh)", font=dict(size=18)),
        showgrid=True,
        gridcolor="lightgray",
        tickfont=dict(size=12)
    ),
    plot_bgcolor="white",
    width=1200,
    height=800,
    legend=dict(title="Energy Sources", font=dict(size=12)),
    margin=dict(l=80, r=40, t=80, b=60)
)

# Show plot
fig.show()


### 11. Energy (Fossil Fuels, Nuclear, Renewables)

In [None]:
# Read the fossil/nuclear/renewables electricity CSV and print its first five rows
energy_sources_df = pd.read_csv(
    filepath_or_buffer='data/elec-fossil-nuclear-renewables.csv'
)

print(energy_sources_df.head(n=5))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
energy_sources_df = pd.read_csv('data/elec-fossil-nuclear-renewables.csv')

# Define the manually selected top 6 entities
top_6_entities = ['United States', 'China', 'India', 'Russia', 'Japan', 'Germany']

# Source columns (TWh) for the three generation types
renewables_col = 'Electricity from renewables - TWh (adapted for visualization of chart elec-fossil-nuclear-renewables)'
nuclear_col = 'Electricity from nuclear - TWh (adapted for visualization of chart elec-fossil-nuclear-renewables)'
fossil_col = 'Electricity from fossil fuels - TWh (adapted for visualization of chart elec-fossil-nuclear-renewables)'

# Filter data to only include the top 6 entities and years from 1990 onwards.
# .copy() gives an independent frame, so adding the derived columns below does
# not write into a slice of energy_sources_df (SettingWithCopyWarning).
top_6_data = energy_sources_df[
    (energy_sources_df['Entity'].isin(top_6_entities)) &
    (energy_sources_df['Year'] >= 1990)
].copy()

# Calculate percentage share for each energy type.
# `+` (not sum(skipna)) is used on purpose: a missing component makes the total,
# and therefore all three shares for that row, NaN.
top_6_data['Total Electricity'] = (
    top_6_data[renewables_col] + top_6_data[nuclear_col] + top_6_data[fossil_col]
)
top_6_data['Renewables %'] = (top_6_data[renewables_col] / top_6_data['Total Electricity']) * 100
top_6_data['Nuclear %'] = (top_6_data[nuclear_col] / top_6_data['Total Electricity']) * 100
top_6_data['Fossil Fuels %'] = (top_6_data[fossil_col] / top_6_data['Total Electricity']) * 100

# Set up the plot with 2 rows and 3 columns
sns.set_theme(style="whitegrid")
fig, axes = plt.subplots(2, 3, figsize=(18, 10), sharey=True)
fig.suptitle('Electricity Production by Source for Top 6 Producers (1990 - 2023)', fontsize=18, weight='bold', y=1.02)

# Colors for each energy type
colors = ["#66c2a5", "#fc8d62", "#8da0cb"]  # Renewables, Nuclear, Fossil Fuels

# Plot each entity's data in a stacked area format; axes.flat walks the grid
# row by row, so entity i lands in row i // 3, column i % 3
for ax, entity in zip(axes.flat, top_6_entities):
    entity_data = top_6_data[top_6_data['Entity'] == entity].sort_values('Year')

    # Determine the start and end years for the x-axis
    start_year = entity_data['Year'].min()
    end_year = entity_data['Year'].max()

    years = entity_data['Year']

    # Cumulative upper edges of the three stacked bands
    renewables_top = entity_data['Renewables %']
    nuclear_top = renewables_top + entity_data['Nuclear %']
    fossil_top = nuclear_top + entity_data['Fossil Fuels %']

    # Plot stacked areas
    ax.fill_between(years, 0, renewables_top, label='Renewables', color=colors[0], alpha=0.8)
    ax.fill_between(years, renewables_top, nuclear_top, label='Nuclear', color=colors[1], alpha=0.8)
    ax.fill_between(years, nuclear_top, fossil_top, label='Fossil Fuels', color=colors[2], alpha=0.8)

    # Thin white line along the top of each band to separate the areas
    for band_top in (renewables_top, nuclear_top, fossil_top):
        ax.plot(years, band_top, color="white", linewidth=0.5)

    # Set title, y-axis, and x-axis labels
    ax.set_title(entity, fontsize=14, weight='bold', color="#333333")
    ax.set_ylim(0, 100)  # Limit y-axis to 100% for all subplots
    ax.set_yticks(range(0, 101, 20))  # Set percentage tick marks
    ax.set_yticklabels([f"{tick}%" for tick in range(0, 101, 20)], fontsize=12)  # Display y-ticks as percentages
    ax.set_xlabel("Year", fontsize=14)
    ax.set_ylabel("Electricity Production (%)", fontsize=14)

    # Set the x-axis limits to match the exact start and end years
    ax.set_xlim(start_year, end_year)

    # Increase the font size of tick labels
    ax.tick_params(axis='both', which='major', labelsize=12)

# Create a single common legend outside the subplots; every panel has the same
# three entries, so the handles of the last panel drawn are used
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles[:3], labels[:3], loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.12), fontsize=14, frameon=False)

# Adjust layout. `rect` is (left, bottom, right, top) in normalized figure
# coordinates, i.e. values between 0 and 1; the previous [100, 100, 100, 100]
# was outside that range and did not describe a usable area.
plt.tight_layout(rect=[0.03, 0.03, 0.97, 0.97])  # Small margin on every side
plt.subplots_adjust(hspace=0.4, wspace=0.4)  # Added spacing between subplots
plt.show()
