In [None]:
# Imports
import polars as pl
import altair as alt

In [None]:
def rdd_bdgt_df():
    """Load the OECD/IEA public RD&D budget CSV extract.

    Reads the filtered export stored under ``data/`` and removes the columns
    listed in ``unused_columns``; all remaining columns are returned as read.

    Returns:
        pl.DataFrame: The CSV contents without the dropped columns.
    """
    csv_path = "data/OECD.IEA,RDDPUBLIC,1.0,filtered,2025-10-16 03-48-20.csv"
    unused_columns = [
        "STRUCTURE",
        "STRUCTURE_ID",
        "STRUCTURE_NAME",
        "ACTION",
        "Decimals",
        "FREQUENCY",
        "Frequency",
        "RDD_TYPE",
        "Type",
        "Time Period",
        "RDD_SECTOR",
        "Sector",
        "Confidential Status",
    ]

    budget_raw = pl.read_csv(csv_path)
    return budget_raw.drop(unused_columns)

In [None]:
def rdd_bdgt_oecd_europe_df():
    """Return the RD&D budget rows for the European countries listed below.

    The ``COUNTRY`` column of ``rdd_bdgt_df()`` is matched against the
    upper-case names in ``oecd_europe_countries``.

    Returns:
        pl.DataFrame: The subset of ``rdd_bdgt_df()`` for those countries.
    """
    oecd_europe_countries = [
        "AUSTRIA", "BELGIUM", "CZECH", "DENMARK", "ESTONIA",
        "FINLAND", "FRANCE", "GERMANY", "GREECE", "HUNGARY",
        "IRELAND", "ITALY", "LITHUANIA",
        "LUXEMBOURG", "NETHERLANDS", "NORWAY", "POLAND", "PORTUGAL",
        "SLOVAKIA", "SPAIN", "SWEDEN", "SWITZERLAND",
        "UNITED KINGDOM",
    ]
    in_oecd_europe = pl.col("COUNTRY").is_in(oecd_europe_countries)

    budget_all = rdd_bdgt_df()
    return budget_all.filter(in_oecd_europe)

In [None]:
# Preview the European subset of the RD&D budget data.
rdd_bdgt_oecd_europe_df()

In [None]:
# Hungary, 2021: OBS_VALUE summed per RDD_TECH code.
hungary_2021_rows = rdd_bdgt_oecd_europe_df().filter(
    (pl.col("COUNTRY") == "HUNGARY") & (pl.col("TIME_PERIOD") == 2021)
)

(
    hungary_2021_rows
    .group_by(["RDD_TECH", "TIME_PERIOD"])
    .agg(pl.col("OBS_VALUE").sum())
    .sort(["TIME_PERIOD", "RDD_TECH"])
)

In [None]:
# Hungary, 2022: OBS_VALUE summed per RDD_TECH code.
hungary_2022_rows = rdd_bdgt_oecd_europe_df().filter(
    (pl.col("COUNTRY") == "HUNGARY") & (pl.col("TIME_PERIOD") == 2022)
)

(
    hungary_2022_rows
    .group_by(["RDD_TECH", "TIME_PERIOD"])
    .agg(pl.col("OBS_VALUE").sum())
    .sort(["TIME_PERIOD", "RDD_TECH"])
)

In [None]:

# Bar chart of OBS_VALUE per country, with bars offset and coloured by
# TIME_PERIOD. Rows whose Technology is "Total" are removed before plotting.
rdd_bdgt_without_total = rdd_bdgt_oecd_europe_df().filter(
    pl.col("Technology") != "Total"
)

(
    alt.Chart(rdd_bdgt_without_total)
    .mark_bar()
    .encode(
        x=alt.X("COUNTRY:N"),
        y=alt.Y("OBS_VALUE:Q"),
        xOffset=alt.XOffset("TIME_PERIOD:N"),
        color=alt.Color("TIME_PERIOD:N"),
    )
)


In [None]:
def china_solar_exports_df():
    """Load the monthly solar export CSV with a parsed ``Date`` column.

    The ``Date`` strings are converted to a date dtype, then rows whose
    ``Area`` is one of the three "Other ..." entries below are removed.

    Returns:
        pl.DataFrame: The remaining export rows.
    """
    excluded_areas = ["Other Oceania", "Other Africa", "Other Latin America"]

    exports_raw = pl.read_csv("data/mart_solar_exports_full_release_monthly.csv")
    exports_dated = exports_raw.with_columns(pl.col("Date").str.to_date())

    # Keep every row whose Area is NOT in the excluded list.
    return exports_dated.filter(~pl.col("Area").is_in(excluded_areas))

In [None]:
def china_solar_exports_countries_df():
    """Return the export rows whose ``Area type`` is "Country or economy"."""
    is_country_row = pl.col("Area type") == "Country or economy"
    exports = china_solar_exports_df()
    return exports.filter(is_country_row)


china_solar_exports_countries_df()

In [None]:
def china_solar_exports_regions_df(region: str):
    """Return region-level export rows selected by ``region``.

    Args:
        region: ``"world"`` keeps only the "World" area, ``"regions"`` keeps
            every region-level area except "World", and any other value is
            matched exactly against the ``Area`` column.

    Returns:
        pl.DataFrame: Rows with ``Area type == "Region"`` matching the selection.
    """
    area = pl.col("Area")

    # Pick the Area predicate first, then apply both filters in one place.
    if region == "world":
        area_matches = area == "World"
    elif region == "regions":
        area_matches = area != "World"
    else:
        area_matches = area == region

    region_rows = china_solar_exports_df().filter(pl.col("Area type") == "Region")
    return region_rows.filter(area_matches)
    
def china_solar_exports_monthly(region: str):
    """Sum export capacity per area, date and commodity type.

    Args:
        region: Selection passed on to ``china_solar_exports_regions_df``.

    Returns:
        pl.DataFrame: Columns ``Region`` (renamed from ``Area``), ``Date``,
        ``Commodity type`` and ``Total Capacity (MW)``, sorted by the first
        three.
    """
    group_keys = ["Area", "Date", "Commodity type"]
    total_capacity = pl.col("Capacity (MW)").sum().alias("Total Capacity (MW)")

    per_commodity = (
        china_solar_exports_regions_df(region)
        .group_by(group_keys)
        .agg(total_capacity)
    )
    renamed = per_commodity.rename({"Area": "Region"})
    return renamed.sort(["Region", "Date", "Commodity type"])

def china_solar_exports_monthly_by_region(region: str):
    """Sum export capacity per region and date across all commodity types.

    Args:
        region: Selection passed on to ``china_solar_exports_monthly``.

    Returns:
        pl.DataFrame: Columns ``Region``, ``Date`` and ``Total Capacity (MW)``,
        sorted by region and date.
    """
    per_commodity = china_solar_exports_monthly(region)

    # Collapse the commodity dimension: one row per (Region, Date).
    return (
        per_commodity
        .group_by(["Region", "Date"])
        .agg(pl.sum("Total Capacity (MW)"))
        .sort(["Region", "Date"])
    )


In [None]:
# Preview the per-region monthly totals (every region-level area except "World").
china_solar_exports_monthly_by_region("regions") 

In [None]:
def plot_world_exports(color: str):
    """Create a bar chart of China's total monthly solar exports to the world.

    The chart is built from ``china_solar_exports_monthly_by_region("world")``,
    which holds one row per date with capacity already summed over all
    commodity types, so each bar and its tooltip show the monthly total.

    Args:
        color: Fill colour of the bars (e.g. a hex colour string).

    Returns:
        alt.Chart: The bar chart.
    """
    # Use per-date totals rather than the per-commodity rows: with several rows
    # per date the tooltip would report a single commodity's value under the
    # "Total Capacity (MW)" label.
    world_totals = china_solar_exports_monthly_by_region("world")

    chart = (
        alt.Chart(world_totals)
        .mark_bar(color=color)
        .encode(
            x=alt.X(
                "Date:T",
                title="Year",
                axis=alt.Axis(format="%Y", tickCount="year"),
            ),
            y=alt.Y("Total Capacity (MW):Q", title="Total Capacity (MW)"),
            tooltip=[
                alt.Tooltip("Date:T", title="Month", format="%Y-%m"),
                alt.Tooltip("Total Capacity (MW):Q", title="Total Capacity (MW)", format=","),
            ],
        )
        .properties(width=700, height=400, title="China Solar Exports - By Monthly Capacity (World)")
    )

    return chart

plot_world_exports("#C02C38")

In [None]:
# Render the world export bar chart again with a different bar colour.
plot_world_exports("#E77C8E")

In [None]:
def plot_world_exports_stacked_bar(color_range=("#BACF65", "#41AE3C", "#ADD5A2")):
    """Create a stacked bar chart of China's monthly solar exports by technology.

    Args:
        color_range: Three colours applied, in order, to "Panels", "Cells" and
            "Wafers". Defaults to a green palette. Passing another palette
            avoids copying this function just to change the colours.

    Returns:
        alt.Chart: Interactive stacked bar chart of the "world" rows from
        ``china_solar_exports_monthly``.

    Raises:
        ValueError: If ``color_range`` does not contain exactly three colours.
    """
    technologies = ["Panels", "Cells", "Wafers"]
    palette = list(color_range)
    if len(palette) != len(technologies):
        raise ValueError(
            f"color_range must contain {len(technologies)} colours, got {len(palette)}"
        )

    df_world_details = china_solar_exports_monthly("world")

    stacked_bar_chart = (
        alt.Chart(df_world_details)
        .mark_bar()
        .encode(
            x=alt.X("Date:T", title="Year", axis=alt.Axis(format="%Y")),
            # One segment per commodity type, stacked from zero.
            y=alt.Y(
                "Total Capacity (MW):Q",
                title="Total Capacity (MW)",
                stack="zero",
            ),
            # Domain and range are matched by position.
            color=alt.Color(
                "Commodity type:N",
                title="Technology",
                scale=alt.Scale(domain=technologies, range=palette),
            ),
            tooltip=[
                alt.Tooltip("Date:T", title="Date", format="%Y-%m"),
                alt.Tooltip("Commodity type:N", title="Technology"),
                alt.Tooltip("Total Capacity (MW):Q", title="Monthly Capacity (MW)", format=","),
            ],
        )
        .properties(
            title="China's Monthly Solar Exports by Technology (World)",
            width=750,
            height=400,
        )
        .interactive()
    )

    return stacked_bar_chart


plot_world_exports_stacked_bar()

In [None]:
def plot_world_exports_stacked_bar(color_range=("#FBA414", "#12AA9C", "#5A1216")):
    """Create a stacked bar chart of China's monthly solar exports by technology.

    NOTE: a function with this same name is defined in an earlier cell of the
    notebook; running this cell replaces it in the kernel namespace.

    Args:
        color_range: Three colours applied, in order, to "Panels", "Cells" and
            "Wafers". Defaults to an orange / teal / dark-red palette.

    Returns:
        alt.Chart: Interactive stacked bar chart of the "world" rows from
        ``china_solar_exports_monthly``.

    Raises:
        ValueError: If ``color_range`` does not contain exactly three colours.
    """
    technologies = ["Panels", "Cells", "Wafers"]
    palette = list(color_range)
    if len(palette) != len(technologies):
        raise ValueError(
            f"color_range must contain {len(technologies)} colours, got {len(palette)}"
        )

    df_world_details = china_solar_exports_monthly("world")

    stacked_bar_chart = (
        alt.Chart(df_world_details)
        .mark_bar()
        .encode(
            x=alt.X("Date:T", title="Year", axis=alt.Axis(format="%Y")),
            # One segment per commodity type, stacked from zero.
            y=alt.Y(
                "Total Capacity (MW):Q",
                title="Total Capacity (MW)",
                stack="zero",
            ),
            # Domain and range are matched by position.
            color=alt.Color(
                "Commodity type:N",
                title="Technology",
                scale=alt.Scale(domain=technologies, range=palette),
            ),
            tooltip=[
                alt.Tooltip("Date:T", title="Date", format="%Y-%m"),
                alt.Tooltip("Commodity type:N", title="Technology"),
                alt.Tooltip("Total Capacity (MW):Q", title="Monthly Capacity (MW)", format=","),
            ],
        )
        .properties(
            title="China's Monthly Solar Exports by Technology (World)",
            width=750,
            height=400,
        )
        .interactive()
    )

    return stacked_bar_chart

plot_world_exports_stacked_bar()

In [None]:
def world_exports_stacked_area():
    """Create a stacked area chart of China's monthly solar exports by technology.

    Returns:
        alt.Chart: Interactive stacked area chart of the "world" rows from
        ``china_solar_exports_monthly``, coloured by commodity type.
    """
    df_world_details = china_solar_exports_monthly("world")

    # Domain and range are matched by position.
    domain_ = ["Panels", "Cells", "Wafers"]
    range_ = ['#FBA414', '#12AA9C', '#5A1216']

    stacked_area_chart = (
        alt.Chart(df_world_details)
        .mark_area()
        .encode(
            x=alt.X(
                "Date:T",
                title="Year",
                axis=alt.Axis(format="%Y", tickCount="year"),
            ),
            # One band per commodity type, stacked from zero.
            y=alt.Y(
                "Total Capacity (MW):Q",
                title="Total Capacity (MW)",
                stack="zero",
            ),
            color=alt.Color(
                "Commodity type:N",
                title="Technology",
                scale=alt.Scale(domain=domain_, range=range_),
            ),
            tooltip=[
                alt.Tooltip("Date:T", title="Date", format="%Y-%m"),
                alt.Tooltip("Commodity type:N", title="Technology"),
                alt.Tooltip("Total Capacity (MW):Q", title="Monthly Capacity (MW)", format=","),
            ],
        )
        .properties(
            title="China's Monthly Solar Exports by Technology (World)",
            width=750,
            height=400,
        )
        .interactive()
    )

    return stacked_area_chart


def worl_exports_stacked_area():
    """Misspelled original name, kept so existing calls keep working.

    Prefer ``world_exports_stacked_area``.
    """
    return world_exports_stacked_area()


world_exports_stacked_area()

In [None]:

# Stacked area chart of the "world" export rows by technology, using a
# different palette from the function-based version of this chart.
df_world_details = china_solar_exports_monthly("world")

# Domain and range are matched by position.
domain_ = ["Panels", "Cells", "Wafers"]
range_ = ['#FCA106', '#F8E0B0', '#826B48']

# Hover details: month, technology and that technology's capacity.
area_tooltip = [
    alt.Tooltip("Date:T", title="Date", format="%Y-%m"),
    alt.Tooltip("Commodity type:N", title="Technology"),
    alt.Tooltip("Total Capacity (MW):Q", title="Monthly Capacity (MW)", format=","),
]

stacked_area_chart = (
    alt.Chart(df_world_details)
    .mark_area()
    .encode(
        x=alt.X("Date:T", title="Year", axis=alt.Axis(format="%Y", tickCount="year")),
        y=alt.Y("Total Capacity (MW):Q", title="Total Capacity (MW)", stack="zero"),
        color=alt.Color(
            "Commodity type:N",
            title="Technology",
            scale=alt.Scale(domain=domain_, range=range_),
        ),
        tooltip=area_tooltip,
    )
    .properties(
        title="China's Monthly Solar Exports by Technology (World)",
        width=750,
        height=400,
    )
    .interactive()
)

stacked_area_chart

In [None]:
def plot_regional_exports_by_month():
    """Create a line chart of China's monthly solar exports for each region.

    Each region is drawn as one line, with a point and a text label at the
    line's most recent date. The data comes from
    ``china_solar_exports_monthly_by_region("regions")``, i.e. every
    region-level area except "World".

    Returns:
        alt.LayerChart: Lines, end points and end labels layered together.
    """
    base = alt.Chart(china_solar_exports_monthly_by_region("regions")).encode(
        x=alt.X(
            "Date:T",
            title="Year",
            axis=alt.Axis(format="%Y", tickCount="year"),
        ),
        y=alt.Y("Total Capacity (MW):Q", title="Total Capacity (MW)"),
        color=alt.Color("Region:N", title="Region"),
    )

    lines = base.mark_line().encode(
        tooltip=["Region", "Date:T", "Total Capacity (MW):Q"]
    )

    # Keep only the most recent row of every region. Ranking within each
    # Region (groupby) means a region whose data ends earlier than the others
    # still gets its end point and label.
    last_rows = base.transform_window(
        rank="rank()",
        sort=[alt.SortField("Date", order="descending")],
        groupby=["Region"],
    ).transform_filter("datum.rank == 1")

    # "World" is not part of the "regions" data, so every point uses one size.
    last_points = last_rows.mark_point(filled=True, size=60)

    # A fixed text colour replaces the inherited Region colour encoding for the
    # label layer, so the labels do not contribute to the colour legend.
    labels = last_rows.mark_text(align="left", dx=5, clip=False).encode(
        text="Region:N",
        color=alt.value("black"),
    )

    chart = (
        (lines + last_points + labels)
        .properties(
            width=750,
            height=400,
            title="China Solar Exports - Monthly Capacity by Region",
        )
        .configure_axis(grid=False)
    )

    return chart


plot_regional_exports_by_month()

In [None]:
def plot_regional_exports_with_world():
    """Create a line chart of regional exports plus a highlighted "World" line.

    Regional lines are thin and solid; the "World" line is thicker and dashed.
    Every line ends in a point and a text label (bold for "World"), and the
    colour legend is shown at the side.

    Returns:
        alt.LayerChart: Lines, end points and both label layers combined.
    """
    # Groupings left out of the regional lines
    # (presumably because they overlap the geographic regions - TODO confirm).
    regions_to_remove = ["EU", "G20", "G7", "OECD", "ASEAN"]

    # Colour scale: domain and range are matched by position, "World" first.
    domain_list = ["World", "Africa", "Asia", "Europe", "Latin America and Caribbean", "Middle East", "North America", "Oceania"]
    color_range = ["#C3272B", "#D0957E","#F7D881","#006796","#5F3C4F","#ECB16B","#6ABA92","#8A7F8D"]

    df_regions = china_solar_exports_monthly_by_region("regions").filter(
        ~pl.col("Region").is_in(regions_to_remove)
    )
    world_df = china_solar_exports_monthly_by_region("world")
    combined_df = pl.concat([df_regions, world_df])

    is_world = alt.datum.Region == "World"

    base = alt.Chart(combined_df).encode(
        x=alt.X(
            "Date:T",
            title="Year",
            axis=alt.Axis(format="%Y", tickCount="year"),
        ),
        y=alt.Y("Total Capacity (MW):Q", title="Total Capacity (MW)"),
        color=alt.Color(
            "Region:N",
            title="Region",
            scale=alt.Scale(domain=domain_list, range=color_range),
        ),
    )

    lines = base.mark_line().encode(
        tooltip=["Region", "Date:T", "Total Capacity (MW):Q"],
        strokeWidth=alt.condition(is_world, alt.value(3.5), alt.value(1.5)),
        strokeDash=alt.condition(is_world, alt.value([5, 3]), alt.value([1, 0])),
    )

    def _last_row_per_region(chart):
        # Rank rows newest-first within each Region and keep rank 1, so a line
        # that ends earlier than the others still gets its end marker.
        return chart.transform_window(
            rank="rank()",
            sort=[alt.SortField("Date", order="descending")],
            groupby=["Region"],
        ).transform_filter("datum.rank == 1")

    last_points = (
        _last_row_per_region(base)
        .mark_point(filled=True)
        .encode(size=alt.condition(is_world, alt.value(100), alt.value(60)))
    )

    def _end_labels(region_predicate, font_weight):
        # Built on a fresh chart with a constant text colour so the labels do
        # not inherit the Region colour encoding (keeps them out of the legend).
        return (
            _last_row_per_region(alt.Chart(combined_df))
            .transform_filter(region_predicate)
            .mark_text(align="left", dx=5, fontWeight=font_weight)
            .encode(
                x="Date:T",
                y="Total Capacity (MW):Q",
                text="Region:N",
                color=alt.value("black"),
            )
        )

    labels_normal = _end_labels(alt.datum.Region != "World", "normal")
    labels_bold = _end_labels(is_world, "bold")

    styled_chart = (
        (lines + last_points + labels_normal + labels_bold)
        .properties(
            width=750,
            height=400,
            title="China Solar Exports - Monthly Capacity by Region (with World Total)",
        )
        .configure_axis(grid=False)
    )

    return styled_chart


plot_regional_exports_with_world()

In [None]:
def plot_regional_exports_by_technology(domain_list=None, color_range=None):
    """Create a grouped bar chart of total export capacity per technology and region.

    One column facet is drawn per region; inside each facet there is one bar
    per commodity type whose height is the sum of ``Total Capacity (MW)`` over
    all dates. The data comes from ``china_solar_exports_monthly("regions")``.

    Args:
        domain_list (list, optional): Technology names for the colour mapping.
            Defaults to ``['Panels', 'Cells', 'Wafers']``.
        color_range (list, optional): Colours matched by position to
            ``domain_list``. Defaults to ``['#FFCF63', '#A52A2A', '#DECBC6']``.

    Returns:
        alt.Chart: The faceted bar chart.

    Raises:
        ValueError: If ``domain_list`` and ``color_range`` differ in length.
    """
    domain_list = ['Panels', 'Cells', 'Wafers'] if domain_list is None else list(domain_list)
    color_range = ['#FFCF63', '#A52A2A', '#DECBC6'] if color_range is None else list(color_range)
    if len(domain_list) != len(color_range):
        raise ValueError("domain_list and color_range must have the same length")

    df_regions_details = china_solar_exports_monthly("regions")

    grouped_bar_chart = (
        alt.Chart(df_regions_details)
        .mark_bar()
        .encode(
            # The x axis is hidden; the colour legend identifies each bar.
            x=alt.X("Commodity type:N", title=None, axis=None),
            y=alt.Y(
                "sum(Total Capacity (MW)):Q",
                title="Total Capacity (MW)",
            ),
            color=alt.Color(
                "Commodity type:N",
                title="Technology",
                scale=alt.Scale(domain=domain_list, range=color_range),
            ),
            # One group of bars per region, labelled underneath.
            column=alt.Column(
                "Region:N",
                title="Region",
                header=alt.Header(titleOrient="bottom", labelOrient="bottom", titlePadding=5),
            ),
            tooltip=[
                "Region:N",
                "Commodity type:N",
                alt.Tooltip(
                    "sum(Total Capacity (MW)):Q",
                    title="Total Capacity (MW)",
                    format=",",
                ),
            ],
        )
        .properties(
            title="Total Solar Exports by Technology and Region",
            width=alt.Step(25),  # width of each individual bar
            height=300,
        )
        .configure_facet(spacing=15)  # gap between the region groups
        .configure_view(stroke=None)  # no outer border around each facet
    )

    return grouped_bar_chart


plot_regional_exports_by_technology()

In [None]:
def plot_regional_composition_facet():
    """Create a faceted 100% stacked bar chart of export composition per region.

    Each facet (four per row) shows one region's summed capacity split by
    commodity type and normalised so the stack adds up to 100%.

    Returns:
        alt.Chart: The faceted, normalised bar chart.
    """
    technologies = ['Panels', 'Cells', 'Wafers']
    palette = ['#FFCF63', '#A52A2A', '#DECBC6']

    regional_details = china_solar_exports_monthly("regions")

    # Summed capacity, normalised per facet and shown on a percentage axis.
    share_axis = alt.Y(
        "sum(Total Capacity (MW)):Q",
        title="Share of Exports",
        stack="normalize",
        axis=alt.Axis(format="%"),
    )
    technology_color = alt.Color(
        "Commodity type:N",
        title="Technology",
        scale=alt.Scale(domain=technologies, range=palette),
    )
    hover_fields = [
        "Region:N",
        "Commodity type:N",
        alt.Tooltip(
            "sum(Total Capacity (MW)):Q",
            title="Total Capacity (MW)",
            format=",",
        ),
    ]
    region_facet = alt.Facet("Region:N", columns=4)

    bars = alt.Chart(regional_details).mark_bar().encode(
        y=share_axis,
        color=technology_color,
        tooltip=hover_fields,
        facet=region_facet,
    )

    # width/height apply to each individual chart in the grid.
    return bars.properties(
        title="Composition of China's Solar Exports by Region (All Time)",
        width=150,
        height=300,
    )


plot_regional_composition_facet()