# In [None]:
"""# Visualization (Pie Charts)"""

import pandas as pd
import plotly.express as px

# 1) Load the CSV into a Pandas DataFrame
# NOTE(review): the file name looks like a run-specific part file; confirm it
# still exists whenever the upstream job is re-run.
CSV_PATH = "output/part-00000-afe825b1-770c-4416-a851-88671f00ecd6-c000.csv"
parsed_df = pd.read_csv(CSV_PATH)

# 2) Sum the "count" column per theme (Pandas syntax)
count_by_theme = parsed_df.groupby("theme")["count"].sum()
# Turn the per-theme Series back into a two-column frame: theme, total_count
theme_groups = count_by_theme.reset_index(name="total_count")

# 3) Collect the distinct themes as a plain Python list
themes = theme_groups["theme"].to_list()

# 4) Create pie charts for each theme
# Number of languages that get their own slice in each chart; any remaining
# languages are merged into a single 'Others' slice.
TOP_N_LANGUAGES = 5

for theme in themes:
    # Rows for this theme, keeping only the language/count columns,
    # ordered from the highest to the lowest count.
    theme_data = (
        parsed_df.loc[parsed_df["theme"] == theme, ["language", "count"]]
        .sort_values("count", ascending=False)
    )

    # If there are more than TOP_N_LANGUAGES rows, group the rest as 'Others'
    if len(theme_data) > TOP_N_LANGUAGES:
        # Use .iloc so the split is explicitly positional: after filtering and
        # sorting, the integer index labels no longer match row positions.
        top_languages = theme_data.iloc[:TOP_N_LANGUAGES]
        others_row = pd.DataFrame({
            'language': ['Others'],
            'count': [theme_data['count'].iloc[TOP_N_LANGUAGES:].sum()]
        })
        # ignore_index gives the combined frame a clean 0..n-1 index instead
        # of mixing the original row labels with the new row's label 0.
        theme_data = pd.concat([top_languages, others_row], ignore_index=True)

    # Create the (donut-style, hole=0.3) pie chart with Plotly
    fig = px.pie(
        theme_data,
        values='count',
        names='language',
        title=f'Top Languages for "{theme}" Theme',
        hover_data=['count'],
        hole=0.3
    )

    # Slice labels: percentage plus language name, drawn inside the slice,
    # with a custom hover text showing the raw count and the percentage.
    fig.update_traces(
        textposition='inside',
        textinfo='percent+label',
        insidetextfont=dict(size=12, color='white'),
        hovertemplate="<b>%{label}</b><br>Count: %{value}<br>Percent: %{percent}"
    )

    # uniformtext: labels that would need a font smaller than the minimum
    # size are hidden rather than shrunk further.
    fig.update_layout(
        uniformtext_minsize=10,
        uniformtext_mode='hide',
        height=600,
        showlegend=False
    )

    fig.show()

# 5) Show overall theme distribution
# Order the per-theme totals from largest to smallest.
total_counts = theme_groups.sort_values(by="total_count", ascending=False)

# Slice labels (percentage + theme name, drawn inside) and custom hover text.
overall_trace_style = {
    "textposition": 'inside',
    "textinfo": 'percent+label',
    "insidetextfont": {"size": 12, "color": 'white'},
    "hovertemplate": "<b>%{label}</b><br>Total Count: %{value}<br>Percent: %{percent}",
}

# Figure-level settings: fixed height, no legend.
overall_layout = {"height": 700, "showlegend": False}

# One slice per theme, sized by its summed count.
fig = px.pie(
    total_counts,
    names='theme',
    values='total_count',
    title='Overall Theme Distribution',
    hover_data=['total_count'],
)
fig.update_traces(**overall_trace_style)
fig.update_layout(**overall_layout)

fig.show()
