In [1]:
import pandas as pd
import plotly.express as px
import argparse
from pathlib import Path


def analyze_assignee(dataset_path: Path) -> None:
    """Summarise and plot how often each assignee appears in a CSV dataset.

    Reads ``dataset_path`` with pandas, counts the non-null values of its
    ``Assignee`` column, prints the ten most frequent assignees, and renders
    a bar chart and a treemap of the full distribution. Each figure is shown
    and also saved as a PNG under ``output/`` (relative to the current
    working directory).

    Problems are reported on stdout instead of being raised: a missing file,
    a missing ``Assignee`` column, a column with no usable values, or any
    other exception raised while reading, plotting or saving.

    The ``output/`` directory is created only after the data has been
    validated, so a failed run does not leave an empty directory behind.

    Args:
        dataset_path: Location of the CSV file. It is handed directly to
            ``pandas.read_csv``, so a plain string path works as well.
    """
    try:
        df = pd.read_csv(dataset_path)

        if "Assignee" not in df.columns:
            print(f"Error: 'Assignee' column not found in {dataset_path}")
            return

        df_cleaned = df.dropna(subset=['Assignee'])

        # Nothing to count or plot: say so explicitly instead of printing an
        # empty table and attempting to build charts from no data.
        if df_cleaned.empty:
            print(f"Error: no non-empty 'Assignee' values found in {dataset_path}")
            return

        assignee_counts = df_cleaned['Assignee'].value_counts().reset_index()
        # Rename explicitly so the column names do not depend on what
        # reset_index() happens to call them.
        assignee_counts.columns = ['Assignee', 'Count']

        print("--- Top 10 Assignees ---")
        print(assignee_counts.head(10))
        print("\n")

        # Create the output directory only now that there is something to
        # save into it.
        output_dir = Path("output")
        output_dir.mkdir(exist_ok=True)

        # 1. Bar chart: frequency distribution, one bar per assignee.
        fig_bar = px.bar(
            assignee_counts,
            x='Assignee',
            y='Count',
            title='Assignee Distribution 📊',
            labels={'Count': 'Number of Assignments', 'Assignee': 'Assignee Name'},
            color='Assignee'
        )
        fig_bar.show()
        bar_chart_path = output_dir / "assignee_bar_chart.png"
        # NOTE(review): static image export presumably needs plotly's image
        # engine (kaleido) installed; a failure here is reported by the
        # generic handler below.
        fig_bar.write_image(bar_chart_path, width=1200, height=600)

        # 2. Treemap: the same counts as nested rectangles under a single
        # "All Assignees" root.
        fig_treemap = px.treemap(
            assignee_counts,
            path=[px.Constant("All Assignees"), 'Assignee'],
            values='Count',
            title='Hierarchical View of Assignees 🌳',
            color='Count',
            color_continuous_scale='viridis'
        )
        fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
        fig_treemap.show()
        treemap_path = output_dir / "assignee_treemap.png"
        fig_treemap.write_image(treemap_path, width=1200, height=800)

    except FileNotFoundError:
        print(f"Error: The file at {dataset_path} was not found.")
    except Exception as e:
        # Notebook-level boundary: report the problem rather than letting a
        # traceback escape.
        print(f"An unexpected error occurred: {e}")


In [None]:
# Run the assignee analysis on the GCC dataset; the relative path is resolved
# against the notebook's current working directory.
analyze_assignee("../datasets/gcc_data.csv")

--- Top 10 Assignees ---
                 Assignee  Count
0           Tobias Burnus    264
1         Benjamin Kosnik    257
2  Alexandre Petit-Bianco    159
3           Paolo Bonzini    110
4          David Edelsohn     99
5         Alexandre Oliva     95
6              Alan Modra     89
7           David Malcolm     70
8            Andrew Haley     67
9          Bryce McKinlay     57


