In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.colors as pc

In [None]:
# Load train.csv into a DataFrame.
train_df = pd.read_csv("../data/C1/train.csv")
# Bare trailing expression: the notebook displays the frame as the cell output.
train_df

In [None]:
# Load test.csv into a DataFrame.
test_df = pd.read_csv("../data/C1/test.csv")
# Bare trailing expression: the notebook displays the frame as the cell output.
test_df

In [None]:
# Print a concise summary of train_df (columns, non-null counts, dtypes).
train_df.info()

In [None]:
# Print a concise summary of test_df (columns, non-null counts, dtypes).
test_df.info()

In [None]:
def prepare_counts(df_column):
    """Build a sorted value-frequency table for a single column.

    Missing values are folded into the literal label ``"nan"`` and every
    other value is converted to its string form, so the result can be used
    directly as categorical bar-chart labels.

    Args:
        df_column: A pandas Series (typically one DataFrame column).

    Returns:
        A DataFrame with the columns ``Value`` (string label) and ``Count``
        (number of occurrences). Rows are ordered with numeric-looking
        labels first (ascending by numeric value), then other text labels
        (case-insensitive), and the ``"nan"`` label last.
    """
    # Go through object dtype so the "nan" placeholder can be inserted
    # regardless of the column's original dtype, then stringify everything.
    labels = (
        df_column.astype(object)
        .where(pd.notnull(df_column), "nan")
        .astype(str)
    )

    # Name the index and the counts explicitly instead of relying on the
    # default names produced by value_counts(); those differ between pandas
    # versions and collide when the input Series is itself named "count".
    counts = (
        labels.value_counts()
        .rename_axis("Value")
        .reset_index(name="Count")
    )

    def sort_key(val):
        if val == "nan":
            return (2, "")  # missing-value label goes last
        try:
            number = float(val)
        except ValueError:
            return (1, val.lower())  # plain text goes second
        if number != number:
            # Labels such as "NaN" parse to a float NaN, which has no
            # consistent ordering; sort them with the text labels instead.
            return (1, val.lower())
        return (0, number)  # numbers go first

    return counts.sort_values(
        by="Value", key=lambda col: col.map(sort_key)
    ).reset_index(drop=True)

def create_visualization(df: pd.DataFrame) -> None:
    """Show an interactive bar chart of per-column value counts.

    One bar trace is built per column (excluding ``id`` when present) from
    the table returned by ``prepare_counts``. Only the first column's trace
    is visible initially; a dropdown menu switches the visible trace and
    the figure title to any other column.

    Args:
        df: The DataFrame whose columns should be visualised.

    Raises:
        ValueError: If no columns remain once ``id`` has been dropped.
    """
    # "id" is excluded from the chart; tolerate frames that have no such
    # column rather than failing with a KeyError.
    df = df.drop(columns="id", errors="ignore")
    columns = list(df.columns)
    if not columns:
        raise ValueError(
            "create_visualization needs at least one column besides 'id'"
        )

    palette = pc.qualitative.Prism
    fig = go.Figure()
    dropdown_buttons = []

    # Build each trace together with its dropdown button so the visibility
    # mask index always matches the trace index.
    for i, col in enumerate(columns):
        counts_df = prepare_counts(df[col])

        # Cycle through the palette so every bar gets a colour even when
        # there are more bars than palette entries.
        colors = [palette[j % len(palette)] for j in range(len(counts_df))]

        fig.add_trace(go.Bar(
            x=counts_df["Value"],
            y=counts_df["Count"],
            name=col,
            visible=(i == 0),
            marker_color=colors,
        ))

        # Selecting this entry shows only trace i and retitles the figure.
        dropdown_buttons.append(dict(
            label=col,
            method="update",
            args=[
                {"visible": [j == i for j in range(len(columns))]},
                {"title": f"Distribution of {col}"},
            ],
        ))

    fig.update_layout(
        updatemenus=[dict(
            active=0,
            buttons=dropdown_buttons,
            x=0.5,
            xanchor="center",
            y=1.2,
            yanchor="top",
        )],
        title=f"Distribution of {columns[0]}",
        xaxis_title="Value",
        yaxis_title="Count",
    )

    fig.show()

In [None]:
# Show the per-column value-count chart (with column dropdown) for train_df.
create_visualization(train_df)

In [None]:
# Show the per-column value-count chart (with column dropdown) for test_df.
create_visualization(test_df)