In [None]:
import pandas as pd
import plotly.graph_objects as go

In [None]:
# Load the training split from a path relative to the notebook's working directory.
train_df = pd.read_csv("../data/C1/train.csv")
# Bare expression as the last line of the cell: the notebook renders the DataFrame preview.
train_df

In [None]:
# Load the test split from a path relative to the notebook's working directory.
test_df = pd.read_csv("../data/C1/test.csv")
# Bare expression as the last line of the cell: the notebook renders the DataFrame preview.
test_df

In [None]:
# Print the training frame's summary (column names, non-null counts, dtypes, memory usage).
train_df.info()

In [None]:
# Print the test frame's summary (column names, non-null counts, dtypes, memory usage).
test_df.info()

In [None]:
# Count how often each distinct value occurs in the "Personality" column
# (presumably the target label — confirm against the dataset description).
train_df["Personality"].value_counts()

In [None]:
def create_visualization(df:pd.DataFrame):
    """Show an interactive bar chart of the value distribution of each column.

    One bar trace is built per column (every value is stringified first, so
    missing values show up as their own category, e.g. ``"nan"``). Only one
    trace is visible at a time; a dropdown menu above the plot switches the
    visible trace and updates the figure title accordingly.

    Args:
        df: Frame whose columns are plotted. An ``"id"`` column, if present,
            is excluded because a unique identifier has no useful
            distribution. The caller's frame is not modified.

    Returns:
        None. The figure is displayed via ``fig.show()``; it is deliberately
        not returned so that a notebook cell ending in this call does not
        render the figure a second time.
    """
    # errors="ignore" lets the function work on frames without an "id" column
    # instead of raising KeyError; drop() returns a new frame either way.
    df = df.drop(columns="id", errors="ignore")
    columns = list(df.columns)

    fig = go.Figure()

    # Add one bar trace per column; only the first starts out visible.
    for position, col in enumerate(columns):
        # Select by position so duplicate column names still yield exactly one
        # trace each (label-based lookup would return a DataFrame for them).
        counts = df.iloc[:, position].astype(str).value_counts()
        fig.add_trace(go.Bar(
            # Read categories/counts straight from the Series: the column
            # names produced by value_counts().reset_index() differ between
            # pandas versions and clash with a data column named "count".
            x=counts.index.tolist(),
            y=counts.tolist(),
            name=str(col),
            visible=(position == 0),
        ))

    # One dropdown entry per column; selecting it shows only that column's
    # trace and retitles the figure.
    dropdown_buttons = [
        dict(
            label=str(col),  # plotly expects string labels
            method="update",
            args=[
                {"visible": [other == position for other in range(len(columns))]},
                {"title": f"Distribution of {col}"},
            ],
        )
        for position, col in enumerate(columns)
    ]

    # A frame with no plottable columns gets a generic title rather than an
    # IndexError from columns[0].
    initial_title = f"Distribution of {columns[0]}" if columns else "Distribution"

    fig.update_layout(
        updatemenus=[dict(
            active=0,
            buttons=dropdown_buttons,
            x=0.5,
            xanchor="center",
            y=1.2,
            yanchor="top",
        )],
        title=initial_title,
        xaxis_title="Value",
        yaxis_title="Count",
    )

    fig.show()

In [None]:
# Display the interactive per-column value-distribution chart for the training data.
create_visualization(train_df)

In [None]:
# Display the interactive per-column value-distribution chart for the test data.
create_visualization(test_df)