In [1]:
import pandas as pd

# Toggle: True keeps the rows where `use_declaration_in_train` is set,
# False keeps the rows where it is not. Also used in the output file name.
supervision = True

# Number of decimals shown for every mean / std cell of the exported table.
SCORE_DECIMALS = 4


def _fixed(values, decimals=SCORE_DECIMALS):
    """Render a numeric Series as fixed-width decimal strings.

    Unlike ``round(n).astype(str)`` this keeps trailing zeros, so 0.579 is
    rendered as ``0.5790`` and all cells of the table have the same width.
    NaN values are rendered as ``nan``.
    """
    return values.map(f"{{:.{decimals}f}}".format)


# Load the CSV. The 'fpr', 'tpr' and 'thresholds' rows are removed before the
# float cast (presumably they hold curve arrays rather than scalars -- TODO confirm).
# `.copy()` makes the filtered frame independent, so the column assignment below
# does not trigger a SettingWithCopyWarning.
df = pd.read_csv('../data/results/classification.csv')
df = df[~df.metric.isin(('fpr', 'tpr', 'thresholds'))].copy()
df['score'] = df['score'].astype(float)

# Keep only the runs matching the supervision setting, drop the Brier score rows
# and then the (now constant) flag column.
declaration_mask = df.use_declaration_in_train if supervision else ~df.use_declaration_in_train
df = df[declaration_mask & (df.metric != 'brier_score')]
df = df.drop(columns=['use_declaration_in_train'])

# Aggregate per (model, dimension, metric): mean and std of the score, plus the
# first recorded train / test sample counts of each group.
df_agg = df.groupby(['model', 'dimension', 'metric']).agg(
    avg_score=('score', 'mean'),
    std_score=('score', 'std'),
    num_train_samples=('n samples train', 'first'),
    num_test_samples=('n samples test', 'first')
).reset_index()

# Remove underscores from column names
df_agg.columns = df_agg.columns.str.replace('_', ' ')

# Reshape to wide form: one row per (dimension, model), one column per metric.
# The groupby above guarantees one row per key, so a plain set_index/unstack is
# enough. Unlike two independent pivot_table calls it never drops all-NaN rows
# or columns, so the mean and std frames always share the same rows and columns
# and can be combined element-wise below.
pivot_index = ['dimension', 'model']
long_form = df_agg.set_index(pivot_index + ['metric'])
df_pivot_avg = long_form['avg score'].unstack('metric').reset_index()
df_pivot_std = long_form['std score'].unstack('metric').reset_index()

metric_cols = [col for col in df_pivot_avg.columns if col not in pivot_index]

# Merge mean and std into a single "$mean \pm std$" LaTeX cell per metric.
df_final = df_pivot_avg.copy()
for col in metric_cols:
    df_final[col] = '$' + _fixed(df_pivot_avg[col]) + ' \\pm ' + _fixed(df_pivot_std[col]) + '$'

# Attach the train and test sample counts of each (dimension, model) pair.
df_final = pd.merge(
    df_final,
    df_agg[['dimension', 'model', 'num train samples', 'num test samples']].drop_duplicates(),
    on=['model', 'dimension']
)

# Append "(mean $\pm$ std)" to each metric column name; renaming by name keeps
# this independent of the column order produced by the merge.
df_final = df_final.rename(
    columns={col: f"{col.replace('_', ' ')} (mean $\\pm$ std)" for col in metric_cols}
)

# Human-readable labels: strip underscores and the word "model" from model
# names, and present the 'demo_rep' dimension as 'partisan'.
df_final['model'] = (
    df_final['model']
    .str.replace('_', ' ', regex=False)
    .str.replace('model', '', regex=False)
    .str.strip()
)
df_final['dimension'] = df_final['dimension'].str.replace('demo_rep', 'partisan', regex=False)

# Format the sample counts with thousands separators, wrapped in math mode.
for count_col in ('num train samples', 'num test samples'):
    df_final[count_col] = df_final[count_col].map(lambda n: f"${int(n):,}$")

# Export the dataframe to a LaTeX file. escape=False keeps the "$...$" cells
# intact; first column left-aligned, all others centred.
latex_table = df_final.to_latex(index=False, escape=False, column_format='l' + 'c' * (len(df_final.columns) - 1))

with open(f"../data/tables/classification_{supervision}_supervision.tex", 'w') as file:
    file.write(latex_table)

# Display the final dataframe for review
display(df_final)


Unnamed: 0,dimension,model,accuracy (mean $\pm$ std),f1 (mean $\pm$ std),roc auc (mean $\pm$ std),num train samples,num test samples
0,partisan,majority,$0.5709 \pm 0.0145$,$0.5508 \pm 0.0151$,$0.5521 \pm 0.0134$,"$4,691$","$1,173$"
1,partisan,NB,$0.6602 \pm 0.0132$,$0.6608 \pm 0.0132$,$0.7131 \pm 0.0145$,"$4,691$","$1,173$"
2,partisan,nature,$0.5967 \pm 0.0119$,$0.579 \pm 0.0126$,$0.6728 \pm 0.0131$,"$4,691$","$1,173$"
3,partisan,random forest,$0.6191 \pm 0.0177$,$0.5626 \pm 0.027$,$0.7242 \pm 0.0145$,"$4,691$","$1,173$"
4,gender,majority,$0.5403 \pm 0.0018$,$0.4231 \pm 0.0022$,$0.5322 \pm 0.0007$,"$306,773$","$76,694$"
5,gender,NB,$0.6911 \pm 0.0016$,$0.672 \pm 0.0018$,$0.7956 \pm 0.0014$,"$306,773$","$76,694$"
6,gender,nature,$0.5128 \pm 0.0018$,$0.3752 \pm 0.0022$,$0.6667 \pm 0.0019$,"$306,773$","$76,694$"
7,gender,random forest,$0.6553 \pm 0.003$,$0.6361 \pm 0.0037$,$0.7437 \pm 0.0038$,"$306,773$","$76,694$"
8,year,majority,$0.558 \pm 0.0017$,$0.4033 \pm 0.002$,$0.4967 \pm 0.0002$,"$305,943$","$76,486$"
9,year,NB,$0.6874 \pm 0.0016$,$0.6864 \pm 0.0016$,$0.7368 \pm 0.0017$,"$305,943$","$76,486$"
