# Parse Output

This Jupyter Notebook is for parsing output from baseline method experiments

In [10]:
import os
import pandas as pd
import numpy as np

In [3]:
dfs = [(csv, pd.read_csv(f"../../output/{csv}", index_col=0)) for csv in os.listdir("../../output")]

In [4]:
all_data = pd.concat([df[1] for df in dfs]).reset_index(drop=True)
all_data.head()

Unnamed: 0,method,seed,target_domain,accuracy,precision,recall,mse
0,svm,3,svhn,0.321667,0.345866,0.321667,12.669167
1,random_forest,3,svhn,0.327,0.365225,0.327,11.958167
2,adaboost,3,svhn,0.2295,0.239056,0.2295,14.443333
3,svm,1,mnist_m,0.299,0.314186,0.299,12.484833
4,random_forest,1,mnist_m,0.254667,0.254395,0.254667,13.4545


In [5]:
acc_data = all_data[['method', 'seed', 'target_domain', 'accuracy']]
target_domains = acc_data['target_domain'].unique()
methods = acc_data['method'].unique()


In [6]:
grouped_by_df = acc_data.groupby(by=['method', 'target_domain'])

# grouped_by_df.mean().drop(columns=['seed'])
# grouped_by_df.std().drop(columns=['seed'])
agg_data = grouped_by_df.agg({
    'accuracy': ['std', 'mean']
})['accuracy']
agg_data

Unnamed: 0_level_0,Unnamed: 1_level_0,std,mean
method,target_domain,Unnamed: 2_level_1,Unnamed: 3_level_1
adaboost,mnist,0.012166,0.671
adaboost,mnist_m,0.003441,0.250889
adaboost,svhn,0.010983,0.228111
adaboost,syn,0.019529,0.341722
random_forest,mnist,0.007017,0.790611
random_forest,mnist_m,0.003881,0.257389
random_forest,svhn,0.00543,0.320778
random_forest,syn,0.00271,0.420889
svm,mnist,0.0,0.739167
svm,mnist_m,0.0,0.299


In [7]:
agg_data['value'] = agg_data['mean'].apply(lambda x: f"{x*100:.2f}") + "±" + agg_data['std'].apply(lambda x: f"{x*100:.2f}")
agg_data

Unnamed: 0_level_0,Unnamed: 1_level_0,std,mean,value
method,target_domain,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adaboost,mnist,0.012166,0.671,67.10±1.22
adaboost,mnist_m,0.003441,0.250889,25.09±0.34
adaboost,svhn,0.010983,0.228111,22.81±1.10
adaboost,syn,0.019529,0.341722,34.17±1.95
random_forest,mnist,0.007017,0.790611,79.06±0.70
random_forest,mnist_m,0.003881,0.257389,25.74±0.39
random_forest,svhn,0.00543,0.320778,32.08±0.54
random_forest,syn,0.00271,0.420889,42.09±0.27
svm,mnist,0.0,0.739167,73.92±0.00
svm,mnist_m,0.0,0.299,29.90±0.00


In [8]:
summary_df = pd.pivot_table(agg_data, values='value', index=['method'], columns=['target_domain'], aggfunc=lambda x: x)
summary_df['Avg.'] = (agg_data[['mean']].groupby('method').mean()['mean']*100).round(2)
summary_df

target_domain,mnist,mnist_m,svhn,syn,Avg.
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
adaboost,67.10±1.22,25.09±0.34,22.81±1.10,34.17±1.95,37.29
random_forest,79.06±0.70,25.74±0.39,32.08±0.54,42.09±0.27,44.74
svm,73.92±0.00,29.90±0.00,32.17±0.00,55.37±0.00,47.84


In [124]:
print(summary_df.style.to_latex())

\begin{tabular}{lllllr}
target_domain & mnist & mnist_m & svhn & syn & Avg. \\
method &  &  &  &  &  \\
adaboost & 67.10±1.22 & 25.09±0.34 & 22.81±1.10 & 34.17±1.95 & 0.372931 \\
random_forest & 79.06±0.70 & 25.74±0.39 & 32.08±0.54 & 42.09±0.27 & 0.447417 \\
svm & 73.92±0.00 & 29.90±0.00 & 32.17±0.00 & 55.37±0.00 & 0.478375 \\
\end{tabular}



# General Commands

In [3]:
datasets = ['mnist', 'mnist_m', 'svhn', 'syn']
seeds = [1, 2, 3]
for dst in datasets:
    for seed in seeds:
        print(f"python src/baseline/baseline.py --target_domain {dst} -s {seed} -o output/target-{dst}-seed-{seed}.csv")
# python baseline.py --target_domain syn -o output/baseline-target-syn.csv

python src/baseline/baseline.py --target_domain mnist -s 1 -o output/target-mnist-seed-1.csv
python src/baseline/baseline.py --target_domain mnist -s 2 -o output/target-mnist-seed-2.csv
python src/baseline/baseline.py --target_domain mnist -s 3 -o output/target-mnist-seed-3.csv
python src/baseline/baseline.py --target_domain mnist_m -s 1 -o output/target-mnist_m-seed-1.csv
python src/baseline/baseline.py --target_domain mnist_m -s 2 -o output/target-mnist_m-seed-2.csv
python src/baseline/baseline.py --target_domain mnist_m -s 3 -o output/target-mnist_m-seed-3.csv
python src/baseline/baseline.py --target_domain svhn -s 1 -o output/target-svhn-seed-1.csv
python src/baseline/baseline.py --target_domain svhn -s 2 -o output/target-svhn-seed-2.csv
python src/baseline/baseline.py --target_domain svhn -s 3 -o output/target-svhn-seed-3.csv
python src/baseline/baseline.py --target_domain syn -s 1 -o output/target-syn-seed-1.csv
python src/baseline/baseline.py --target_domain syn -s 2 -o output/t

In [23]:
datasets = [f'Dst{i}' for i in range(1, 5)]
models = [f'Model{i}' for i in range(1, 5)]
df = pd.DataFrame([(model, dst, np.random.rand()) for model in models for dst in datasets], columns=['Model', 'Dataset', 'Accuracy'])
df2 = df.set_index(['Model', 'Dataset'])
df2
# data = np.random.randn(4, 4)
# df = pd.DataFrame(data=data, columns=models, index=datasets)
# df

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy
Model,Dataset,Unnamed: 2_level_1
Model1,Dst1,0.340028
Model1,Dst2,0.778824
Model1,Dst3,0.069166
Model1,Dst4,0.14099
Model2,Dst1,0.42518
Model2,Dst2,0.412939
Model2,Dst3,0.44616
Model2,Dst4,0.157372
Model3,Dst1,0.64035
Model3,Dst2,0.925605


In [27]:
pivot_table = pd.pivot_table(df2, values=['Accuracy'], columns=['Dataset'], index='Model')
pivot_table

Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy
Dataset,Dst1,Dst2,Dst3,Dst4
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Model1,0.340028,0.778824,0.069166,0.14099
Model2,0.42518,0.412939,0.44616,0.157372
Model3,0.64035,0.925605,0.923257,0.012333
Model4,0.455119,0.651423,0.567043,0.02386


In [38]:
pivot_table['Accuracy']

Dataset,Dst1,Dst2,Dst3,Dst4
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Model1,0.340028,0.778824,0.069166,0.14099
Model2,0.42518,0.412939,0.44616,0.157372
Model3,0.64035,0.925605,0.923257,0.012333
Model4,0.455119,0.651423,0.567043,0.02386


In [40]:
pivot_table['Accuracy'].reset_index().drop(columns=['Model'])

Dataset,Dst1,Dst2,Dst3,Dst4
0,0.340028,0.778824,0.069166,0.14099
1,0.42518,0.412939,0.44616,0.157372
2,0.64035,0.925605,0.923257,0.012333
3,0.455119,0.651423,0.567043,0.02386
