# Data Compression of Raw Data

In [12]:
import numpy as np
from sklearn.neural_network import MLPClassifier
import random
import torch
from scipy.special import erf
import matplotlib.pyplot as plt
from math import*
from scipy.integrate import quad as itg
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from src.config import RESULT_DIR, FIGURE_DIR
from utils.experiments import dump_pickle, load_pickle, concat_dataframes
from src.config import RESULT_DIR as empirics_dir
from pathlib import Path

In [6]:
def group(df):
    """Group ``df`` by the eleven configuration columns of an experiment.

    Rows that agree on every one of these columns end up in the same group,
    so the groups differ only in their measured (non-key) columns.

    Returns the ``DataFrameGroupBy`` produced by ``df.groupby``.
    """
    config_columns = (
        'd', 'L', 'r', 'delta', 'sigma', 'omega',
        'attention_lmbda', 'linear_lmbda', 'alpha',
        'informed', 'optim',
    )
    return df.groupby(list(config_columns))


In [15]:
# Number of runs kept per parameter group; every group must reach this count.
N_RUNS = 24


def _load_runs(subdir):
    """Load all pickles in ``empirics_dir / subdir`` and add the mean columns.

    Each ``*_mean`` column holds ``np.mean`` of the matching per-run cell
    (presumably an array of per-trial values -- confirm against the
    experiment code). The columns are added in a fixed order because that
    order is the column order of the exported CSV files.
    """
    runs = concat_dataframes(
        [load_pickle(f) for f in (empirics_dir / subdir).glob('*.pkl')]
    )
    mean_columns = {
        'linear_gen_error': 'linear_gen_error_mean',
        'attention_gen_error': 'attention_gen_error_mean',
        'attention_train_error': 'attention_train_error_mean',
        'attention_magnetization': 'attention_mag_mean',
        'attention_theta': 'attention_theta_mean',
    }
    for source, target in mean_columns.items():
        runs[target] = runs[source].apply(np.mean)
    return runs


# Every run of the "pos_init" sweep is relabelled as uninformed. Item
# assignment is used on purpose: attribute assignment (``df.informed = ...``)
# only updates a column that already exists and otherwise silently sets a
# plain Python attribute, which would leave the group key missing.
df_pos_init = _load_runs('01_paper_toy_example_phase_diagram_pos_init')
df_pos_init['informed'] = False

# From the main sweep only the informed runs are kept, so all uninformed rows
# of the combined frame come from the pos_init sweep.
df = _load_runs('01_paper_toy_example_phase_diagram')
df = df[df['informed']]

df = pd.concat([df, df_pos_init])
df = group(df).head(N_RUNS)  # retain the first N_RUNS rows for each group

# Check that every group actually has N_RUNS rows.
group_sizes = group(df).size()
print(group_sizes)
assert (group_sizes == N_RUNS).all(), f'some groups have fewer than {N_RUNS} runs'

# Per-group mean and standard deviation of all numeric columns.
df_orig = df.copy()
df = group(df_orig).mean(numeric_only=True).reset_index()
df_std = group(df_orig).std(numeric_only=True).reset_index()

# save both df and df_std
df.to_csv('empirics/results_mean_standardA.csv', index=False)
df_std.to_csv('empirics/results_std_standardA.csv', index=False)


d     L  r  delta  sigma  omega  attention_lmbda  linear_lmbda  alpha     informed  optim
1000  2  1  0.4    0.3    0.3    0.01             0.0001        0.010000  False     GD       24
                                                                          True      GD       24
                                                                0.092917  False     GD       24
                                                                          True      GD       24
                                                                0.175833  False     GD       24
                                                                                             ..
                   1.0    0.3    0.01             0.0001        1.834167  True      GD       24
                                                                1.917083  False     GD       24
                                                                          True      GD       24
                                              

In [19]:
# Export the individual runs of one fixed configuration. Pairing each key
# with its value keeps the groupby columns and the lookup tuple in sync.
sample_config = {
    'd': 1000,
    'L': 2,
    'r': 1,
    'delta': 0.4,
    'sigma': 0.5,
    'omega': 0.3,
    'attention_lmbda': 0.01,
    'linear_lmbda': 0.0001,
    'alpha': 2.0,
}
runs_by_config = df_orig.groupby(list(sample_config))
df_samples = runs_by_config.get_group(tuple(sample_config.values()))

df_samples.to_csv('empirics/results_samples_standardA.csv', index=False)

In [9]:
# Load every run of the "otherA" sweep.
df = concat_dataframes(
    [load_pickle(f) for f in (empirics_dir / '02_paper_toy_example_phase_diagram_otherA').glob('*.pkl')]
)

# Add one ``*_mean`` column per metric (``np.mean`` of each per-run cell).
# The order below is the column order of the exported CSV files.
for source, target in (
    ('linear_gen_error', 'linear_gen_error_mean'),
    ('attention_gen_error', 'attention_gen_error_mean'),
    ('attention_train_error', 'attention_train_error_mean'),
    ('attention_magnetization', 'attention_mag_mean'),
    ('attention_theta', 'attention_theta_mean'),
):
    df[target] = df[source].apply(np.mean)

# NOTE: the former ``df_pos_init = df`` alias was dropped here. It was never
# used in this cell and overwrote the pos-init frame built for the standardA
# aggregation with unrelated otherA data.

df = group(df).head(24)  # retain the first 24 rows for each group

# Check that every group actually has 24 rows.
group_sizes = group(df).size()
print(group_sizes)
assert (group_sizes == 24).all(), 'some groups have fewer than 24 runs'

# Per-group mean and standard deviation of all numeric columns.
df_orig = df.copy()
df = group(df_orig).mean(numeric_only=True).reset_index()
df_std = group(df_orig).std(numeric_only=True).reset_index()

# save both df and df_std
df.to_csv('empirics/results_mean_otherA.csv', index=False)
df_std.to_csv('empirics/results_std_otherA.csv', index=False)


d     L  r  delta  sigma  omega  attention_lmbda  linear_lmbda  alpha     informed  optim
1000  2  1  0.4    0.5    0.02   0.01             0.0001        0.010000  False     GD       24
                                                                          True      GD       24
                                                                0.092917  False     GD       24
                                                                          True      GD       24
                                                                0.175833  False     GD       24
                                                                                             ..
                          0.70   0.01             0.0001        1.834167  True      GD       24
                                                                1.917083  False     GD       24
                                                                          True      GD       24
                                              

In [10]:
# Load the r=2 runs. The large-alpha sweep is listed first, so its rows come
# first in the concatenated frame; ``head`` below depends on that order.
run_dirs = ('05_paper_toy_example_r=2_large_alpha', '03_paper_toy_example_r=2')
df = concat_dataframes([
    load_pickle(f)
    for run_dir in run_dirs
    for f in (empirics_dir / run_dir).glob('*.pkl')
])

# Extremes of ``x[-1][0]`` for the ``magli`` / ``thetali`` histories.
# NOTE(review): ``x[-1][0]`` is presumably the final recorded value of the
# first trial -- confirm against the code that fills these columns.
df['attention_mag_max'] = df.magli.apply(lambda x: x[-1][0].max())
df['attention_thet_max'] = df.thetali.apply(lambda x: x[-1][0].max())
df['attention_mag_min'] = df.magli.apply(lambda x: x[-1][0].min())
df['attention_thet_min'] = df.thetali.apply(lambda x: x[-1][0].min())

# Add one ``*_mean`` column per error metric (``np.mean`` of each cell).
for metric in ('linear_gen_error', 'attention_gen_error', 'attention_train_error'):
    df[f'{metric}_mean'] = df[metric].apply(np.mean)

df = group(df).head(10)  # retain the first 10 rows for each group

# Per-group mean and standard deviation of all numeric columns. The copy is
# taken only after truncation; an earlier copy made before ``head`` was
# overwritten without ever being read and has been removed.
df_orig = df.copy()
df = group(df_orig).mean(numeric_only=True).reset_index()
df_std = group(df_orig).std(numeric_only=True).reset_index()

# save both df and df_std
df.to_csv('empirics/results_mean_r=2.csv', index=False)
df_std.to_csv('empirics/results_std_r=2.csv', index=False)

In [14]:
# Load the runs stored under raw/mixed_teacher_softmax_adam_d=100.
adam_dir = Path('raw/mixed_teacher_softmax_adam_d=100')
df = concat_dataframes([load_pickle(pkl) for pkl in adam_dir.glob('*.pkl')])

# Add one ``*_mean`` column per metric (``np.mean`` of each per-run cell).
# The order below is the column order of the exported CSV file.
for source, target in (
    ('linear_gen_error', 'linear_gen_error_mean'),
    ('attention_gen_error', 'attention_gen_error_mean'),
    ('attention_train_error', 'attention_train_error_mean'),
    ('attention_magnetization', 'attention_mag_mean'),
    ('attention_theta', 'attention_theta_mean'),
):
    df[target] = df[source].apply(np.mean)

# Keep only the rows whose ``informed`` flag is False.
is_uninformed = ~df['informed']
df = df[is_uninformed]

# Print the key tuple of every remaining parameter group.
for group_key, _rows in group(df):
    print(group_key)

# The individual runs are written as-is; no per-group averaging is done here.
df.to_csv('empirics/results_adam.csv', index=False)

(100, 2, 1, 0.4, 0.5, 0.3, 0.01, 0.0001, 2.0, False, 'adam')


In [72]:
# Load every run of the scaling sweep.
df = concat_dataframes(
    [load_pickle(f) for f in Path('raw/empirics/06_paper_toy_example_scaling').glob('*.pkl')]
)

# Add one ``*_mean`` column per metric (``np.mean`` of each per-run cell).
# The order below is the column order of the exported CSV files.
for source, target in (
    ('linear_gen_error', 'linear_gen_error_mean'),
    ('attention_gen_error', 'attention_gen_error_mean'),
    ('attention_train_error', 'attention_train_error_mean'),
    ('attention_magnetization', 'attention_mag_mean'),
    ('attention_theta', 'attention_theta_mean'),
):
    df[target] = df[source].apply(np.mean)

# Show how many non-null entries each parameter group has per column.
print(group(df).count())

# Keep at most ``max_samples`` rows (the first ones) per parameter group.
max_samples = 70
df = group(df).head(max_samples)

# Per-group mean and standard deviation of all numeric columns, using the
# shared ``group`` helper so the key list is defined in one place only.
df_orig = df.copy()
df = group(df_orig).mean(numeric_only=True).reset_index()
df_std = group(df_orig).std(numeric_only=True).reset_index()

# Save the truncated raw runs together with their per-group mean and std.
df_orig.to_csv('empirics/results_scaling_orig.csv', index=False)
df.to_csv('empirics/results_scaling.csv', index=False)
df_std.to_csv('empirics/results_std_scaling.csv', index=False)

                                                                             N_iter  \
d   L r delta sigma omega attention_lmbda linear_lmbda alpha informed optim           
10  2 1 0.4   0.5   0.3   0.01            0.0001       1.5   False    GD         80   
                                                             True     GD         80   
15  2 1 0.4   0.5   0.3   0.01            0.0001       1.5   False    GD         80   
                                                             True     GD         80   
23  2 1 0.4   0.5   0.3   0.01            0.0001       1.5   False    GD         80   
                                                             True     GD         80   
36  2 1 0.4   0.5   0.3   0.01            0.0001       1.5   False    GD         80   
                                                             True     GD         80   
56  2 1 0.4   0.5   0.3   0.01            0.0001       1.5   False    GD         80   
                                           