# Ablation Analysis for the Conv2d Module
## Imports & Definitions

In [None]:
import itertools
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from utils.data_utils import preprocess_and_normalize_energy_data
from sklearn.linear_model import LinearRegression
from experiments_utils import split_data_set, fit_model, compute_log_transformed_features, apply_data_transforms, test_model
%load_ext autoreload
%autoreload 2
# Seed handed to split_data_set for every feature-set run.
# NOTE(review): presumably this makes the data split reproducible — confirm in experiments_utils.
SEED = 1223
# Names of the Conv2d configuration parameters; passed to
# preprocess_and_normalize_energy_data and compute_log_transformed_features.
param_cols = ['batch_size','image_size','kernel_size','in_channels','out_channels','stride','padding']

## Load Data

In [None]:
# Read the parsed Conv2d energy measurements; the path is relative to the
# notebook's working directory.
data_unnormalized = pd.read_csv('../data/conv2d-energies-parsed.csv')
# Project helper from utils.data_utils. What the normalization and
# aggregate=True actually do is defined there.
# NOTE(review): confirm before relying on the units/shape of `data`.
data = preprocess_and_normalize_energy_data(data_unnormalized, param_cols, aggregate=True)

## Ablation Analysis
### Compute all possible feature-sets

In [None]:
# Let the experiments_utils helper extend the data and the parameter-name list
# (by its name: with log-transformed features), then add 'macs' as one more
# candidate feature.
data_with_log, param_cols_with_log = compute_log_transformed_features(data, param_cols)
feature_names = param_cols_with_log + ['macs']

# Every non-empty subset of the candidate features: combinations of size 1 up to
# len(feature_names), smallest subsets first.
all_feature_comb_all_sizes = list(
    itertools.chain.from_iterable(
        itertools.combinations(feature_names, subset_size)
        for subset_size in range(1, len(feature_names) + 1)
    )
)

### Fit the model to all feature-sets

In [None]:
# Preprocessors handed to apply_data_transforms for every feature-set:
# StandardScaler for the inputs, MinMaxScaler for the target.
transformers_dict = {
    "x_preprocessors": [StandardScaler()],
    "y_preprocessor": MinMaxScaler(),
}

# Collect one record per feature-set and build the DataFrame once afterwards.
# Appending with pd.concat inside the loop re-copies all earlier rows on every
# iteration, and concatenating onto an empty frame can leave the score columns
# with object dtype instead of a numeric one.
score_rows = []
for f_set in tqdm(all_feature_comb_all_sizes):
    # Work on a copy so a helper cannot modify the shared data between runs.
    dfs = split_data_set(data_with_log.copy(), list(f_set), SEED)
    dfs, _ = apply_data_transforms(dfs, transformers_dict)
    model, val_score, val_mse = fit_model(
        LinearRegression(),
        dfs["x_train"], dfs["y_train"],
        dfs["x_val"], dfs["y_val"],
        plot_results=False, verbose=False,
    )
    y_hat, test_score, test_mse = test_model(
        model, dfs["x_test"], dfs["y_test"],
        plot_results=False, verbose=False,
    )
    score_rows.append({
        # Stored as the string form of the feature tuple.
        'feature_set': str(f_set),
        'val_score': val_score,
        'test_score': test_score,
    })

# Passing `columns` keeps the expected layout even when no feature-set was run.
scores = pd.DataFrame(score_rows, columns=['feature_set', 'val_score', 'test_score'])

## Evaluate Scores

In [None]:
# Flag every feature-set whose label mentions 'macs'. `feature_set` holds the
# string form of the feature tuple, so this is a substring test on that text.
custom_label_macs = ['macs' in label for label in scores['feature_set']]
scores['with_MACs'] = custom_label_macs
scores.head()

In [None]:
# Convert the scores to a numeric dtype *before* they are plotted or compared,
# so the plot and the min/max report work on the same values.
scores['test_score'] = pd.to_numeric(scores['test_score'])

# Test score of every feature-set, in enumeration order, split by whether the
# set includes MACs.
plt.figure(figsize=(10,8))
g = sns.lineplot(y=scores.test_score, x=scores.index, hue=scores.with_MACs)
# g.legend(['with MACs', 'without MACs'], title="feature-set")
g.set_xlabel('Feature-set Index')
g.set_ylabel('Test R²-Score')

# Report the best and worst feature-set for each group.
# idxmax()/idxmin() return index *labels*, so the row is looked up with .loc
# (label-based) rather than .iloc (position-based); the two only coincide while
# the index is a default RangeIndex. Score and feature-set are read from the
# same row so they cannot disagree.
has_macs = scores['with_MACs'].astype(bool)
for group_name, group_mask in (("with MACs", has_macs), ("without MACs", ~has_macs)):
    group_scores = scores.loc[group_mask, 'test_score']
    for stat_name, row_label in (("max", group_scores.idxmax()), ("min", group_scores.idxmin())):
        print(
            f"{stat_name} score {group_name}: ",
            scores.loc[row_label, 'test_score'],
            scores.loc[row_label, 'feature_set'],
        )