# Imports & Loads

In [22]:
import pandas as pd
import numpy as np
import joblib
import json
from zipfile import ZipFile

### Some required info from the feature extraction process

In [23]:
# Read the metadata saved by the feature-extraction step.
with open('feature_extraction_info.json', mode='r') as f:
    feature_extraction_info = json.load(f)

# Subject counts; used further down to regroup per-window predictions by subject.
n_valid_subjects, n_test_subjects = (
    feature_extraction_info['n_subjects_valid'],
    feature_extraction_info['n_subjects_test'],
)

# Window counts for the eyes-closed (ec) and eyes-open (eo) conditions.
n_windows_ec, n_windows_eo = (
    feature_extraction_info['n_windows_ec'],
    feature_extraction_info['n_windows_eo'],
)

# Test-set subject identifiers; they become the "id" column of the submission.
test_subjects = feature_extraction_info['test_dataset_subjects']

### Eyes closed condition

In [24]:
# Eyes-closed test features, passed unchanged to model_ec.predict later on.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# this file must come from a trusted source.
test_features_ec_path = "Data/test_ec.npy"
test_features_ec = np.load(file=test_features_ec_path, allow_pickle=True)

In [25]:
# Fitted model for the eyes-closed condition.
# NOTE(review): joblib.load unpickles the file; only load models from a trusted source.
model_ec_path = "autoML_model_ec6.pkl"
model_ec = joblib.load(filename=model_ec_path)

In [26]:
# Eyes-closed validation table; downstream cells use the last column as the
# target and all preceding columns as features.
df_valid_ec_path = "Data/df_valid_ec.csv"
df_valid_ec = pd.read_csv(filepath_or_buffer=df_valid_ec_path)

### Eyes open condition

In [27]:
# Eyes-open test features, passed unchanged to model_eo.predict later on.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# this file must come from a trusted source.
test_features_eo_path = "Data/test_eo.npy"
test_features_eo = np.load(file=test_features_eo_path, allow_pickle=True)

In [28]:
# Fitted model for the eyes-open condition.
# NOTE(review): joblib.load unpickles the file; only load models from a trusted source.
model_eo_path = "autoML_model_eo6.pkl"
model_eo = joblib.load(filename=model_eo_path)

In [29]:
# Eyes-open validation table; downstream cells use the last column as the
# target and all preceding columns as features.
df_valid_eo_path = "Data/df_valid_eo.csv"
df_valid_eo = pd.read_csv(filepath_or_buffer=df_valid_eo_path)

# Defining EO-EC models ensemble weights

In [46]:
# Weights given to the eyes-closed (EC) and eyes-open (EO) models when their
# per-subject predictions are combined into a weighted average.
ec_model_weight = 0.635

eo_model_weight = 0.365

# Compare with a tolerance: exact float equality can fail for perfectly valid
# weight pairs whose decimal values are not exactly representable in binary.
assert np.isclose(ec_model_weight + eo_model_weight, 1.0), "Sum of weights should be equal to 1"

# Metric function

In [47]:
def calc_MAE(signal1, signal2):

    """
    Returns the Mean Absolute Error between signal1 and signal2.

    Parameters
    ----------
    signal1, signal2 : array-like
        Numeric sequences of identical shape (normally 1d arrays).

    Returns
    -------
    float
        Mean of the element-wise absolute differences.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """

    # Cast to float so plain lists are accepted and unsigned-integer inputs
    # cannot wrap around when subtracted.
    signal1 = np.asarray(signal1, dtype=float)
    signal2 = np.asarray(signal2, dtype=float)

    # Refuse to broadcast: e.g. (n,) against (n, 1) would silently expand to an
    # (n, n) matrix and yield a meaningless score.
    if signal1.shape != signal2.shape:
        raise ValueError(
            "calc_MAE expects inputs of identical shape, got "
            f"{signal1.shape} and {signal2.shape}"
        )

    return np.mean(np.abs(signal1 - signal2))

# Verifying validation scores 

### EC Condition Validation set

In [48]:
# Last column of the EC validation table is the target; all other columns are features.
valid_ec_Y = df_valid_ec.iloc[:, -1].to_numpy()
valid_ec_features = df_valid_ec.iloc[:, :-1].to_numpy()

# Per-row predictions of the EC model and their MAE against the per-row targets.
valid_ec_pred = model_ec.predict(valid_ec_features)
model_ec_valid_score = calc_MAE(valid_ec_pred, valid_ec_Y)

print("autoML_model_ec validation set score: ", model_ec_valid_score)

autoML_model_ec validation set score:  1.7019202191350455


### EO Condition Validation set

In [49]:
# Last column of the EO validation table is the target; all other columns are features.
valid_eo_Y = df_valid_eo.iloc[:, -1].to_numpy()
valid_eo_features = df_valid_eo.iloc[:, :-1].to_numpy()

# Per-row predictions of the EO model and their MAE against the per-row targets.
valid_eo_pred = model_eo.predict(valid_eo_features)
model_eo_valid_score = calc_MAE(valid_eo_pred, valid_eo_Y)

print("autoML_model_eo validation set score: ", model_eo_valid_score)

autoML_model_eo validation set score:  1.8187050114823526


### Ensemble score over Validation set

In [50]:
# Actual age values are expected to be the same for both 'eo' and 'ec'
# validation sets, so one label per subject is taken from the EC labels by
# stepping through them n_windows_ec rows at a time.
valid_Y_actual = valid_ec_Y[::n_windows_ec]

# The ensemble score below is only meaningful if there is exactly one label per
# validation subject and the EO set carries the same labels in the same subject
# order, so check both instead of merely stating them.
assert len(valid_Y_actual) == n_valid_subjects, \
    "Expected one label per validation subject; check n_windows_ec against the EC validation set"
assert np.array_equal(valid_Y_actual, valid_eo_Y[::n_windows_eo]), \
    "Per-subject labels of the 'ec' and 'eo' validation sets differ"

In [51]:
# Collapse EC predictions to one value per subject: lay them out as one row per
# validation subject, then average across each row.
valid_ec_Y_pred = valid_ec_pred.reshape(n_valid_subjects, -1).mean(axis=1)

In [52]:
# Collapse EO predictions to one value per subject: lay them out as one row per
# validation subject, then average across each row.
valid_eo_Y_pred = valid_eo_pred.reshape(n_valid_subjects, -1).mean(axis=1)

In [53]:
# Weighted average of the per-subject EC and EO predictions.
valid_Y_pred = ec_model_weight * valid_ec_Y_pred + eo_model_weight * valid_eo_Y_pred

# MAE of the ensemble against the per-subject labels.
final_validation_score = calc_MAE(valid_Y_pred, valid_Y_actual)
print("Validation set final score: ", final_validation_score)

Validation set final score:  1.5787076303198


# Test Predictions

### model_ec prediction

In [17]:
# Predict on the EC test features, lay the predictions out as one row per test
# subject, and average each row into a single per-subject prediction.
test_ec_pred = (
    model_ec.predict(test_features_ec)
    .reshape(n_test_subjects, -1)
    .mean(axis=1)
)

### model_eo prediction

In [18]:
# Predict on the EO test features, lay the predictions out as one row per test
# subject, and average each row into a single per-subject prediction.
test_eo_pred = (
    model_eo.predict(test_features_eo)
    .reshape(n_test_subjects, -1)
    .mean(axis=1)
)

### Ensemble of 'eo' and 'ec' predictions

In [19]:
# Weighted average of the per-subject EC and EO test predictions.
test_preds_final = ec_model_weight * test_ec_pred + eo_model_weight * test_eo_pred

### Saving the predictions in a dataframe

In [20]:
# One row per test subject: its identifier and the ensemble prediction.
df_final = pd.DataFrame(data={"id": test_subjects, "age": test_preds_final})

# Write the submission file without the DataFrame index column.
df_final.to_csv(path_or_buf="df_submission.csv", index=False)

print(df_final)

       id   age
0    1601  9.20
1    1602  9.67
2    1603 10.31
3    1604 10.51
4    1605  8.57
..    ...   ...
395  1996  7.40
396  1997 12.69
397  1998  7.20
398  1999  9.60
399  2000 10.99

[400 rows x 2 columns]


### Zipping the submission dataframe

In [21]:
# Pack the submission CSV into a zip archive, stored under its bare file name.
with ZipFile(file="df_submission.zip", mode='w') as zipf:
    zipf.write(filename="df_submission.csv", arcname="df_submission.csv")