In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.cross_decomposition import CCA

# Neural Network Evaluation Data

In [None]:
# Load the five evaluation result files; the first two CSV columns form the row MultiIndex.
eval_paths = [
    "NN Eval Data/EvalNN_Data1",
    "NN Eval Data/EvalNN_Data2",
    "NN Eval Data/EvalNN_Data3",
    "NN Eval Data/EvalNN_Data4",
    "NN Eval Data/EvalNN_Data5",
]
eval_data1, eval_data2, eval_data3, eval_data4, eval_data5 = (
    pd.read_csv(path, index_col=[0, 1]) for path in eval_paths
)

# Stack all files, then average the rows that share the same two-level index key.
stacked_eval = pd.concat([eval_data1, eval_data2, eval_data3, eval_data4, eval_data5])
all_eval_data = stacked_eval.groupby(level=[0, 1]).mean()

In [None]:
# Show the evaluation metrics averaged across the loaded evaluation files.
all_eval_data

In [None]:
# For every metric column, print the index key of the row holding its maximum value.
for metric_name, metric_values in all_eval_data.items():
    print(metric_values.idxmax())

In [None]:
# Column-wise z-score: subtract each column's mean, then divide by its standard deviation.
eval_centered = all_eval_data.sub(all_eval_data.mean())
normalized_eval = eval_centered.div(all_eval_data.std())
normalized_eval

In [None]:
# Scatter-matrix of the standardized evaluation metrics.
sns.pairplot(data=normalized_eval)

In [None]:
# Box plot of every standardized evaluation metric, one box per column.
normalized_eval.boxplot()

# Similarity Measure Data

In [None]:
# First similarity-measure table: read with a two-level row index and sort by that index.
sim_data1 = pd.read_csv(
    "Similarity Data/SimilarityData",
    index_col=[0, 1],
).sort_index()
sim_data1.head()

In [None]:
# Second similarity-measure table: read with a two-level row index and sort by that index.
sim_data2 = pd.read_csv(
    "Similarity Data/SimilarityData_AllFeats",
    index_col=[0, 1],
).sort_index()
sim_data2.head()

In [None]:
# Column-wise z-score of both similarity tables (mean removed, scaled by standard deviation).
norm_sim1 = sim_data1.sub(sim_data1.mean()).div(sim_data1.std())
norm_sim2 = sim_data2.sub(sim_data2.mean()).div(sim_data2.std())
norm_sim1.head()

In [None]:
# Preview the first rows of the standardized second similarity table.
norm_sim2.head()

In [None]:
# Box plot per standardized similarity measure (set 1); tick labels rotated, grid hidden.
norm_sim1.boxplot(rot=60, grid=False)

In [None]:
# Box plot per standardized similarity measure (set 2); tick labels rotated, grid hidden.
norm_sim2.boxplot(rot=60, grid=False)

In [None]:
# Scatter-matrix of the standardized similarity measures (set 1).
sns.pairplot(data=norm_sim1)

In [None]:
# Scatter-matrix of the standardized similarity measures (set 2).
sns.pairplot(data=norm_sim2)

In [None]:
# Relabel the columns of each similarity table so the two sets stay distinguishable
# after they are joined; the labels are assigned positionally.
set1_labels = ["Euclidean (1)", "DTW - Between (1)", "DTW - Within (1)", "KLD (1)", "MIS (1)", "EMD (1)", "Granger (1)", "Cosine (1)"]
set2_labels = ["Euclidean (2)", "DTW - Between (2)", "DTW - Within (2)", "KLD (2)", "MIS (2)", "EMD (2)", "Cosine (2)"]
sim_data1_new = sim_data1.set_axis(set1_labels, axis=1)
sim_data2_new = sim_data2.set_axis(set2_labels, axis=1)

# Correlation matrix over the union of both relabelled tables (outer join on the row index).
combined_sim = sim_data1_new.join(sim_data2_new, how="outer")
corr_coeff = combined_sim.corr()
corr_coeff

In [None]:
# Cross-correlation block between the two similarity sets: rows are the set-1
# measures except "Granger (1)" (set 2 has no Granger column), columns are all
# set-2 measures. Selecting by label instead of by position keeps the block
# correct if the column order or count ever changes.
rows_set1 = sim_data1_new.columns.drop("Granger (1)")
cols_set2 = sim_data2_new.columns

plt.figure(figsize=(10, 10))
# Pin both colour limits so the diverging colormap is centred on zero. With only
# vmin given, vmax is inferred from the data, and this off-diagonal block has no
# guaranteed 1.0 entry, so 0 would not map to the neutral colour.
sns.heatmap(
    corr_coeff.loc[rows_set1, cols_set2],
    cmap='coolwarm',
    annot=True,
    linewidths=1,
    vmin=-1,
    vmax=1,
)

# Assess Relationships

In [None]:
# Attach the averaged evaluation metrics to each similarity table; the outer join
# keeps index keys that appear in only one of the two frames.
all_data1, all_data2 = (
    sim_table.join(all_eval_data, how="outer")
    for sim_table in (sim_data1, sim_data2)
)

In [None]:
# Compare row/column counts before and after the join as a quick sanity check.
for checked_frame in (all_eval_data, sim_data2, all_data2):
    print(checked_frame.shape)

In [None]:
# Persist both joined tables as CSV files in the working directory.
for joined_frame, out_name in ((all_data1, "AllData1"), (all_data2, "AllData2")):
    joined_frame.to_csv(out_name)

In [None]:
# Preview the first rows of the joined similarity (set 1) + evaluation table.
all_data1.head()

In [None]:
# Full correlation matrix of similarity set 1 together with the evaluation metrics,
# drawn as an annotated heatmap.
corr_coeff1 = all_data1.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    data=corr_coeff1,
    vmin=-1,
    cmap='coolwarm',
    annot=True,
    linewidths=1,
)

In [None]:
# Full correlation matrix of similarity set 2 together with the evaluation metrics,
# drawn as an annotated heatmap.
corr_coeff2 = all_data2.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    data=corr_coeff2,
    vmin=-1,
    cmap='coolwarm',
    annot=True,
    linewidths=1,
)

In [None]:
# Scatter-matrix of similarity set 1 joined with the evaluation metrics.
sns.pairplot(data=all_data1)

In [None]:
# Scatter-matrix of similarity set 2 joined with the evaluation metrics.
sns.pairplot(data=all_data2)

## Approach 1

In [None]:
# OLS of standardized "Avg Loss" on the standardized set-1 similarity measures.
# X2 (predictors plus an intercept column) is reused by the following cells.
X = norm_sim1
X2 = sm.add_constant(X)
y = normalized_eval.loc[:, ["Avg Loss"]]

est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
params1 = est2.params  # fitted coefficients kept for later inspection
print(est2.summary())

In [None]:
# OLS of standardized "MAE" on the design matrix X2 built in the preceding cell.
y = normalized_eval.loc[:, ["MAE"]]

est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
params2 = est2.params  # fitted coefficients kept for later inspection
print(est2.summary())

In [None]:
# OLS of standardized "RMSE" on the design matrix X2 built earlier in this section.
y = normalized_eval.loc[:, ["RMSE"]]

est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
params3 = est2.params  # fitted coefficients kept for later inspection
print(est2.summary())

## Approach 2 

In [None]:
# OLS of standardized "Avg Loss" on the standardized set-2 similarity measures.
# X2 (predictors plus an intercept column) is reused by the following cells.
X = norm_sim2
X2 = sm.add_constant(X)
y = normalized_eval.loc[:, ["Avg Loss"]]

est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
print(est2.summary())

In [None]:
# OLS of standardized "MAE" on the design matrix X2 built in the preceding cell.
y = normalized_eval.loc[:, ["MAE"]]

est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
print(est2.summary())

In [None]:
# OLS of standardized "RMSE" on the design matrix X2 built earlier in this section.
y = normalized_eval.loc[:, ["RMSE"]]

est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
print(est2.summary())

# Canonical Correlation Analysis

## Approach 1

In [None]:
# Fit a 3-component CCA between the standardized set-1 similarity measures (X block)
# and the standardized evaluation metrics (Y block), then project both blocks onto
# the canonical variates in a single call.
n_comp = 3
cca = CCA(n_components=n_comp, scale=True)
U, V = cca.fit_transform(norm_sim1, normalized_eval)

In [None]:
# Report the correlation between the two canonical variates of each pair.
for pair_idx, ordinal in enumerate(("First", "Second", "Third")):
    pair_corr = np.corrcoef(U[:, pair_idx], V[:, pair_idx])[0, 1]
    print(f"{ordinal} pair: {pair_corr}")

In [None]:
# Sum of the correlations of the first two canonical pairs.
sum(np.corrcoef(U[:, pair_idx], V[:, pair_idx])[0, 1] for pair_idx in range(2))

In [None]:
# Correlation between the paired canonical variates, one value per component.
comp_corr = [np.corrcoef(U[:, i], V[:, i])[0, 1] for i in range(n_comp)]
# Derive the tick labels from n_comp so the plot keeps working if n_comp changes.
pair_labels = [f"CC{i + 1}" for i in range(n_comp)]

plt.bar(pair_labels, comp_corr, color='grey', width=0.8, edgecolor='black')
# Running total of the correlations, labelled so it is not mistaken for a per-pair value.
plt.plot(pair_labels, np.cumsum(comp_corr), c='blue', label="Cumulative sum")
plt.xlabel("Canonical Pair")
# The bars are correlations between canonical variates, not a share of variance.
plt.ylabel("Canonical Correlation")
# grid(None) toggles the current grid state; grid(False) reliably switches it off.
plt.grid(False)
plt.legend()
plt.title("Canonical Correlation Analysis")

In [None]:
# Loadings of the X block (the similarity measures passed first to cca.fit) on each canonical component.
cca.x_loadings_

In [None]:
# Loadings of the Y block (the evaluation metrics passed second to cca.fit) on each canonical component.
cca.y_loadings_

## Approach 2

In [None]:
# Fit a 3-component CCA between the standardized set-2 similarity measures (X block)
# and the standardized evaluation metrics (Y block), then project both blocks onto
# the canonical variates in a single call.
n_comp = 3
cca = CCA(n_components=n_comp, scale=True)
U, V = cca.fit_transform(norm_sim2, normalized_eval)

In [None]:
# Report the correlation between the two canonical variates of each pair.
for pair_idx, ordinal in enumerate(("First", "Second", "Third")):
    pair_corr = np.corrcoef(U[:, pair_idx], V[:, pair_idx])[0, 1]
    print(f"{ordinal} pair: {pair_corr}")

In [None]:
# Sum of the correlations of all three canonical pairs.
sum(np.corrcoef(U[:, pair_idx], V[:, pair_idx])[0, 1] for pair_idx in range(3))

In [None]:
# Correlation between the paired canonical variates, one value per component.
comp_corr = [np.corrcoef(U[:, i], V[:, i])[0, 1] for i in range(n_comp)]
# Derive the tick labels from n_comp so the plot keeps working if n_comp changes.
pair_labels = [f"CC{i + 1}" for i in range(n_comp)]

plt.bar(pair_labels, comp_corr, color='grey', width=0.8, edgecolor='black')
# Running total of the correlations, labelled so it is not mistaken for a per-pair value.
plt.plot(pair_labels, np.cumsum(comp_corr), c='blue', label="Cumulative sum")
plt.xlabel("Canonical Pair")
# The bars are correlations between canonical variates, not a share of variance.
plt.ylabel("Canonical Correlation")
# grid(None) toggles the current grid state; grid(False) reliably switches it off.
plt.grid(False)
plt.legend()
plt.title("Canonical Correlation Analysis")

In [None]:
# Loadings of the X block (the similarity measures passed first to cca.fit) on each canonical component.
cca.x_loadings_ 

In [None]:
# Loadings of the Y block (the evaluation metrics passed second to cca.fit) on each canonical component.
cca.y_loadings_