In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
from sklearn import metrics

import sys
sys.path.append('C:/users/aaron/JupyterNotebook/W. Zhang Lab/projects/heartbeat/utils')
from utils import *

In [None]:
# Directory that holds the exported analysis tables. It defaults to the
# original location; set the HEARTBEAT_OUT_DIR environment variable to run the
# notebook on another machine without editing this cell.
out_dir = os.environ.get(
    'HEARTBEAT_OUT_DIR',
    'C:/users/aaron/JupyterNotebook/W. Zhang Lab/projects/heartbeat/out',
).rstrip('/\\')  # tolerate a trailing separator in the override

# Input files read by the loading cell below.
psd_mat_path = out_dir + '/psd_mat.csv'
psd_freqs_path = out_dir + '/psd_freqs.csv'
info_path = out_dir + '/info_df.csv'

In [None]:
# Metadata table; the first CSV column becomes the row index.
info_df = pd.read_csv(info_path, index_col=0)
# Category label of every row, as an array, used below to select datasets.
categories = info_df['category'].values

# Comma-separated numeric tables.
# NOTE(review): later cells index psd_mat rows and info_df rows with the same
# indices, so the two are presumably row-aligned — confirm against the export.
psd_freqs = np.loadtxt(psd_freqs_path, delimiter=',')
psd_mat = np.loadtxt(psd_mat_path, delimiter=',')

# Display the metadata table as the cell output.
info_df

In [None]:
# Named comparisons: each entry maps a short dataset name to the list of
# category labels that are selected together for one analysis.
# NOTE(review): the labels are matched against info_df['category'], so the
# spelling here (e.g. 'Thirste') must be exactly what the data contains —
# confirm before "correcting" any of them.
datasets = dict(
    age=['Normal', '28Day', '14Day'],
    feeding=['Normal', 'Thirste', 'Starved'],
    mating=['Normal', 'Failed', 'Mated'],
    mated_vs_normal=['Normal', 'Mated'],
    mated_vs_failed=['Failed', 'Mated'],
    mated=['Mated', 'Mated+Female'],
    failed=['Failed', 'Failed+Female'],
)

In [None]:
# Baseline run on the Mated-vs-Failed comparison, using the un-normalized
# psd_mat and the balancing option of chooseDataset (helper from utils).
X, Y = chooseDataset(
    psd_mat,
    categories,
    datasets['mated_vs_failed'],
    balancing=True,
)

# getLDAResults (helper from utils) is run for 100 epochs with printing and
# plotting enabled; its two return values are kept as the train/test scores.
f1_train, f1_test = getLDAResults(
    X,
    Y,
    n_epochs=100,
    printing=True,
    plotting=True,
    psd_freqs=psd_freqs,
)

# shuffle for each individual

In [None]:
# Control analysis: permute the category labels only among rows that share the
# same individual `ID`, then compare LDA results on the true labels with the
# results on the shuffled labels.
# NOTE: the shuffle uses NumPy's global RNG and no seed is set in this cell, so
# the shuffled result differs between runs.
idcs = chooseIdcs(categories, datasets['mated_vs_failed'], balancing=True)
X = psd_mat[idcs, :]
info = info_df.iloc[idcs]
Y = info.category.values

# Start from the identity permutation so that any row not reassigned in the
# loop (e.g. a missing ID, which never compares equal to itself) keeps its own
# label instead of silently pointing at row 0.
shuffled_idcs = np.arange(len(Y))
for ID in info.ID.unique():
    # Positions (within the selected subset) of all rows of this individual.
    subdf_idcs = np.where(info.ID == ID)[0]
    # Permute those positions among themselves.
    shuffled_idcs[subdf_idcs] = np.random.permutation(subdf_idcs)

Y_shuffled = Y[shuffled_idcs]

# X, Y and Y_shuffled must all come from the SAME `idcs`. The dataset is
# deliberately not re-selected here: another balancing=True call may pick a
# different subset of rows, which would misalign X with Y_shuffled.
_, _ = getLDAResults(X, Y, n_epochs=100, printing=True, plotting=True, psd_freqs=psd_freqs)

_, _ = getLDAResults(X, Y_shuffled, n_epochs=100, printing=True, plotting=True, psd_freqs=psd_freqs)

# shuffle all

In [None]:
# Control analysis: permute the category labels across ALL selected rows and
# compare the LDA results with those obtained on the true labels.
idcs = chooseIdcs(categories, datasets['mated_vs_failed'], balancing=True)
info = info_df.iloc[idcs]
X = psd_mat[idcs, :]
Y = info['category'].values

# Shuffle a copy in place so that Y itself keeps the true labels.
Y_shuffled = Y.copy()
np.random.shuffle(Y_shuffled)

# True labels.
_, _ = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True, psd_freqs=psd_freqs,
)

# Fully shuffled labels.
_, _ = getLDAResults(
    X, Y_shuffled,
    n_epochs=100, printing=True, plotting=True, psd_freqs=psd_freqs,
)

In [None]:
# Fit an LDA on Mated vs Failed, then project three further groups that were
# not used for fitting onto the same discriminant and plot everything together.
# `normalize` and `LDA` come from utils; their exact behavior is not visible here.
X_norm = normalize(psd_mat)
X, Y = chooseDataset(X_norm, categories, datasets['mated_vs_failed'], balancing=True)
X_plus_female, Y_plus_female = chooseDataset(
    X_norm,
    categories,
    ['Failed+Female', 'Mated+Female', 'Normal'],
    balancing=False,
)

lda_model = LDA()
# Projection of the fitting data, and of the extra groups through the fitted model.
pcs_all = lda_model.fit_transform(X, Y)
pcs_plus_female = lda_model.transform(X_plus_female)
# Fitting groups first, extra groups after — same order for values and labels.
pcs_concat = np.concatenate([pcs_all, pcs_plus_female], axis=0)
Y_concat = np.concatenate([Y, Y_plus_female], axis=0)
# NOTE(review): this is whatever `LDA.score` returns on the fitting data; if LDA
# is scikit-learn's estimator that is mean accuracy, not an F1 score, although
# the figure title below calls it f1_score — confirm.
f1_score_all = lda_model.score(X, Y)

fig = plt.figure(figsize=[12,6], dpi=144, facecolor=[0.9,0.9,0.9])

# Left panel: density of the projection for the two fitting groups only.
ax_kde = plt.subplot(1,2,1)
result = doTtest(pcs_all.flatten(), Y)
sns.kdeplot(x=pcs_all.flatten(), hue=Y, ax=ax_kde)

ax_kde.set_title('LDA')
# Axis limits are read after plotting, so the p-value lands at the top-left corner.
ax_kde.text(x=ax_kde.get_xlim()[0], y=ax_kde.get_ylim()[1], s='p = %.3e'%result.pvalue)

# Right panel: projection of every group, fitted and projected-only.
ax_box = plt.subplot(1,2,2)
sns.boxplot(x=Y_concat, y=pcs_concat.flatten(), ax=ax_box)
ax_box.set_ylabel('ld1_projection')

fig.suptitle('all (f1_score = %.3f)'%f1_score_all)
fig.tight_layout()
plt.show()

In [None]:
# Fit an LDA on Starved vs Normal, then project the 'Starved+Venigar' group
# (not used for fitting) onto the same discriminant.
# The `*_plus_female` variable names are kept from the previous cell; in this
# cell they hold the 'Starved+Venigar' rows.
# NOTE(review): the label spelling must match info_df['category'] exactly — confirm.
X_norm = normalize(psd_mat)
X, Y = chooseDataset(X_norm, categories, ['Starved', 'Normal'], balancing=True)
X_plus_female, Y_plus_female = chooseDataset(
    X_norm,
    categories,
    ['Starved+Venigar'],
    balancing=False,
)

lda_model = LDA()
# Projection of the fitting data, and of the extra group through the fitted model.
pcs_all = lda_model.fit_transform(X, Y)
pcs_plus_female = lda_model.transform(X_plus_female)
# Fitting groups first, extra group after — same order for values and labels.
pcs_concat = np.concatenate([pcs_all, pcs_plus_female], axis=0)
Y_concat = np.concatenate([Y, Y_plus_female], axis=0)
# NOTE(review): this is whatever `LDA.score` returns on the fitting data; if LDA
# is scikit-learn's estimator that is mean accuracy, not an F1 score, although
# the figure title below calls it f1_score — confirm.
f1_score_all = lda_model.score(X, Y)

fig = plt.figure(figsize=[12,6], dpi=144, facecolor=[0.9,0.9,0.9])

# Left panel: density of the projection for all groups. The p-value comes from
# doTtest on the two fitting groups only.
ax_kde = plt.subplot(1,2,1)
result = doTtest(pcs_all.flatten(), Y)
sns.kdeplot(x=pcs_concat.flatten(), hue=Y_concat, ax=ax_kde)

ax_kde.set_title('LDA')
# Axis limits are read after plotting, so the p-value lands at the top-left corner.
ax_kde.text(x=ax_kde.get_xlim()[0], y=ax_kde.get_ylim()[1], s='p = %.3e'%result.pvalue)

# Right panel: the same projections as box plots per group.
ax_box = plt.subplot(1,2,2)
sns.boxplot(x=Y_concat, y=pcs_concat.flatten(), ax=ax_box)
ax_box.set_ylabel('ld1_projection')

fig.suptitle('all (f1_score = %.3f)'%f1_score_all)
fig.tight_layout()
plt.show()