In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
from sklearn import metrics

import sys
sys.path.append('C:/users/aaron/JupyterNotebook/W. Zhang Lab/projects/heartbeat/utils')
from utils import *

In [None]:
# All three input files live in the same output directory of the heartbeat project.
_out_dir = 'C:/users/aaron/JupyterNotebook/W. Zhang Lab/projects/heartbeat/out'

psd_mat_path = _out_dir + '/psd_mat.csv'      # PSD matrix
psd_freqs_path = _out_dir + '/psd_freqs.csv'  # axis values matching the PSD matrix columns (presumably frequencies)
info_path = _out_dir + '/info_df.csv'         # metadata table (holds the 'category' column)

# prep

In [None]:
# Read the two comma-separated numeric files with identical settings.
psd_mat, psd_freqs = (
    np.loadtxt(path, delimiter=',')
    for path in (psd_mat_path, psd_freqs_path)
)

# Metadata table; the first CSV column becomes the index.
info_df = pd.read_csv(info_path, index_col=0)

# Category label of every metadata row, as a plain array for chooseDataset.
categories = info_df['category'].values

# Cell output: how many rows each category has.
info_df['category'].value_counts()

In [None]:
# Summary of psd_mat along axis 0, plotted against psd_freqs: mean first, then
# standard deviation. Each figure carries a title and axis labels so the two
# plots can be told apart when the notebook is skimmed.
ax = sns.lineplot(x=psd_freqs, y=psd_mat.mean(axis=0))
ax.set(title='psd_mat: mean over axis 0', xlabel='psd_freqs', ylabel='mean')
plt.show()

ax = sns.lineplot(x=psd_freqs, y=psd_mat.std(axis=0))
ax.set(title='psd_mat: std over axis 0', xlabel='psd_freqs', ylabel='std')
plt.show()

In [None]:
# Named comparisons: each key maps to the list of category labels that are
# passed to chooseDataset for that experiment. Labels are spelled exactly as
# they are written in this notebook's other cells.
datasets = dict(
    age=['Normal', '28Day', '14Day'],
    feeding=['Normal', 'Thirste', 'Starved'],
    mating=['Normal', 'Failed', 'Mated'],
    mated_vs_normal=['Normal', 'Mated'],
    mated_vs_failed=['Failed', 'Mated'],
    mated=['Mated', 'Mated+Female'],
    failed=['Failed', 'Failed+Female'],
    all=['Normal', '28Day', '14Day', 'Thirste', 'Starved', 'Failed', 'Mated'],
)

# LDA

In [None]:
# LDA on the 'age' comparison; chooseDataset is asked to balance the classes.
X, Y = chooseDataset(
    psd_mat, categories, datasets['age'],
    balancing=True,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# LDA on the 'feeding' comparison; chooseDataset is asked to balance the classes.
X, Y = chooseDataset(
    psd_mat, categories, datasets['feeding'],
    balancing=True,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# LDA on the 'mating' comparison; chooseDataset is asked to balance the classes.
X, Y = chooseDataset(
    psd_mat, categories, datasets['mating'],
    balancing=True,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# LDA on the two-class 'mated_vs_normal' comparison, with class balancing.
X, Y = chooseDataset(
    psd_mat, categories, datasets['mated_vs_normal'],
    balancing=True,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# LDA on the two-class 'mated_vs_failed' comparison, with class balancing.
X, Y = chooseDataset(
    psd_mat, categories, datasets['mated_vs_failed'],
    balancing=True,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# LDA on 'Mated' vs 'Mated+Female'. max_tol_imbalance_ratio=3 is passed to
# chooseDataset here, unlike the cells above that rely on its default.
X, Y = chooseDataset(
    psd_mat, categories, datasets['mated'],
    balancing=True,
    max_tol_imbalance_ratio=3,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# LDA on 'Failed' vs 'Failed+Female', with max_tol_imbalance_ratio=3 passed
# explicitly to chooseDataset.
X, Y = chooseDataset(
    psd_mat, categories, datasets['failed'],
    balancing=True,
    max_tol_imbalance_ratio=3,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# All seven categories listed under datasets['all'] in a single LDA run,
# with max_tol_imbalance_ratio=3 passed explicitly to chooseDataset.
X, Y = chooseDataset(
    psd_mat, categories, datasets['all'],
    balancing=True,
    max_tol_imbalance_ratio=3,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

## Projecting (+ vinegar; labelled `Venigar` in the data) onto the feeding state axis

In [None]:
# Feeding categories plus the 'Starved+Venigar' group. A new list is built so
# that datasets['feeding'] itself is left unmodified.
lst = datasets['feeding'] + ['Starved+Venigar']

X, Y = chooseDataset(
    psd_mat, categories, lst,
    balancing=True,
    max_tol_imbalance_ratio=5,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# Mating categories plus the two '+Female' groups. A new list is built so
# that datasets['mating'] itself is left unmodified.
lst = [*datasets['mating'], 'Failed+Female', 'Mated+Female']

X, Y = chooseDataset(
    psd_mat, categories, lst,
    balancing=True,
    max_tol_imbalance_ratio=5,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

In [None]:
# Two-class LDA: 'Starved' against 'Starved+Venigar'.
lst = [
    'Starved',
    'Starved+Venigar',
]

X, Y = chooseDataset(
    psd_mat, categories, lst,
    balancing=True,
    max_tol_imbalance_ratio=3,
)
# 100 epochs, with printed and plotted results.
f1_train, f1_test = getLDAResults(
    X, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=psd_freqs,
)

# PCA and GMM

In [None]:
from sklearn.decomposition import PCA

In [None]:
# PCA on the normalized 'mated_vs_failed' data.
X, Y = chooseDataset(
    psd_mat, categories, datasets['mated_vs_failed'],
    balancing=True,
)
X = normalize(X)

# Full decomposition (no component limit); pcs holds the projected data.
pca = PCA()
pcs = pca.fit_transform(X)

# Cumulative explained-variance ratio against the number of components kept.
cumulative_ratio = pca.explained_variance_ratio_.cumsum()
plt.plot(cumulative_ratio)
plt.show()

In [None]:
# LDA on the leading principal components only.
# The number of components is written once, and the index vector handed to
# getLDAResults is derived from the sliced matrix, so the two can never
# disagree (the original hard-coded 5 in both places).
n_pcs = 5
X_slim = pcs[:, :n_pcs]

# psd_freqs receives component indices here instead of the frequency axis,
# because the columns of X_slim are principal components.
_, _ = getLDAResults(
    X_slim, Y,
    n_epochs=100, printing=True, plotting=True,
    psd_freqs=np.arange(X_slim.shape[1]),
)

In [None]:
from sklearn.mixture import GaussianMixture as GMM

In [None]:
# Unsupervised two-component Gaussian mixture on the 'mated_vs_failed' data.
X, Y = chooseDataset(psd_mat, categories, datasets['mated_vs_failed'], balancing=True)

# random_state pins the mixture's initialization so that the displayed
# cluster labels do not change from one run of this cell to the next.
# NOTE(review): chooseDataset may itself sample randomly when balancing;
# that is not controlled here.
gmm = GMM(n_components=2, random_state=0)
labels_pred = gmm.fit_predict(X)

# Cell output: predicted component index for every row of X.
labels_pred

# Softmax + L1

In [None]:
from sklearn.linear_model import LogisticRegression as LR

In [None]:
# L1-penalized logistic regression on the normalized 'mated_vs_failed' data.
X, Y = chooseDataset(psd_mat, categories, datasets['mated_vs_failed'], balancing=True)

X = normalize(X)

lr = LR(penalty='l1', C=0.3, solver='liblinear', max_iter=1000)
lr.fit(X, Y)

# The model is scored on the same data it was fitted on, so this F1 is a
# training score, not an estimate of generalization.
Y_pred = lr.predict(X)
f1 = metrics.f1_score(Y, Y_pred, average='macro')

# Absolute coefficient per feature, plotted against psd_freqs.
plt.plot(psd_freqs, np.abs(lr.coef_.flatten()))
plt.xlabel('psd_freqs')
plt.ylabel('|coefficient|')
# State what the number is, so it is not mistaken for a held-out score.
plt.title('macro F1 on the fitting data: %.3f' % f1)
plt.show()

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Linear-kernel SVM on the 'age' comparison, scored over repeated splits.
X, Y = chooseDataset(
    psd_mat, categories, datasets['age'],
    balancing=True,
)

n_epochs = 100
score_train_list, score_test_list = [], []

for idx_epoch in range(n_epochs):
    # New 80/20 split on every repetition; normalization is applied after
    # the split through normalize_list.
    X_train, X_test, Y_train, Y_test = splitDataset(X, Y, test_ratio=0.2)
    X_train, X_test = normalize_list([X_train, X_test])

    svm_model = SVC(kernel='linear')
    svm_model.fit(X_train, Y_train)

    # SVC.score reports mean accuracy on the given data.
    score_train = svm_model.score(X_train, Y_train)
    score_test = svm_model.score(X_test, Y_test)
    score_train_list.append(score_train)
    score_test_list.append(score_test)

# Mean and standard deviation of the scores over all repetitions.
train_mean, train_std = np.mean(score_train_list), np.std(score_train_list)
test_mean, test_std = np.mean(score_test_list), np.std(score_test_list)

print('method: svm, kernel: linear')
print('n_epochs: %d' % n_epochs)
print('score_train: %.3f (+- %.3f)' % (train_mean, train_std))
print('score_test: %.3f (+- %.3f)' % (test_mean, test_std))


# pred_train = svm_model.predict(X_train)
# pred_test = svm_model.predict(X_test)
# acc_train = sum(Y_train == pred_train) / len(Y_train)
# acc_test = sum(Y_test == pred_test) / len(Y_test)
# print(acc_train, acc_test)


In [None]:
# Linear-kernel SVM on the 'feeding' comparison, scored over repeated splits.
X, Y = chooseDataset(
    psd_mat, categories, datasets['feeding'],
    balancing=True,
)

n_epochs = 100
score_train_list, score_test_list = [], []

for idx_epoch in range(n_epochs):
    # New 80/20 split on every repetition; normalization is applied after
    # the split through normalize_list.
    X_train, X_test, Y_train, Y_test = splitDataset(X, Y, test_ratio=0.2)
    X_train, X_test = normalize_list([X_train, X_test])

    svm_model = SVC(kernel='linear')
    svm_model.fit(X_train, Y_train)

    # SVC.score reports mean accuracy on the given data.
    score_train = svm_model.score(X_train, Y_train)
    score_test = svm_model.score(X_test, Y_test)
    score_train_list.append(score_train)
    score_test_list.append(score_test)

# Mean and standard deviation of the scores over all repetitions.
train_mean, train_std = np.mean(score_train_list), np.std(score_train_list)
test_mean, test_std = np.mean(score_test_list), np.std(score_test_list)

print('method: svm, kernel: linear')
print('n_epochs: %d' % n_epochs)
print('score_train: %.3f (+- %.3f)' % (train_mean, train_std))
print('score_test: %.3f (+- %.3f)' % (test_mean, test_std))


In [None]:
# Linear-kernel SVM on the 'mating' comparison, scored over repeated splits.
X, Y = chooseDataset(
    psd_mat, categories, datasets['mating'],
    balancing=True,
)

# NOTE(review): the whole matrix is normalized once, before splitting, whereas
# the 'age' and 'feeding' SVM cells call normalize_list on the train/test
# halves after the split. If normalize uses statistics of the full matrix,
# the test rows influence the scaling - confirm which variant is intended.
X = normalize(X)

n_epochs = 100
score_train_list, score_test_list = [], []

for idx_epoch in range(n_epochs):
    # New 80/20 split on every repetition.
    X_train, X_test, Y_train, Y_test = splitDataset(X, Y, test_ratio=0.2)

    svm_model = SVC(kernel='linear')
    svm_model.fit(X_train, Y_train)

    # SVC.score reports mean accuracy on the given data.
    score_train = svm_model.score(X_train, Y_train)
    score_test = svm_model.score(X_test, Y_test)
    score_train_list.append(score_train)
    score_test_list.append(score_test)

# Mean and standard deviation of the scores over all repetitions.
train_mean, train_std = np.mean(score_train_list), np.std(score_train_list)
test_mean, test_std = np.mean(score_test_list), np.std(score_test_list)

print('method: svm, kernel: linear')
print('n_epochs: %d' % n_epochs)
print('score_train: %.3f (+- %.3f)' % (train_mean, train_std))
print('score_test: %.3f (+- %.3f)' % (test_mean, test_std))


In [None]:
# RBF-kernel SVM on the 'mating' comparison, scored over repeated splits.
X, Y = chooseDataset(psd_mat, categories, datasets['mating'], balancing=True)

X = normalize(X)

n_epochs = 100
# Single source of truth for the kernel: the model and the printed report both
# read it, so the report cannot mislabel the run (it used to say 'linear'
# while the model was fitted with kernel='rbf').
kernel_type = 'rbf'
score_train_list = []
score_test_list = []

for idx_epoch in range(n_epochs):

    # New 80/20 split on every repetition.
    X_train, X_test, Y_train, Y_test = splitDataset(X, Y, test_ratio=0.2)

    svm_model = SVC(kernel=kernel_type)
    svm_model.fit(X_train, Y_train)
    # SVC.score reports mean accuracy on the given data.
    score_train = svm_model.score(X_train, Y_train)
    score_test = svm_model.score(X_test, Y_test)

    score_train_list.append(score_train)
    score_test_list.append(score_test)

# Mean and standard deviation of the scores over all repetitions.
print('method: svm, kernel: ' + kernel_type)
print('n_epochs: %d'%n_epochs)
print('score_train: %.3f (+- %.3f)' % (np.mean(score_train_list), np.std(score_train_list)))
print('score_test: %.3f (+- %.3f)' % (np.mean(score_test_list), np.std(score_test_list)))


In [None]:
# Sweep of the SVM regularization parameter C with a linear kernel on the
# 'mated_vs_failed' comparison.
X, Y = chooseDataset(psd_mat, categories, datasets['mated_vs_failed'], balancing=True)

X = normalize(X)

C_list = np.array([0.01, 0.1, 1, 10, 100])
n_epochs = 100
kernel_type = 'linear'

print('method: svm, kernel: ' + kernel_type)

for C in C_list:
    print('C = %.2f' % C)
    score_train_list = []
    score_test_list = []

    for idx_epoch in range(n_epochs):

        # New 80/20 split on every repetition.
        X_train, X_test, Y_train, Y_test = splitDataset(X, Y, test_ratio=0.2)

        # Pass the swept value to the model. Without C=C every iteration of
        # the outer loop trained the same default-C classifier, so the sweep
        # compared nothing.
        svm_model = SVC(kernel=kernel_type, C=C)
        svm_model.fit(X_train, Y_train)
        # SVC.score reports mean accuracy on the given data.
        score_train = svm_model.score(X_train, Y_train)
        score_test = svm_model.score(X_test, Y_test)

        score_train_list.append(score_train)
        score_test_list.append(score_test)

    # Mean and standard deviation over the repetitions for this value of C.
    print('score_train: %.3f (+- %.3f)' % (np.mean(score_train_list), np.std(score_train_list)))
    print('score_test: %.3f (+- %.3f)' % (np.mean(score_test_list), np.std(score_test_list)))


In [None]:
# Sweep of the SVM regularization parameter C with an RBF kernel on the
# 'mated_vs_failed' comparison.
X, Y = chooseDataset(psd_mat, categories, datasets['mated_vs_failed'], balancing=True)

X = normalize(X)

C_list = np.array([0.01, 0.1, 1, 10, 100])
n_epochs = 100
kernel_type = 'rbf'

print('method: svm, kernel: ' + kernel_type)

for C in C_list:
    print('C = %.2f' % C)
    score_train_list = []
    score_test_list = []

    for idx_epoch in range(n_epochs):

        # New 80/20 split on every repetition.
        X_train, X_test, Y_train, Y_test = splitDataset(X, Y, test_ratio=0.2)

        # Pass the swept value to the model. Without C=C every iteration of
        # the outer loop trained the same default-C classifier, so the sweep
        # compared nothing.
        svm_model = SVC(kernel=kernel_type, C=C)
        svm_model.fit(X_train, Y_train)
        # SVC.score reports mean accuracy on the given data.
        score_train = svm_model.score(X_train, Y_train)
        score_test = svm_model.score(X_test, Y_test)

        score_train_list.append(score_train)
        score_test_list.append(score_test)

    # Mean and standard deviation over the repetitions for this value of C.
    print('score_train: %.3f (+- %.3f)' % (np.mean(score_train_list), np.std(score_train_list)))
    print('score_test: %.3f (+- %.3f)' % (np.mean(score_test_list), np.std(score_test_list)))
