In [1]:
import torch
# import pickle
from torch.utils.data import DataLoader
from torch.autograd import Variable
from dataset_SNN import Train_Siamese
from model import Siamese
import time
import math
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import pandas as pd
import statistics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import geomloss
from sklearn import svm
import xgboost as xgb

import random
import os
import torch
import numpy as np
# Seed the Python, NumPy and PyTorch (CPU and CUDA) random number generators
# with one shared value so that repeated runs of the script are comparable.
seed = 32
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Ask cuDNN for deterministic kernels and switch off its autotuner, which may
# otherwise select different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


def init_weights(m):
    """Initialise a linear layer in place; meant to be used with ``Module.apply``.

    ``torch.nn.Linear`` modules receive Xavier-uniform weights and, when they
    have a bias term, a constant bias of 0.01. Every other module type is
    left untouched.

    Args:
        m: The module to (possibly) initialise.
    """
    if not isinstance(m, torch.nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    # Layers created with ``bias=False`` expose ``bias`` as None.
    if m.bias is not None:
        torch.nn.init.constant_(m.bias, 0.01)


# Location of the input table (absolute path on the original machine).
filename = '/home/grads/k/kexin/couple/fill_missing/filled_vanilla.csv'

# Columns retained from the CSV: the 'ID' and 'Stress' columns followed by the
# feature columns. An earlier variant of this list (now disabled) additionally
# kept 'partner' and 'Report' right after 'ID'; another disabled variant kept
# only 'ID', 'partner', 'Report' and 'Stress'.
feature_head = ['ID', 'Stress', 'scl', 'nscrs.01', 'fscrs.01', 'amp.01', 'BodyTemp', 'IBI', 'BPM', 'HRV', 'AllWC', 'AllAnalytic', 'AllClout', 'AllAuthentic', 'AllTone', 'AllSixltr', 'AllDic', 'AllFunct', 'AllPronoun', 'AllPpron', 'AllI', 'AllWe', 'AllYou', 'AllShehe', 'AllThey', 'AllIpron', 'AllArticle', 'AllPreps', 'AllAuxverb', 'AllAdverb', 'AllConj', 'AllNegate', 'AllVerb', 'AllAdj', 'AllCompare', 'AllInterrog', 'AllNumber', 'AllQuant', 'AllAffect', 'AllPosemo', 'AllNegemo', 'AllAnx', 'AllAnger', 'AllSad', 'AllSocial', 'AllFamily', 'AllFriend', 'AllFemale', 'AllMale', 'AllCogproc', 'AllInsight', 'AllCause', 'AllDiscrep', 'AllTentat', 'AllCertain', 'AllDiffer', 'AllPercept', 'AllSee', 'AllHear', 'AllFeel', 'AllBio', 'AllBody', 'AllHealth', 'AllSexual', 'AllIngest', 'AllDrives', 'AllAffiliation', 'AllAchieve', 'AllPower', 'AllReward', 'AllRisk', 'AllWork', 'AllLeisure', 'AllHome', 'AllMoney', 'AllRelig', 'AllDeath', 'AllInformal', 'AllSwear', 'AllNetspeak', 'AllAssent', 'AllNonfl', 'AllFiller', 'PitchMean', 'PitchMedian', 'PitchSD', 'PitchMin', 'PitchMax', 'PitchRange', 'LoudMean', 'LoudMedian', 'LoudSD', 'LoudMax']

# IDs of the couples that take part in the evaluation.
all_couples = [102, 104, 105, 117, 126, 133, 135, 155, 177, 183, 188, 708, 709, 711, 713, 714, 715, 716, 207, 717, 718, 719, 720, 721, 722, 723, 724, 725, 727, 729, 730, 731, 732, 733, 735, 736, 737, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 755, 756, 757, 758, 760, 762, 763, 765, 767, 769, 771, 772, 773, 774, 775, 776, 777, 779, 781, 783, 784, 786, 787, 789, 791, 793, 794, 796, 799, 800, 907, 908, 913, 916, 924, 930, 945, 966]

# Load the table, keep rows that carry a Stress value, restrict to the chosen
# columns, drop rows with any remaining missing value, and finally keep only
# the couples listed above.
df = pd.read_csv(filename)
has_stress_label = df['Stress'].notnull()
df = df.loc[has_stress_label, feature_head].dropna()
df = df[df['ID'].isin(all_couples)]

# Accumulators filled by the per-couple loop: predictions and true labels.
pred_global, y = [], []

# Per-couple experiment. For every couple ID:
#   1. build the training set through Train_Siamese(filename, couple);
#   2. take the rows of that couple that come after its first 40% as test set;
#   3. train a fresh Siamese network on the training pairs, adding a
#      distribution-matching penalty between test and training embeddings;
#   4. embed train/test samples with the trained network and classify the
#      test embeddings with an SVM fitted on the training embeddings.
# Predictions and labels are accumulated in pred_global / y and summarised in
# a single classification report at the end.

# Both loss modules are loop-invariant, so they are built once up front
# instead of once per couple / once per mini-batch and ratio.
loss_fn = torch.nn.BCELoss()
wasserstein_fn = geomloss.SamplesLoss(blur=0.14)

# Candidate fractions of stressed samples in the training subset that is
# compared against the test embeddings; the best-matching one is used.
ratios = [0.90, 0.5, 0.10]

for couple in all_couples:

    trainSet = Train_Siamese(filename, couple)
    trainLoader = DataLoader(trainSet, batch_size=16, shuffle=True)

    # Skip the first 40% of this couple's rows; the remainder is the test set.
    test_df = df.loc[df['ID'] == couple]
    select_count = int(len(test_df) * 0.4)
    test_df = test_df.iloc[select_count:, :]

    # Any non-zero Stress value counts as the positive class. Stressed rows
    # are stacked first so the labels can be generated by position.
    test_df_stressed = test_df.loc[test_df['Stress'] != 0].drop(columns=['Stress', 'ID']).to_numpy()
    test_df_unstressed = test_df.loc[test_df['Stress'] == 0].drop(columns=['Stress', 'ID']).to_numpy()

    test_X = np.concatenate((test_df_stressed, test_df_unstressed), axis=0)
    test_y = [1] * len(test_df_stressed) + [0] * len(test_df_unstressed)
    y.extend(test_y)

    test_X = torch.tensor(test_X)
    train_X = np.array(trainSet.train_X)
    train_y = trainSet.train_y

    net = Siamese()
    net.apply(init_weights)
    net.train()

    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(15):
        running_loss = 0.0
        for data1, data2, label in trainLoader:
            optimizer.zero_grad()
            output = net(data1, data2)
            loss_Siamese = loss_fn(output, label)

            # NOTE: the (unlabelled) test features take part in the training
            # objective through the distribution-matching term below.
            test_X_embedding = net.forward_one(test_X)
            n_test = len(test_X_embedding)

            loss_Wasserstein = []
            for ratio in ratios:
                n_stressed = int(n_test * ratio)
                # Draw a training subset of the same size as the test set.
                # The offset added to the second draw presumably relies on
                # trainSet.train_X listing all stressed rows before the
                # unstressed ones -- confirm against Train_Siamese.
                random_stressed = list(np.random.choice(trainSet.train_stress_count, n_stressed, replace=False))
                random_unstressed = list(np.random.choice(trainSet.train_unstress_count, n_test - n_stressed, replace=False) + trainSet.train_stress_count)
                train_subset = torch.tensor(train_X[random_stressed + random_unstressed, :])
                train_X_embedding = net.forward_one(train_subset)
                loss_Wasserstein.append(wasserstein_fn(test_X_embedding, train_X_embedding))

            # Only the smallest of the candidate distances is penalised.
            # loss = loss_Siamese
            loss = loss_Siamese + 0.28 * min(loss_Wasserstein) / n_test

            running_loss += loss.detach()
            loss.backward()
            optimizer.step()

        torch.cuda.empty_cache()

    # Embed both sets with the trained network; no gradients are needed here.
    net.eval()
    with torch.no_grad():
        train_X = net.forward_one(torch.tensor(train_X)).numpy()
        test_X = net.forward_one(test_X).numpy()

    # Optional dump of the embeddings for offline analysis:
    # np.save('3_bounds/train_X_' + str(couple) + '.npy', train_X)
    # np.save('3_bounds/train_y_' + str(couple) + '.npy', train_y)
    # np.save('3_bounds/test_X_' + str(couple) + '.npy', test_X)
    # np.save('3_bounds/test_y_' + str(couple) + '.npy', test_y)

    model = svm.SVC(C=0.64, random_state=32)
    model.fit(train_X, train_y)
    # tolist() yields plain Python scalars, which keeps the printed output
    # independent of how the installed NumPy version renders its scalars.
    pred = model.predict(test_X).tolist()
    pred_global.extend(pred)

    print('-' * 5, 'couple:', couple, '-' * 5)
    print('pred:', pred)
    print('true:', test_y)

print(classification_report(y, pred_global, zero_division=0, digits=4), '\n')

# print(y)
# print(pred_global)


----- couple: 102 -----
pred: [1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0]
true: [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 104 -----
pred: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
true: [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 105 -----
pred: [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
true: [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 117 -----
pred: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
true: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 126 -----
pred: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
true: [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 133 -----
pred: [1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0]
true: [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 135 -----
pred: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
true: [1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 155 -----
pred: [1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0]
true: [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
----- 

----- couple: 777 -----
pred: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
true: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 779 -----
pred: [1, 1, 1, 1, 1, 1]
true: [1, 1, 1, 1, 1, 0]
----- couple: 781 -----
pred: [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1]
true: [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 783 -----
pred: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
true: [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
----- couple: 784 -----
pred: [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1]
true: [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 786 -----
pred: [1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0]
true: [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 787 -----
pred: [1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0]
true: [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 789 -----
pred: [0, 0, 1, 0, 0, 1, 0, 0, 1, 0]
true: [1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
----- couple: 791 -----
pred: [1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true: