In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch 
from torch import nn 
from torch.utils.data import Subset
from torch.utils.data import Dataset,DataLoader,TensorDataset, random_split 

import scipy.signal as signal
import scipy.stats as stats

from helper import hrv_feature_extractor
from helper import new_coral_training
from helper import new_compute_mae_and_mse
from dataset import personal_standarlization
from model import MyDataset
from model import MyDataLoader

## HRV feature extraction

In [None]:
# Load the ECG table: the last column is used as the label, every other
# column is treated as signal samples.
data = pd.read_csv("Biovid/input_ecg_part_a.csv", index_col=0)
biovid_label = np.array(data.iloc[:, -1])
raw_signal = np.array(data.iloc[:, :-1])
print("ECG signal shape: ", raw_signal.shape)

# One subject id per row: ids 0..86, each repeated 100 times.
# NOTE(review): assumes the CSV rows are grouped by subject with exactly 100
# consecutive rows per subject — confirm against the data file.
biovid_subject_id = np.repeat(np.arange(87), 100)

In [None]:
# Run the project helper on the raw ECG array; the result is wrapped in a
# DataFrame by the following cell.
# NOTE(review): presumably yields one row of five HRV features per recording
# (the next cell renames columns 0-4) — confirm in helper.py.
hrv_feature = hrv_feature_extractor(raw_signal)

In [None]:
# Wrap the extracted features in a DataFrame, give the five feature columns
# readable names, and append the subject id of each row as the last column.
hrv_feature = (
    pd.DataFrame(hrv_feature)
    .rename(
        columns={
            0: 'Value1',
            1: 'Value2',
            2: 'Value3',
            3: 'Value4',
            4: 'Value5',
        }
    )
    .assign(id=pd.Series(biovid_subject_id))
)

In [None]:
# Per-subject summary statistics for each of the five HRV features.
def mad(values):
    """Return the mean absolute deviation of ``values`` around their mean.

    Replaces the ``'mad'`` aggregation string: ``Series.mad`` / ``GroupBy.mad``
    were deprecated in pandas 1.5 and removed in pandas 2.0, so the string
    alias no longer resolves there. The function is deliberately named ``mad``
    because pandas labels the output column with the callable's ``__name__``,
    which keeps the ``*_mad`` column names unchanged.
    """
    return (values - values.mean()).abs().mean()


grouped = hrv_feature.groupby('id')

summary_funcs = ['min', 'max', 'var', 'std', mad, 'mean']
feature_columns = ['Value1', 'Value2', 'Value3', 'Value4', 'Value5']
result = grouped.agg({column: summary_funcs for column in feature_columns})

# Flatten the (feature, statistic) MultiIndex columns into names such as 'Value1_min'.
result.columns = result.columns.map('_'.join)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Take the feature columns (everything except the trailing 'id' column) and
# impute missing values with the column mean before standardizing.
hrv = hrv_feature.iloc[:, :-1]
hrv_filled = hrv.fillna(hrv.mean())
scaler = StandardScaler()
dataframe_scaled = scaler.fit_transform(hrv_filled)

# Project the standardized features onto their first principal component (PC1).
pca = PCA(n_components=1)
dataframe_transformed = pca.fit_transform(dataframe_scaled)

# Wrap PC1 in a DataFrame and attach the subject id of each row.
dataframe_pc1 = pd.DataFrame(dataframe_transformed, columns=['PC1']).assign(
    id=pd.Series(biovid_subject_id)
)

In [None]:
# Per-subject summary statistics of PC1.
grouped = dataframe_pc1.groupby('id')

# Named aggregation yields the flat 'PC1_<stat>' column names directly.
# 'mad' is computed with an explicit callable because Series.mad / GroupBy.mad
# were deprecated in pandas 1.5 and removed in pandas 2.0, so the 'mad'
# aggregation string no longer resolves there.
result = grouped['PC1'].agg(
    PC1_min='min',
    PC1_max='max',
    PC1_var='var',
    PC1_std='std',
    PC1_mad=lambda values: (values - values.mean()).abs().mean(),
    PC1_mean='mean',
)

# Print results
print(result)

In [None]:
# Per-subject ratio std / mean of PC1 (a coefficient of variation; despite the
# column name this is not a covariance).
# NOTE(review): PC1 is derived from centered data, so a subject's mean can be
# near zero or negative, which makes this ratio very large or sign-flipped —
# confirm this is intended.
result['cov'] = result['PC1_std'].div(result['PC1_mean'])

In [None]:
import numpy as np

def scale_with_tanh(vector, a, b):
    """Squash ``vector`` through tanh and rescale the result to lie between ``a`` and ``b``.

    ``tanh`` maps the input into (-1, 1); shifting by 1 and multiplying by half
    the width of the target interval maps that onto (0, b - a), and adding
    ``a`` moves it to (a, b). An input of 0 lands on the midpoint.

    Parameters
    ----------
    vector : scalar or array-like accepted by ``np.tanh``
        Values to rescale.
    a, b : numbers
        Lower and upper limits of the target interval.

    Returns
    -------
    Same kind of object as ``np.tanh(vector)``, holding the rescaled values.
    """
    half_width = (b - a) / 2
    return (np.tanh(vector) + 1) * half_width + a

# Squash each subject's 'cov' value through tanh so it lies between 0.9 and 1.1.
scaled_vector = scale_with_tanh(result['cov'], 0.9, 1.1)

In [None]:
# Show the per-subject scaled values as the cell output.
scaled_vector

## BioVid data preparation

In [None]:
# Load the GSR table: the last column is used as the label, every other
# column is treated as signal samples.
data = pd.read_csv("Biovid/input_gsr_part_a.csv", index_col=0)
raw_signal_gsr = np.array(data.iloc[:, :-1])
biovid_label_gsr = np.array(data.iloc[:, -1])

# Subject ids 0..86 are iterated by the leave-one-subject-out loop; the tensor
# copy of the per-row ids is used there to build boolean masks.
total_valid_file = list(range(87))
subject_id = torch.tensor(biovid_subject_id)

# NOTE(review): personal_standarlization is a project helper; presumably it
# standardizes each subject's rows separately — confirm in dataset.py.
Biovid_gsr_standarlizad = personal_standarlization(pd.DataFrame(raw_signal_gsr), biovid_subject_id)
standarlized_eda_tensor = torch.tensor(Biovid_gsr_standarlizad.values, dtype=torch.float32)

# One record per row: subject id, standardized signal, and label.
data = [
    {
        'subject_id': biovid_subject_id[row],
        'signal': standarlized_eda_tensor[row],
        'label': biovid_label_gsr[row],
    }
    for row in range(standarlized_eda_tensor.shape[0])
]

ds_biovid = MyDataset(data)

## Cross-validation (leave-one-subject-out)

In [None]:
# Leave-one-subject-out evaluation: each subject in turn is held out for
# testing while a fresh model is trained on all remaining subjects.
# NOTE(review): scaled_vector is computed from every subject's HRV features,
# including the held-out one — confirm this is intended.
total_mae, total_mse = [], []

for subject in total_valid_file:
    print("current subject for testing: ", subject)

    # Boolean row masks: held-out subject versus everyone else.
    test_mask = (subject_id == subject)
    training_mask = ~test_mask

    training_data = Subset(ds_biovid, np.where(training_mask)[0])
    heldout_data = Subset(ds_biovid, np.where(test_mask)[0])

    # Batch both splits with the project data loader.
    dl_train = MyDataLoader(training_data, batch_size=32)
    dl_val = MyDataLoader(heldout_data, batch_size=32)

    Coral_model = new_coral_training(
        dl_train,
        5,
        scaled_vector,
        important_weight_type="hard5",
        n_epochs=20,
    )

    # Score the trained model on the held-out subject.
    test_mae, test_mse = new_compute_mae_and_mse(Coral_model, dl_val)
    total_mae.append(test_mae)
    total_mse.append(test_mse)

# Aggregate over folds; RMSE is the square root of the mean per-fold MSE.
print("LOSO MAE: ", np.mean(total_mae))
print("LOSO MSE: ", np.mean(total_mse))
print("LOSO RMSE: ", np.sqrt(np.mean(total_mse)))