In [2]:
import time
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


### Load the sEEG data. Its shape is (number_of_electrodes, samples_recorded) = (84, 7526347).

In [3]:
# Read the full sEEG recording from disk; rows are electrodes, columns are samples.
stimulus_sEEG_data = np.load(file='../data/seeg.npy')

In [4]:
# Display the (electrodes, samples) dimensions of the loaded recording.
np.shape(stimulus_sEEG_data)

(84, 7526347)

### Load the face labels for the character Tony. Each second consists of 30 frames. A second is labeled '1' if more than 20 of its frames feature Tony, and '0' otherwise.

In [5]:
# Read the per-second Tony / non-Tony labels (threshold: more than 20 frames per second).
face_labels = np.load(file='../data/label_20.npy')

### A single second of sEEG data contains 1024 samples. We segment the sEEG data into one-second intervals; any trailing portion that does not fill a complete one-second segment is excluded from the analysis.

In [6]:
# Number of complete one-second windows (1024 samples each) in the recording;
# the remainder of the division is the partial trailing second that gets dropped.
num_splits = divmod(stimulus_sEEG_data.shape[1], 1024)[0]

In [8]:
# Keep every electrode but drop the trailing samples that do not fill a whole second,
# so the sample axis is an exact multiple of 1024.
truncated_stimulus_sEEG_data = stimulus_sEEG_data[..., :1024 * num_splits]

In [9]:
# Cut the sample axis into num_splits equal pieces: a list with one
# (electrodes, 1024) array per second of recording.
filtered_stimulus_sEEG_data = np.split(
    ary=truncated_stimulus_sEEG_data,
    indices_or_sections=num_splits,
    axis=1,
)

### Truncate the face labels to match the duration of the sEEG data

In [11]:
# Keep one label per retained one-second sEEG segment so that segments and
# labels line up index-for-index.
truncated_face_labels = face_labels[:num_splits]

# Slicing silently returns fewer than num_splits labels when the label file is
# shorter than the recording. Fail here with a clear message instead of letting
# the length mismatch surface later when the data and labels are split together.
if len(truncated_face_labels) != num_splits:
    raise ValueError(
        f"Expected {num_splits} face labels but only {len(truncated_face_labels)} are available; "
        "the label file covers less time than the sEEG recording."
    )

In [12]:

# Turn each one-second (electrodes, samples) segment into a single 1-D feature vector.
flattened_data = list(map(np.ndarray.flatten, filtered_stimulus_sEEG_data))


### Perform SVM classification between Tony and non-Tony video segments.
To compare the accuracy with the DNN, we also split the data into 70% training, 15% testing, and 15% validation, although the 15% validation set is not used.

In [13]:
# Stage 1: hold out 30% of the samples; the remaining 70% becomes the training set.
X_train, X_temp, y_train, y_temp = train_test_split(
    flattened_data,
    truncated_face_labels,
    test_size=0.3,
    random_state=42,
)

# Stage 2: halve the held-out 30% into a test set and a validation set,
# i.e. 15% of all samples each.
X_test, X_val, y_test, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

# Resulting partitions:
#   X_train / y_train -> training   (70%)
#   X_test  / y_test  -> test       (15%)
#   X_val   / y_val   -> validation (15%)

# Fit the scaler on the training set only, then apply that same scaling to all
# three partitions so the test/validation data never influence the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test, X_val = (scaler.transform(split) for split in (X_train, X_test, X_val))

### Evaluate the SVM with different kernels and values of C (the regularization parameter)

In [14]:


# Fit a linear-kernel SVM with the default regularization strength on the
# standardized training set.
svm_model = SVC(kernel='linear').fit(X_train, y_train)

# Predict the held-out test partition and report the fraction classified correctly.
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.5635


In [15]:
# Fit a linear-kernel SVM with C=0.001 on the standardized training set.
svm_model = SVC(kernel='linear', C=0.001).fit(X_train, y_train)

# Predict the held-out test partition and report the fraction classified correctly.
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.6470


In [17]:
# Fit a polynomial-kernel SVM with the default regularization strength on the
# standardized training set.
svm_model = SVC(kernel='poly').fit(X_train, y_train)

# Predict the held-out test partition and report the fraction classified correctly.
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.6615


In [16]:
# Fit a polynomial-kernel SVM with C=0.001 on the standardized training set.
svm_model = SVC(kernel='poly', C=0.001).fit(X_train, y_train)

# Predict the held-out test partition and report the fraction classified correctly.
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.6724
