# In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# --- Real-label model: tune an SVM with a grid search, evaluate it on a
# --- held-out 20% split, and plot its ROC curve.
df = pd.read_csv('fig2.csv')

X = df[['Trail', 'Max_value', 'time_to_peak', 'freezing', 'freezingTime']]
y = df['group']

# Split BEFORE scaling: fitting the scaler on the full table would let the
# held-out rows influence the mean/variance used for training (data leakage).
# The split itself is unchanged (same sample count and random_state).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics learned from training rows only
X_test = scaler.transform(X_test)        # test rows reuse the training statistics

param_grid = {'C': [0.1, 1], 'gamma': [0.1, 1], 'kernel': ['linear', 'rbf']}

grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3, cv=5)
grid.fit(X_train, y_train)

print(grid.best_params_)

# Refit with probability estimates enabled so predict_proba is available for
# the ROC curve. random_state pins the internal shuffling used by the
# probability calibration, matching the fixed seed used elsewhere.
svm = SVC(**grid.best_params_, probability=True, random_state=42)
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)

score = accuracy_score(y_test, y_pred)
print(f'Score: {score}')

cm = confusion_matrix(y_test, y_pred)
print(f'Confusion Matrix:\n{cm}')

# Column 1 of predict_proba belongs to svm.classes_[1]; tell roc_curve so
# explicitly, otherwise labels that are not coded {0, 1} / {-1, 1} are rejected.
y_prob = svm.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label=svm.classes_[1])
auc = roc_auc_score(y_test, y_prob)

plt.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
plt.plot([0, 1], [0, 1], linestyle='--')  # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()



# --- Shuffled-label control: same features, but the 'group' labels are
# --- randomly permuted, so any score above chance indicates a pipeline artefact.
df_shuffled = df.sample(frac=1, random_state=42)
X1 = df[['Trail', 'Max_value', 'time_to_peak', 'freezing', 'freezingTime']]
# Drop the permuted pandas index: the labels must pair with X1 rows purely by
# position, and a plain array makes that explicit instead of relying on
# train_test_split ignoring the mismatched index.
y1 = df_shuffled['group'].to_numpy()

X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, test_size=0.2, random_state=42)

# Standardise exactly like the real-label model (fit on training rows only),
# so the control differs from it in the labels alone.
scaler1 = StandardScaler()
X_train1 = scaler1.fit_transform(X_train1)
X_test1 = scaler1.transform(X_test1)

param_grid1 = {'C': [0.1, 1], 'gamma': [0.1, 1], 'kernel': ['linear', 'rbf']}

grid1 = GridSearchCV(SVC(), param_grid1, refit=True, verbose=3, cv=5)
grid1.fit(X_train1, y_train1)

print(grid1.best_params_)

# Refit with probability estimates enabled for the ROC curve; random_state
# pins the internal shuffling of the probability calibration.
svm_control = SVC(**grid1.best_params_, probability=True, random_state=42)
svm_control.fit(X_train1, y_train1)

y_pred1 = svm_control.predict(X_test1)

score1 = accuracy_score(y_test1, y_pred1)
print(f'Score: {score1}')

cm1 = confusion_matrix(y_test1, y_pred1)
print(f'Confusion Matrix:\n{cm1}')

# Column 1 of predict_proba belongs to svm_control.classes_[1]; pass it as
# pos_label so non-{0, 1} label codings are handled. The thresholds get their
# own name so the real-label model's `thresholds` are not overwritten.
y_prob1 = svm_control.predict_proba(X_test1)[:, 1]
fpr1, tpr1, thresholds1 = roc_curve(y_test1, y_prob1, pos_label=svm_control.classes_[1])
auc1 = roc_auc_score(y_test1, y_prob1)

plt.plot(fpr1, tpr1, label=f'AUC1 = {auc1:.2f}')
plt.plot([0, 1], [0, 1], linestyle='--')  # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()



# --- Overlay both ROC curves (real labels vs. shuffled-label control) on a
# --- single set of axes for direct comparison.
plt.subplots()

plt.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
plt.plot(fpr1, tpr1, label=f'AUC1 = {auc1:.2f}')
# Draw the chance-level diagonal once; drawing it per curve only stacked two
# identical dashed lines in different colours.
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# One title for the shared axes: previously 'ROC Curve 1' was immediately
# overwritten by 'ROC Curve 2', mislabelling a figure that shows both curves.
plt.title('ROC Curve')
plt.legend()
plt.show()


from sklearn.model_selection import KFold, cross_val_score
from scipy.stats import ttest_rel
import numpy as np
from sklearn.svm import SVC

# --- Permutation test: compare per-fold accuracy of a linear SVM trained on
# --- the true labels against the mean accuracy obtained when the training
# --- labels are randomly permuted, using a paired t-test across folds.
df = pd.read_csv('fig2.csv')

# Work on plain arrays so the positional indices produced by KFold index rows
# by position (indexing a pandas Series with them is label-based and only
# coincides with positions for a default RangeIndex).
X = df[['Trail', 'Max_value', 'time_to_peak', 'freezing', 'freezingTime']].to_numpy()
y = df['group'].to_numpy()

kfold = KFold(n_splits=5, shuffle=True, random_state=42)
svm_model = SVC(C=0.1, gamma=0.1, kernel='linear')

n_shuffle = 100
# Seeded generator so the permutation baseline is reproducible run to run.
rng = np.random.default_rng(42)

scores = []
scores_shuffle = np.zeros((kfold.n_splits, n_shuffle))
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    # Fit the scaler inside the fold, on training rows only, so the held-out
    # fold never contributes to the standardisation statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[train_index])
    X_test = scaler.transform(X[test_index])
    y_train, y_test = y[train_index], y[test_index]

    # Accuracy with the true training labels.
    svm_model.fit(X_train, y_train)
    scores.append(svm_model.score(X_test, y_test))

    # Accuracy with permuted training labels (test labels stay untouched).
    for j in range(n_shuffle):
        y_shuffle = rng.permutation(y_train)
        svm_model.fit(X_train, y_shuffle)
        scores_shuffle[i, j] = svm_model.score(X_test, y_test)

# One baseline value per fold: the mean over that fold's permutations.
scores_shuffle_mean = np.mean(scores_shuffle, axis=1)

t_statistic, p_value = ttest_rel(scores, scores_shuffle_mean)
if p_value < 0.05:
    print('significance')
else:
    print('non_significance')

# Two named columns, one row per fold. Passing the second array positionally
# to pd.DataFrame would have used it as the row index instead of as data.
result = pd.DataFrame({'scores': scores, 'scores_shuffle_mean': scores_shuffle_mean})
print(scores)
print(scores_shuffle_mean)
print(result)

# Notebook output: FileNotFoundError: [Errno 2] No such file or directory: 'fig2.csv'