# Load data

In [1]:
import numpy as np
# Parse both comma-separated files into NumPy arrays (control cohort, stroke cohort).
Control, Stroke = (
    np.genfromtxt(csv_name, delimiter=",")
    for csv_name in ("normalRRs_CON.csv", "normalRRs_STR.csv")
)


In [2]:
# Keep only columns 1..500 of each table: the data used for the rank-function analysis.
# Row 26 of the sliced Stroke table is dropped as well.
# NOTE(review): both names are overwritten in place, so re-running this cell slices and
# deletes again — re-run the loading cell first.
Control = Control[:, 1:501]
Stroke = np.delete(Stroke[:, 1:501], 26, axis=0)

# Compute persistent homology (PH) and persistence images (PI)

In [3]:
import cripser
from persim import PersistenceImager
import matplotlib.pyplot as plt
import persim



In [4]:
# Persistent homology (maxdim=0) of every series: one diagram per row of each table.
#
# cripser.computePH returns, for each cycle: its dimension, birth time, death time,
# the location (x1,y1,z1) of the cell giving birth to the cycle and the location
# (x2,y2,z2) of the cell destroying it. Only columns 1:3, i.e. the (birth, death)
# pair, are kept.
nc = Control.shape[0]
ns = Stroke.shape[0]

Control_PH = [cripser.computePH(Control[row, :], maxdim=0)[:, 1:3] for row in range(nc)]
Stroke_PH = [cripser.computePH(Stroke[row, :], maxdim=0)[:, 1:3] for row in range(ns)]


In [5]:
# Collect the smallest/largest birth time and the largest finite death time over all
# diagrams (controls first, then strokes). Nothing is replaced in the diagrams here;
# the values are only summarised.

# The infinite death time shows up in the diagrams as the largest finite float64
# (1.7976931348623157e+308).
inf_death = np.finfo(np.float64).max

min_birth_times = []
max_birth_times = []
max_death_times = []
for diagram in Control_PH + Stroke_PH:
    births = diagram[:, 0]
    deaths = diagram[:, 1]

    # NaN-aware reductions: a diagram can still contain NaN rows at this point (they
    # are only removed in a later cell) and np.min/np.max would then return NaN.
    min_birth_times.append(np.nanmin(births))
    max_birth_times.append(np.nanmax(births))

    # Largest death that is neither NaN/inf nor the infinity sentinel. Only the death
    # column is inspected; taking the second-largest entry of the flattened diagram
    # mixes in birth times and picks the sentinel itself when a NaN is present.
    finite_deaths = deaths[np.isfinite(deaths)]
    finite_deaths = finite_deaths[finite_deaths < inf_death]
    if finite_deaths.size:
        max_death_times.append(finite_deaths.max())

# nanmin/nanmax so the result does not depend on where a NaN sits in the list.
min_birth_time = np.nanmin(min_birth_times)
max_birth_time = np.nanmax(max_birth_times)
max_death_time = max(max_death_times, default=float("nan"))


In [8]:
# Display the smallest birth time found across all control and stroke diagrams.
min_birth_time

538.351

In [9]:
# Display the largest birth time found across all control and stroke diagrams.
max_birth_time

1468.64

In [6]:
# Drop every diagram row containing 1.7976931348623157e+308, the value that stands in
# for an infinite death time here.
inf_sentinel = 1.7976931348623157e+308

for diagrams in (Control_PH, Stroke_PH):
    for idx, diagram in enumerate(diagrams):
        # True for rows in which either column equals the sentinel.
        hits_sentinel = (diagram == inf_sentinel).any(axis=1)
        diagrams[idx] = diagram[~hits_sentinel]

In [7]:
# Remove every (birth, death) row that contains a NaN, in all diagrams.
# The NaNs were observed in Stroke diagram 25; scanning every diagram instead of
# hard-coding that index keeps the cell correct if the row selection upstream changes,
# and is a no-op for diagrams without NaN.
for diagrams in (Control_PH, Stroke_PH):
    for idx, diagram in enumerate(diagrams):
        has_nan = np.isnan(diagram).any(axis=1)
        diagrams[idx] = diagram[~has_nan]

In [8]:
# Range of persistence (death - birth) over all diagrams, controls first then strokes.
persistence_per_diagram = [
    diagram[:, 1] - diagram[:, 0] for diagram in Control_PH + Stroke_PH
]

min_persistences = [np.min(values) for values in persistence_per_diagram]
max_persistences = [np.max(values) for values in persistence_per_diagram]

# Overall smallest and largest persistence.
print(min(min_persistences))
print(max(max_persistences))

0.0060000000000854925
935.0


In [9]:
# Configure the PersistenceImager. Its tunable settings are:
# birth_range; pers_range; pixel_size; weight_params; kernel_params
#
# The rank functions use a 100x100 grid, so pixel_size = 10 is chosen to get an input of
# the same size. The birth range (500, 1500) brackets the observed birth times
# (min_birth_time is 538.351, max_birth_time is 1468.64).
pimgr = PersistenceImager(
    birth_range=(500, 1500),
    pers_range=(0, 1000),
    pixel_size=10,
)

In [10]:
# Persistence image of every diagram. Each diagram is passed to transform as a
# one-element list, and the single image it returns is unpacked with [0].
Control_PI = [pimgr.transform([diagram], skew=True)[0] for diagram in Control_PH[:nc]]
Stroke_PI = [pimgr.transform([diagram], skew=True)[0] for diagram in Stroke_PH[:ns]]

# SVM 

In [11]:
from sklearn.model_selection import cross_val_score
from sklearn import svm


In [13]:
# Feature matrix: one flattened persistence image per row, control rows first and
# stroke rows after them.
Control_PI_mat = np.array([np.ravel(image) for image in Control_PI])
Stroke_PI_mat = np.array([np.ravel(image) for image in Stroke_PI])

PI_mat = np.vstack((Control_PI_mat, Stroke_PI_mat))

In [14]:
# Class labels aligned with the rows of PI_mat: 0 = control (nc rows), 1 = stroke (ns rows).
y = np.repeat([0, 1], [nc, ns])

In [19]:
# Linear-kernel SVC (C=1), scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='linear')

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(clf, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.44444444 0.70588235 0.64705882 0.64705882 0.58823529]
Average accuracy: 0.6065359477124183
AUC_ROC for each fold: [0.65       0.80555556 0.83333333 0.77777778 0.70833333]
Average AUC_ROC: 0.7550000000000001


In [22]:
from sklearn.model_selection import KFold

# Linear-kernel SVC evaluated by repeated, shuffled 5-fold cross-validation.
clf = svm.SVC(kernel='linear', C=1)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeding the shuffle makes the numbers reproducible after a kernel restart, and
    # reusing one seeded splitter scores accuracy and AUC on identical folds (two
    # unseeded KFold objects would draw two unrelated partitions).
    # NOTE(review): KFold does not stratify by class; consider StratifiedKFold.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.6330065359477126
AUC-ROC: 0.7817609489276156


In [20]:
# Degree-3 polynomial-kernel SVC (C=1), scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='poly', degree=3)

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(clf, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.44444444 0.47058824 0.52941176 0.47058824 0.52941176]
Average accuracy: 0.4888888888888888
AUC_ROC for each fold: [0.7375     0.72222222 0.70833333 0.77777778 0.58333333]
Average AUC_ROC: 0.7058333333333333


In [23]:
# Degree-3 polynomial-kernel SVC evaluated by repeated, shuffled 5-fold cross-validation.
clf = svm.SVC(kernel='poly', C=1, degree=3)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.4518954248366013
AUC-ROC: 0.6841050646883982


In [216]:
# Degree-5 polynomial-kernel SVC (C=1), scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='poly', degree=5)

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(clf, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.44444444 0.47058824 0.47058824 0.47058824 0.47058824]
Average accuracy: 0.465359477124183
AUC_ROC for each fold: [0.7125     0.77777778 0.70833333 0.80555556 0.54166667]
Average AUC_ROC: 0.7091666666666666


In [24]:
# Degree-5 polynomial-kernel SVC evaluated by repeated, shuffled 5-fold cross-validation.
clf = svm.SVC(kernel='poly', C=1, degree=5)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.43352941176470583
AUC-ROC: 0.6770052971719639


In [212]:
# Degree-2 polynomial-kernel SVC (C=1), scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='poly', degree=2)

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Only the fold-averaged ROC AUC is reported for this model.
print("Average AUC_ROC:", cross_val_score(clf, PI_mat, y, cv=k, scoring="roc_auc").mean())

Accuracy for each fold: [0.44444444 0.58823529 0.52941176 0.47058824 0.52941176]
Average accuracy: 0.5124183006535947
Average AUC_ROC: 0.7144444444444444


In [25]:
# Degree-2 polynomial-kernel SVC evaluated by repeated, shuffled 5-fold cross-validation.
clf = svm.SVC(kernel='poly', C=1, degree=2)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.45862745098039215
AUC-ROC: 0.6995929934263267


In [217]:
# RBF-kernel SVC (C=1), scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='rbf')

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(clf, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.77777778 0.70588235 0.52941176 0.64705882 0.70588235]
Average accuracy: 0.673202614379085
AUC_ROC for each fold: [0.8125     0.72222222 0.70833333 0.77777778 0.72222222]
Average AUC_ROC: 0.7486111111111111


In [26]:
# RBF-kernel SVC evaluated by repeated, shuffled 5-fold cross-validation.
clf = svm.SVC(kernel='rbf', C=1)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.6582352941176473
AUC-ROC: 0.7556579192412525


In [218]:
# Sigmoid-kernel SVC (C=1), scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='sigmoid')

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(clf, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.61111111 0.58823529 0.76470588 0.64705882 0.64705882]
Average accuracy: 0.6516339869281046
AUC_ROC for each fold: [0.7        0.77777778 0.81944444 0.68055556 0.63888889]
Average AUC_ROC: 0.7233333333333334


In [27]:
# Sigmoid-kernel SVC evaluated by repeated, shuffled 5-fold cross-validation.
clf = svm.SVC(kernel='sigmoid', C=1)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.58
AUC-ROC: 0.7450121267621268


# Linear SVM (L2 penalty) and sparse linear SVM (L1 penalty)

In [15]:
from sklearn.svm import LinearSVC

# L2-penalised LinearSVC, scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
lsvc2 = LinearSVC(penalty="l2")

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(lsvc2, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(lsvc2, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.61111111 0.76470588 0.70588235 0.64705882 0.64705882]
Average accuracy: 0.6751633986928105
AUC_ROC for each fold: [0.7        0.875      0.81944444 0.81944444 0.72222222]
Average AUC_ROC: 0.7872222222222224


In [19]:
from sklearn.model_selection import KFold
# L2-penalised LinearSVC (lsvc2) evaluated by repeated, shuffled 5-fold cross-validation.
n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible splits; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(lsvc2, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(lsvc2, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.684575163398693
AUC-ROC: 0.7932567155067155


In [21]:
# Sparse SVM: L1-penalised LinearSVC with dual=False and max_iter raised to 50000,
# scored with 5-fold cross-validation on the PI features.
k = 5  # number of CV folds
lsvc1 = LinearSVC(penalty="l1", max_iter=50000, dual=False)

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(lsvc1, PI_mat, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(lsvc1, PI_mat, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.72222222 0.70588235 0.76470588 0.82352941 0.47058824]
Average accuracy: 0.6973856209150326
AUC_ROC for each fold: [0.6        0.69444444 0.75       0.80555556 0.56944444]
Average AUC_ROC: 0.6838888888888889


In [22]:

# L1-penalised LinearSVC (lsvc1) evaluated by repeated, shuffled 5-fold cross-validation.
n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible splits; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(lsvc1, PI_mat, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(lsvc1, PI_mat, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.6573856209150328
AUC-ROC: 0.6819287070953737


# Reduce feature matrix with PCA and apply SVM


In [34]:
from sklearn. decomposition import PCA


In [35]:
# Fit a PCA constructed with default arguments (no n_components given) on the full
# PI feature matrix.
# NOTE(review): the fit uses every sample, including those that are later held out
# when PI_pca is cross-validated — consider fitting PCA inside each fold; confirm intent.
pca = PCA()
pca.fit(PI_mat)

PCA()

In [37]:
# Project the PI features onto the principal components. fit_transform fits `pca` on
# PI_mat (again, since it was already fitted on the same matrix) and returns the
# transformed data.
PI_pca = pca.fit_transform(PI_mat)

In [238]:
# L2-penalised LinearSVC, scored with 5-fold cross-validation on the PCA features.
k = 5  # number of CV folds
lsvc2 = LinearSVC(penalty="l2")

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(lsvc2, PI_pca, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(lsvc2, PI_pca, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.38888889 0.70588235 0.64705882 0.64705882 0.52941176]
Average accuracy: 0.5836601307189542
AUC_ROC for each fold: [0.6        0.75       0.79166667 0.73611111 0.69444444]
Average AUC_ROC: 0.7144444444444444


In [39]:
# Sigmoid-kernel SVC (C=1), scored with 5-fold cross-validation on the PCA features.
k = 5  # number of CV folds
clf = svm.SVC(C=1, kernel='sigmoid')

# Default scorer: accuracy per fold, then its average.
scores = cross_val_score(clf, PI_pca, y, cv=k)
print("Accuracy for each fold:", scores)
print("Average accuracy:", scores.mean())

# Same estimator and fold count, scored by ROC AUC.
auc = cross_val_score(clf, PI_pca, y, cv=k, scoring="roc_auc")
print("AUC_ROC for each fold:", auc)
print("Average AUC_ROC:", auc.mean())

Accuracy for each fold: [0.61111111 0.64705882 0.58823529 0.64705882 0.58823529]
Average accuracy: 0.6163398692810458
AUC_ROC for each fold: [0.7        0.77777778 0.81944444 0.70833333 0.63888889]
Average AUC_ROC: 0.7288888888888889


In [40]:
# Sigmoid-kernel SVC on the PCA features, evaluated by repeated, shuffled 5-fold CV.
clf = svm.SVC(kernel='sigmoid', C=1)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_pca, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_pca, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.6300000000000001
AUC-ROC: 0.7393796882130215


In [240]:
# Create SVM classifier
clf = svm.SVC(kernel='rbf', C=1)

# Perform k-fold cross-validation
k = 5  # Number of folds
scores = cross_val_score(clf, PI_pca, y, cv=k)

# Print the accuracy for each fold
print("Accuracy for each fold:", scores)

# Calculate and print the average accuracy
print("Average accuracy:", np.mean(scores))

auc = cross_val_score(clf, PI_pca, y, scoring = "roc_auc", cv=k)

# Print the AUC_ROC for each fold
print("AUC_ROC for each fold:", auc)

# Calculate and print the average AUC_ROC
print("Average AUC_ROC:", np.mean(auc))

Accuracy for each fold: [0.77777778 0.70588235 0.52941176 0.64705882 0.70588235]
Average accuracy: 0.673202614379085
AUC_ROC for each fold: [0.8125     0.72222222 0.69444444 0.77777778 0.70833333]
Average AUC_ROC: 0.7430555555555556


In [41]:
# RBF-kernel SVC on the PCA features, evaluated by repeated, shuffled 5-fold CV.
clf = svm.SVC(kernel='rbf', C=1)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_pca, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_pca, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.6616993464052286
AUC-ROC: 0.7468121323121323


In [241]:
# Create SVM classifier
clf = svm.SVC(kernel='linear', C=1)

# Perform k-fold cross-validation
k = 5  # Number of folds
scores = cross_val_score(clf, PI_pca, y, cv=k)

# Print the accuracy for each fold
print("Accuracy for each fold:", scores)

# Calculate and print the average accuracy
print("Average accuracy:", np.mean(scores))

auc = cross_val_score(clf, PI_pca, y, scoring = "roc_auc", cv=k)

# Print the AUC_ROC for each fold
print("AUC_ROC for each fold:", auc)

# Calculate and print the average AUC_ROC
print("Average AUC_ROC:", np.mean(auc))

Accuracy for each fold: [0.44444444 0.70588235 0.64705882 0.64705882 0.58823529]
Average accuracy: 0.6065359477124183
AUC_ROC for each fold: [0.65       0.80555556 0.83333333 0.77777778 0.70833333]
Average AUC_ROC: 0.7550000000000001


In [42]:
# Linear-kernel SVC on the PCA features, evaluated by repeated, shuffled 5-fold CV.
clf = svm.SVC(kernel='linear', C=1)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_pca, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_pca, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.6324183006535948
AUC-ROC: 0.7627622932622933


In [243]:
# Create SVM classifier
clf = svm.SVC(kernel='poly', degree = 2, C=1)

# Perform k-fold cross-validation
k = 5  # Number of folds
scores = cross_val_score(clf, PI_pca, y, cv=k)

# Print the accuracy for each fold
print("Accuracy for each fold:", scores)

# Calculate and print the average accuracy
print("Average accuracy:", np.mean(scores))

auc = cross_val_score(clf, PI_pca, y, scoring = "roc_auc", cv=k)

# Print the AUC_ROC for each fold
print("AUC_ROC for each fold:", auc)

# Calculate and print the average AUC_ROC
print("Average AUC_ROC:", np.mean(auc))

Accuracy for each fold: [0.38888889 0.47058824 0.52941176 0.47058824 0.52941176]
Average accuracy: 0.47777777777777775
AUC_ROC for each fold: [0.775      0.83333333 0.79166667 0.81944444 0.55555556]
Average AUC_ROC: 0.7550000000000001


In [43]:
# Degree-2 polynomial-kernel SVC on the PCA features, evaluated by repeated, shuffled 5-fold CV.
clf = svm.SVC(kernel='poly', C=1, degree=2)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_pca, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_pca, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.426797385620915
AUC-ROC: 0.7172067469567469


In [244]:
# Create SVM classifier
clf = svm.SVC(kernel='poly', degree = 3, C=1)

# Perform k-fold cross-validation
k = 5  # Number of folds
scores = cross_val_score(clf, PI_pca, y, cv=k)

# Print the accuracy for each fold
print("Accuracy for each fold:", scores)

# Calculate and print the average accuracy
print("Average accuracy:", np.mean(scores))

auc = cross_val_score(clf, PI_pca, y, scoring = "roc_auc", cv=k)

# Print the AUC_ROC for each fold
print("AUC_ROC for each fold:", auc)

# Calculate and print the average AUC_ROC
print("Average AUC_ROC:", np.mean(auc))

Accuracy for each fold: [0.44444444 0.47058824 0.47058824 0.47058824 0.47058824]
Average accuracy: 0.465359477124183
AUC_ROC for each fold: [0.75       0.75       0.68055556 0.80555556 0.55555556]
Average AUC_ROC: 0.7083333333333333


In [44]:
# Degree-3 polynomial-kernel SVC on the PCA features, evaluated by repeated, shuffled 5-fold CV.
clf = svm.SVC(kernel='poly', C=1, degree=3)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_pca, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_pca, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.4220261437908497
AUC-ROC: 0.6951687510020843


In [245]:
# Create SVM classifier
clf = svm.SVC(kernel='poly', degree = 5, C=1)

# Perform k-fold cross-validation
k = 5  # Number of folds
scores = cross_val_score(clf, PI_pca, y, cv=k)

# Print the accuracy for each fold
print("Accuracy for each fold:", scores)

# Calculate and print the average accuracy
print("Average accuracy:", np.mean(scores))

auc = cross_val_score(clf, PI_pca, y, scoring = "roc_auc", cv=k)

# Print the AUC_ROC for each fold
print("AUC_ROC for each fold:", auc)

# Calculate and print the average AUC_ROC
print("Average AUC_ROC:", np.mean(auc))

Accuracy for each fold: [0.44444444 0.47058824 0.47058824 0.47058824 0.47058824]
Average accuracy: 0.465359477124183
AUC_ROC for each fold: [0.7125     0.79166667 0.61111111 0.86111111 0.55555556]
Average AUC_ROC: 0.706388888888889


In [45]:
# Degree-5 polynomial-kernel SVC on the PCA features, evaluated by repeated, shuffled 5-fold CV.
clf = svm.SVC(kernel='poly', C=1, degree=5)

n_repeats = 10
accs = []  # mean fold accuracy of each repetition
aucs = []  # mean fold ROC AUC of each repetition
for seed in range(n_repeats):
    # Seeded shuffle => reproducible results; one shared splitter => accuracy and AUC
    # are computed on the same folds instead of two independent random partitions.
    folds = KFold(n_splits=5, shuffle=True, random_state=seed)

    scores = cross_val_score(clf, PI_pca, y, cv=folds)
    accs.append(np.mean(scores))

    auc_scores = cross_val_score(clf, PI_pca, y, scoring="roc_auc", cv=folds)
    aucs.append(np.mean(auc_scores))

print("accuracy:", sum(accs) / n_repeats)
print("AUC-ROC:", sum(aucs) / n_repeats)

accuracy: 0.4341176470588236
AUC-ROC: 0.6627600208433542
