#### **Classifying the Graph Metrics With SVC**

In [12]:
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [13]:
# Load the graph-metric table and the surface/status table.
# Both are keyed by SpecID and are joined on that column in the following cell.
graph_metrics_df = pd.read_csv("../../data/current_graph_metrics.csv")
statuses_df = pd.read_csv("../../data/surface_and_status.csv")

In [14]:
# Join the graph metrics with the status table on SpecID (pandas' default inner join),
# order the rows by SpecID and use SpecID as the index.
df = graph_metrics_df.merge(statuses_df, on="SpecID").sort_values(by="SpecID").set_index('SpecID')
# Show the merged frame as the cell output.
df

Unnamed: 0_level_0,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,LabelPropagation,Leiden,Louvain,SurID,Status
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
201210-1-00,0.830275,1170.371464,0.014458,0.195683,1,1,2289,201210-1,Normal
201210-1-01,0.811969,1134.613827,0.014017,0.194311,1,1,2289,201210-1,Normal
201210-1-02,0.580317,733.700940,0.009064,0.178658,1,127,988,201210-1,Normal
201210-1-03,0.744123,1017.430766,0.012569,0.189725,1,1,2289,201210-1,Normal
201210-1-04,0.794119,1107.509457,0.013682,0.193235,1,1,2289,201210-1,Normal
...,...,...,...,...,...,...,...,...,...
210526-3-45,1.128517,1689.660278,0.020874,0.215918,1,1,2289,210526-3,Hyperglycemia
210526-3-46,1.110504,1658.692469,0.020491,0.214711,1,1,2289,210526-3,Hyperglycemia
210526-3-47,1.145219,1718.172663,0.021226,0.217025,1,1,2289,210526-3,Hyperglycemia
210526-3-48,1.084825,1613.192575,0.019929,0.212946,1,1,2289,210526-3,Hyperglycemia


In [15]:
def evaluate_svm(df):
    """Cross-validate a linear SVC on ``df`` using surface-grouped folds and print the accuracy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Status`` column (the class label) and a ``SurID``
        column (the group identifier). Every remaining column is used as a
        feature.

    Returns
    -------
    None
        The mean and standard deviation of the per-fold test accuracy are
        printed, not returned.

    Raises
    ------
    KeyError
        If ``df`` lacks the ``SurID`` or ``Status`` column.

    Notes
    -----
    The features are standardised before reaching the SVC. SVMs are not
    scale-invariant, and the feature columns can differ by several orders of
    magnitude, so without scaling the largest columns dominate the linear
    kernel and slow down convergence.
    """
    # Set the Surfaces as groups: rows sharing a SurID are never split
    # between the training and test side of a fold.
    groups = df['SurID']
    X = df.drop(['Status', 'SurID'], axis=1)
    y = df['Status']

    # Creating the SVM classifier
    svm = SVC(random_state=1234, kernel='linear')

    # The scaler lives inside the pipeline so that cross_validate fits it on
    # the training folds only; test-fold statistics never leak into scaling.
    model = make_pipeline(StandardScaler(), svm)

    # Using GroupKFold so folds are formed from whole groups
    cv = GroupKFold(n_splits=5)

    scores = cross_validate(model, X, y, groups=groups, cv=cv, scoring=['accuracy'], n_jobs=-1)

    # Displaying the results (the classifier's name is reported, not the pipeline's)
    fold_accuracy = scores["test_accuracy"]
    print(f'{svm.__class__.__name__} Cross-Validation Accuracy: {np.mean(fold_accuracy):.4f} +/- {np.std(fold_accuracy):.4f}')

In [16]:
# Grouped 5-fold cross-validation of the SVC on the graph-metric features held in df.
evaluate_svm(df)

#### **Classifying the FastRP Embeddings With SVC**

In [None]:
# Load the FastRP embedding table; it is merged with statuses_df on SpecID in the following cell.
fastRP_df = pd.read_csv("../../data/fastRP_embeddings.csv")

In [None]:
# NOTE: this rebinds `df`, which previously held the graph-metric frame.
# Join the embeddings with the status table on SpecID (pandas' default inner join),
# order the rows by SpecID and use SpecID as the index.
df = fastRP_df.merge(statuses_df, on="SpecID").sort_values(by="SpecID").set_index('SpecID')
# Show the merged frame as the cell output.
df

Unnamed: 0_level_0,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,embedding_9,...,embedding_119,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127,Status
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-00,-0.080063,0.134346,0.311409,0.068268,0.220651,0.020809,-0.157989,-0.023693,0.139395,0.119681,...,0.062691,-0.112076,0.184696,0.327179,-0.075120,0.091235,-0.209512,0.040763,-0.094208,Normal
201210-1-01,-0.080101,0.134585,0.310871,0.067332,0.220452,0.020230,-0.158860,-0.023553,0.139669,0.118961,...,0.063693,-0.111742,0.186010,0.326752,-0.076596,0.091260,-0.211565,0.040145,-0.094633,Normal
201210-1-02,-0.079054,0.138156,0.306293,0.055854,0.212508,0.017010,-0.174445,-0.019908,0.143086,0.116697,...,0.065156,-0.112998,0.196053,0.328230,-0.083976,0.093691,-0.220491,0.040576,-0.086324,Normal
201210-1-03,-0.079920,0.136971,0.307921,0.059693,0.213916,0.018670,-0.169574,-0.020787,0.141887,0.117424,...,0.063974,-0.112868,0.192851,0.328275,-0.082046,0.093300,-0.217178,0.040930,-0.088214,Normal
201210-1-04,-0.079154,0.133878,0.312250,0.070663,0.223848,0.020361,-0.154181,-0.024949,0.138894,0.120166,...,0.062828,-0.111242,0.182163,0.326090,-0.072420,0.090532,-0.207949,0.039952,-0.096966,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210526-3-45,-0.077160,0.133637,0.313199,0.071438,0.226589,0.019494,-0.154102,-0.025647,0.139182,0.121855,...,0.062112,-0.111478,0.180060,0.326621,-0.067590,0.089882,-0.204368,0.040522,-0.096711,Hyperglycemia
210526-3-46,-0.077198,0.133361,0.313462,0.072150,0.227169,0.019669,-0.153159,-0.025882,0.138980,0.122001,...,0.062088,-0.111391,0.179395,0.326495,-0.067051,0.089678,-0.203765,0.040492,-0.097252,Hyperglycemia
210526-3-47,-0.076899,0.134395,0.312508,0.069708,0.225286,0.019181,-0.156088,-0.025093,0.139682,0.121436,...,0.062091,-0.111497,0.181595,0.326650,-0.068859,0.090412,-0.205910,0.040582,-0.095570,Hyperglycemia
210526-3-48,-0.077383,0.132969,0.313717,0.072891,0.227793,0.019769,-0.152275,-0.026112,0.138768,0.122063,...,0.062284,-0.111309,0.178866,0.326396,-0.066739,0.089389,-0.203345,0.040377,-0.097908,Hyperglycemia


In [None]:
# Same grouped cross-validation, now on the frame built from the FastRP embeddings.
# evaluate_svm reads the SurID and Status columns, so both must be present in df.
evaluate_svm(df)

Overall Accuracy: 0.7934426229508197


Classification Report:
               precision    recall  f1-score   support

Hyperglycemia       0.72      0.74      0.73        91
 Hypoglycemia       0.81      0.78      0.79       107
       Normal       0.84      0.86      0.85       107

     accuracy                           0.79       305
    macro avg       0.79      0.79      0.79       305
 weighted avg       0.79      0.79      0.79       305


Confusion Matrix:
[[67 15  9]
 [15 83  9]
 [11  4 92]]
Overall Accuracy: 0.7672131147540984


Classification Report:
               precision    recall  f1-score   support

Hyperglycemia       0.73      0.79      0.76        91
 Hypoglycemia       0.76      0.69      0.73       107
       Normal       0.81      0.82      0.81       107

     accuracy                           0.77       305
    macro avg       0.77      0.77      0.77       305
 weighted avg       0.77      0.77      0.77       305


Confusion Matrix:
[[72 12  7]
 [19 74 14]
 