# Importing Packages

In [None]:
!pip install aisdc

Collecting aisdc
  Downloading aisdc-1.1.2-py3-none-any.whl (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.6/88.6 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dictdiffer~=0.9.0 (from aisdc)
  Downloading dictdiffer-0.9.0-py2.py3-none-any.whl (16 kB)
Collecting fpdf~=1.7.2 (from aisdc)
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting multiprocess~=0.70.15 (from aisdc)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-learn~=1.1.3 (from aisdc)
  Downloading scikit_learn-1.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.5/30.5 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
Collecting uuid~=1.30 (from aisdc)
  Downloading uuid-1.30.tar.gz (5.8 kB)
  

In [None]:
import os
import sys
import pylab as plt
import numpy as np
import logging
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, f1_score
from os.path import expanduser
from aisdc.safemodel.classifiers import SafeSVC
from sklearn.model_selection import GridSearchCV

# Data preparation

## Loading train data

In [None]:
# Read the ADNI training volumes table.
# NOTE(review): the file has a .tsv extension but is parsed with read_csv's
# default comma separator — confirm the delimiter matches the file.
train_data = pd.read_csv('/content/drive/MyDrive/ADNI/ADNI_train_volumes.tsv')

# Remove every column that should not remain in the modelling frame.
train_data = train_data.drop(columns=[
    'Unnamed: 0', 'session_id', 'examination_date', 'earliest_time',
    'rh.aparc.volume', 'lh.aparc.volume',
    'participant_id', 'Measure:volume',
])

# Binary target: 'AD' -> 1; 'MCI', 'CN' and the raw value 0.666667 -> 0.
# Any label that is not a key of the mapping becomes NaN.
diagnosis_mapping = {'MCI': 0, 'CN': 0, 'AD': 1, 0.666667: 0}
train_data['diagnosis'] = train_data['diagnosis'].map(diagnosis_mapping)

# Keep only the rows whose label was mapped to 0 or 1 (unmapped rows are NaN).
train_data = train_data[train_data['diagnosis'].isin([0, 1])]

## Loading test data

In [None]:
# Read the ADNI test volumes table.
# NOTE(review): the file has a .tsv extension but is parsed with read_csv's
# default comma separator — confirm the delimiter matches the file.
test_data = pd.read_csv('/content/drive/MyDrive/ADNI/ADNI_test_volumes.tsv')

# Remove every column that should not remain in the modelling frame.
test_data = test_data.drop(columns=[
    'Unnamed: 0', 'Unnamed: 0.1', 'session_id', 'participant_id',
    'rh.aparc.volume', 'lh.aparc.volume',
])

# Binary target: 'AD' -> 1; 'MCI' and 'CN' -> 0. Any other label becomes NaN.
diagnosis_mapping = {'MCI': 0, 'CN': 0, 'AD': 1}
test_data['diagnosis'] = test_data['diagnosis'].map(diagnosis_mapping)

# Keep only the rows whose label was mapped to 0 or 1 (unmapped rows are NaN).
test_data = test_data[test_data['diagnosis'].isin([0, 1])]

## Cleaning data

In [None]:
# Restrict both frames to the columns they share, so train and test expose the
# same features in the same order (the order of the training frame).
# A list is used instead of a set: pandas does not accept a set as an indexer,
# and set iteration order is not stable between runs.
common_columns = [col for col in train_data.columns if col in test_data.columns]
train_data = train_data[common_columns]
test_data = test_data[common_columns]

In [None]:
# Impute missing values with a KNN imputer (3 nearest neighbours).
# The imputer is fitted on the training frame only and then applied to the test
# frame with transform(), so the test set is filled in from training neighbours
# and no test-set information is used to fit the preprocessing step.
# NOTE(review): the 'diagnosis' column is still part of both frames here, so the
# label takes part in the neighbour distance — consider imputing the feature
# columns only.
imputer = KNNImputer(n_neighbors=3)
train_data = pd.DataFrame(imputer.fit_transform(train_data), columns=train_data.columns)
test_data = pd.DataFrame(imputer.transform(test_data), columns=test_data.columns)

## Split data

In [None]:
# Pull the 'diagnosis' target out of each frame; everything else is a feature.
y_train, y_test = train_data['diagnosis'], test_data['diagnosis']
X_train = train_data.drop('diagnosis', axis=1)
X_test = test_data.drop('diagnosis', axis=1)

# SVM Model

## SVM Hyperparameter Optimisation

In [None]:
# Grid-search SVC hyperparameters with 3-fold cross-validation, scored by F1.
# The estimator is bound to `base_svc` rather than `svm`, so the `svm` module
# imported from sklearn is not overwritten by an SVC instance.
base_svc = SVC()

# Candidate values tried for each hyperparameter.
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10],
    'gamma': [0.1, 1, 'scale']
}

grid_search = GridSearchCV(estimator=base_svc, param_grid=param_grid, cv=3, scoring='f1')
grid_search.fit(X_train, y_train)

# Best parameter combination and its mean cross-validated F1 score.
best_params = grid_search.best_params_
best_score = grid_search.best_score_

In [None]:
# Show the hyperparameter combination selected by the grid search.
print(best_params)

In [None]:
# Show the best score reached by the grid search (it was run with scoring='f1').
print(best_score)

## Final SVM Model & Evaluation

In [None]:
# Fit a linear-kernel SVC (C=7) on the training set and predict the test set.
# The directly imported SVC class is used, so this cell does not depend on what
# the name `svm` is currently bound to.
# NOTE(review): per the scikit-learn docs gamma only applies to the rbf, poly
# and sigmoid kernels, so gamma=3 should have no effect here — confirm intent.
clf = SVC(kernel='linear', gamma=3, C=7)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

In [None]:
# Fit a linear-kernel SVC with the default C on the training set and predict
# the test set. This rebinds `clf` and `predictions`.
# The directly imported SVC class is used, so this cell does not depend on what
# the name `svm` is currently bound to.
# NOTE(review): per the scikit-learn docs gamma only applies to the rbf, poly
# and sigmoid kernels, so gamma=2 should have no effect here — confirm intent.
clf = SVC(kernel='linear', gamma=2)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

In [None]:
# Score the test-set predictions: overall accuracy first, then F1.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy: {}".format(accuracy))
f1 = f1_score(y_true=y_test, y_pred=predictions)
print("F1-Score: {}".format(f1))

In [None]:
# Confusion matrix of true vs. predicted labels on the test set; the bare
# expression on the last line displays it as the cell output.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
conf_matrix