In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import StandardScaler

In [2]:
import pickle
import pandas as pd

# Feature columns used for modelling; the names must match the headers of
# the feature CSVs loaded below, since they are used to index those frames.
LR_features = [
    'remainder__Active transportation',      # from Toronto only
    'remainder__Rate of Active population',  # in Toronto CMA
    'remainder__Overweight',
    'remainder__Obese',
    'remainder__log_median_age',
]

# Feature tables for the train, test and external splits (read in that order).
X_resampled_S, X_test_S, X_ext_S = (
    pd.read_csv(path) for path in ("x_train.csv", "x_test.csv", "x_ext.csv")
)

# Matching label tables for the same three splits.
y_resampled_S, y_test_S, y_ext_S = (
    pd.read_csv(path) for path in ("y_train.csv", "y_test.csv", "y_ext.csv")
)

In [3]:
# Standardise the selected features. The scaler is fitted on the training
# split only; the test and external splits reuse the fitted scaler so no
# statistics from them are used during fitting.
scaler = StandardScaler()

# Each scaled array is bound to two names (the *_scaled_SVM name and a short
# alias); both names refer to the same array object.
X_train = X_train_scaled_SVM = scaler.fit_transform(X_resampled_S[LR_features])
X_test = X_test_scaled_SVM = scaler.transform(X_test_S[LR_features])
X_ext = X_ext_scaled_SVM = scaler.transform(X_ext_S[LR_features])

In [4]:
from tqdm import tqdm
from sklearn.svm import SVC

# Grid search over SVC hyper-parameters. For every (kernel, C, gamma)
# combination the model is fitted on the training split and scored on:
#   * train -- out-of-fold predictions from 5-fold cross_val_predict
#   * test  -- predictions of the fitted model on the test split
#   * ext   -- predictions of the fitted model on the external split
# One row per combination is written to 'results_SVM.csv'.

# Column order of the results table / CSV.
result_columns = [
    'kernel',
    'c',
    'gamma',

    'train_acc',
    'train_prec',
    'train_recall',
    'train_f1',

    'test_acc',
    'test_prec',
    'test_recall',
    'test_f1',

    'ext_acc',
    'ext_prec',
    'ext_recall',
    'ext_f1',
]


def _binary_metrics(prefix, y_true, y_pred):
    """Return accuracy, precision, recall and F1 keyed as '<prefix>_<metric>'.

    Args:
        prefix: Split name used as key prefix ('train', 'test' or 'ext').
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        dict with keys '<prefix>_acc', '<prefix>_prec', '<prefix>_recall'
        and '<prefix>_f1'.
    """
    return {
        f'{prefix}_acc': accuracy_score(y_true, y_pred),
        f'{prefix}_prec': precision_score(y_true, y_pred),
        f'{prefix}_recall': recall_score(y_true, y_pred),
        f'{prefix}_f1': f1_score(y_true, y_pred),
    }


x_train = X_train_scaled_SVM
y_train = y_resampled_S['Diabetes']
x_test = X_test_scaled_SVM
y_test = y_test_S['Diabetes']
x_ext = X_ext_scaled_SVM
y_ext = y_ext_S['Diabetes']

# Hyper-parameter grid: C in 1e-3..1e3, gamma in 1e-6..1e1 (powers of ten).
C = [10**(i) for i in range(-3,4)]
kernel = ['rbf', 'linear'] #, 'poly', 'sigmoid']
gamma = [10**(i) for i in range(-6,2)]

# Rows are accumulated in a plain list and turned into a DataFrame once at
# the end. Growing a DataFrame with pd.concat inside the loop copies the
# whole frame on every iteration, and concatenating onto an initially empty
# frame triggers a pandas FutureWarning in recent versions.
result_rows = []

for k in tqdm(kernel):
    for c in C:
        for g in gamma:
            # NOTE(review): gamma is only used by the 'rbf'/'poly'/'sigmoid'
            # kernels, so the 'linear' rows repeat the same model for every
            # gamma value. The full grid is kept so the CSV layout is unchanged.
            # NOTE(review): probability=True is not needed by the predict()
            # calls below and slows fitting; kept in case other cells rely on
            # predict_proba of the last fitted model -- confirm and drop.
            svm = SVC(kernel=k, C=c, gamma=g, class_weight=None, probability=True)
            svm.fit(x_train, y_train)

            # cross_val_predict clones the estimator, so the model fitted
            # above is untouched and is the one used for test/ext predictions.
            y_pred_train = cross_val_predict(svm, x_train, y_train, cv=5)
            y_pred_test = svm.predict(x_test)
            y_pred_ext = svm.predict(x_ext)

            new_row = {
                'kernel': k,
                'c': c,
                'gamma': g,
                **_binary_metrics('train', y_train, y_pred_train),
                **_binary_metrics('test', y_test, y_pred_test),
                **_binary_metrics('ext', y_ext, y_pred_ext),
            }
            result_rows.append(new_row)

# Build the table once; `columns=` fixes the column order and keeps the
# header even if the grid is empty.
results_SVM = pd.DataFrame(result_rows, columns=result_columns)

results_SVM.to_csv('results_SVM.csv', index=False)

  results_SVM = pd.concat([results_SVM, pd.DataFrame([new_row])], ignore_index=True)
100%|██████████| 2/2 [00:58<00:00, 29.14s/it]


In [5]:
# Completion marker: printed once this cell is reached.
print("Finished")

Finished
