In [1]:
import warnings
warnings.filterwarnings('ignore')

from time import time
from datetime import timedelta

import pandas as pd
import seaborn as sns
import numpy as np

from dython.nominal import associations
from dython.nominal import correlation_ratio
from dython.nominal import cramers_v

from scipy.stats import chi2_contingency 
from scipy.stats import pearsonr 

from sklearn.metrics import confusion_matrix, \
                  classification_report, accuracy_score,  precision_score, recall_score, f1_score

from sklearn.preprocessing import minmax_scale

from sklearn.svm import LinearSVR, SVR, SVC
from sklearn.neighbors import KNeighborsRegressor

from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split, cross_validate

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

from matplotlib import pyplot as plt 

In [2]:
def confusion(true, pred):
    """
    Build a square confusion matrix as a labelled DataFrame.

    Parameters
    ----------
    true : array-like
        Ground-truth class labels (pandas Series, list or ndarray).
    pred : array-like
        Predicted class labels, in the same order and of the same length
        as ``true`` (e.g. the ndarray returned by ``estimator.predict``).

    Returns
    -------
    pandas.DataFrame
        Counts with rows named 'target' and columns named 'predicted'.
        Both axes carry the same labels (the union of the labels seen in
        ``true`` and ``pred``); combinations that never occur are 0.
    """
    # Work on fresh Series: the caller's objects (and their .name) stay
    # untouched, and plain lists / ndarrays are accepted as well.
    # The index is dropped so the two inputs are paired by position.
    target = pd.Series(true).reset_index(drop=True).rename('target')
    predicted = pd.Series(pred).reset_index(drop=True).rename('predicted')

    cm = pd.crosstab(target, predicted)

    # A class that is never predicted (or never true) is missing from one
    # axis of the crosstab; reindex both axes to the full label set so the
    # matrix is always square instead of raising KeyError.
    labels = cm.index.union(cm.columns)
    cm = cm.reindex(index=labels, columns=labels, fill_value=0)
    return cm.rename_axis(index='target', columns='predicted')

In [3]:
# Empty scoreboard: one row per model is added later, one column per
# cross-validated metric.
results_df = pd.DataFrame(
    columns=['Accuracy', 'F1 Macro', 'Precision Macro', 'Recall Macro'],
    index=[],
)

In [4]:
# Load the training features and replace the file's column labels with
# short readable names.
ILDS = pd.read_csv("train_features_ILDS.csv", delimiter=',')
ILDS.columns = [
    'Age', 'Female', 'TB', 'DB', 'Alkphos',
    'Sgpt', 'Sgot', 'TP', 'ALB', 'AR',
]

# Store the 'Female' column with pandas' categorical dtype.
ILDS['Female'] = ILDS['Female'].astype('category')

# The labels live in a separate CSV; the frame returned by read_csv is
# assigned as the 'target' column.
# NOTE(review): assumes the label file holds exactly one column, row-aligned
# with the feature file -- TODO confirm.
ILDS['target'] = pd.read_csv("train_labels_ILDS.csv", delimiter=',')

# Displayed as the cell output: (rows, columns) of the combined frame.
ILDS.shape

(462, 11)

In [5]:
# Features are every column except the label.
X = ILDS.drop(columns='target')
y = ILDS['target']

# First split: hold out 33% of the rows as a test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Second split: carve 20% of the remaining rows out as a validation set;
# X_train / y_train are rebound to the smaller training portion.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Polynomial

Degree 3

In [6]:
# Polynomial-kernel SVM of degree 3, fitted on the training split.
svm = SVC(kernel="poly", degree=3).fit(X_train, y_train)

# Predictions for the validation split (not used further in this cell).
y_pred = svm.predict(X_val)

# 5-fold cross-validation on the training split, collecting four metrics.
cross_val_results = pd.DataFrame(
    cross_validate(
        svm,
        X_train,
        y_train,
        cv=5,
        scoring=['accuracy', 'f1_macro', 'precision_macro', 'recall_macro'],
    )
)

# Record the fold-averaged test metrics as this model's scoreboard row.
results_df.loc['SVM-poly-3', :] = (
    cross_val_results[
        ['test_accuracy', 'test_f1_macro', 'test_precision_macro', 'test_recall_macro']
    ]
    .mean()
    .values
)
results_df

Unnamed: 0,Accuracy,F1 Macro,Precision Macro,Recall Macro
SVM-poly-3,0.70849,0.414668,0.35569,0.497143


Degree 7

In [7]:
# Polynomial-kernel SVM of degree 7, fitted on the training split.
svm = SVC(kernel="poly", degree=7).fit(X_train, y_train)

# Predictions for the validation split (not used further in this cell).
y_pred = svm.predict(X_val)

# 5-fold cross-validation on the training split, collecting four metrics.
cross_val_results = pd.DataFrame(
    cross_validate(
        svm,
        X_train,
        y_train,
        cv=5,
        scoring=['accuracy', 'f1_macro', 'precision_macro', 'recall_macro'],
    )
)

# Record the fold-averaged test metrics as this model's scoreboard row.
results_df.loc['SVM-poly-7', :] = (
    cross_val_results[
        ['test_accuracy', 'test_f1_macro', 'test_precision_macro', 'test_recall_macro']
    ]
    .mean()
    .values
)
results_df

Unnamed: 0,Accuracy,F1 Macro,Precision Macro,Recall Macro
SVM-poly-3,0.70849,0.414668,0.35569,0.497143
SVM-poly-7,0.712571,0.416074,0.356286,0.5


Degree 10

In [8]:
# Polynomial-kernel SVM of degree 10, fitted on the training split.
svm = SVC(kernel="poly", degree=10).fit(X_train, y_train)

# Predictions for the validation split (not used further in this cell).
y_pred = svm.predict(X_val)

# 5-fold cross-validation on the training split, collecting four metrics.
cross_val_results = pd.DataFrame(
    cross_validate(
        svm,
        X_train,
        y_train,
        cv=5,
        scoring=['accuracy', 'f1_macro', 'precision_macro', 'recall_macro'],
    )
)

# Record the fold-averaged test metrics as this model's scoreboard row.
results_df.loc['SVM-poly-10', :] = (
    cross_val_results[
        ['test_accuracy', 'test_f1_macro', 'test_precision_macro', 'test_recall_macro']
    ]
    .mean()
    .values
)
results_df

Unnamed: 0,Accuracy,F1 Macro,Precision Macro,Recall Macro
SVM-poly-3,0.70849,0.414668,0.35569,0.497143
SVM-poly-7,0.712571,0.416074,0.356286,0.5
SVM-poly-10,0.692245,0.409017,0.353267,0.485714


In [9]:
# Sigmoid-kernel SVM, fitted on the training split.
# `degree` is only used by the polynomial kernel and is ignored by every
# other kernel, so it is not passed here; the fitted model is unchanged.
svm = SVC(kernel="sigmoid")
svm.fit(X_train, y_train)

# Predictions for the validation split (not used further in this cell).
y_pred = svm.predict(X_val)

# 5-fold cross-validation on the training split, collecting four metrics.
cross_val_results = pd.DataFrame(
    cross_validate(
        svm,
        X_train,
        y_train,
        cv=5,
        scoring=['accuracy', 'f1_macro', 'precision_macro', 'recall_macro'],
    )
)

# Record the fold-averaged test metrics as this model's scoreboard row.
results_df.loc['SVM-sigmoid', :] = cross_val_results[
    ['test_accuracy', 'test_f1_macro', 'test_precision_macro', 'test_recall_macro']
].mean().values
results_df

Unnamed: 0,Accuracy,F1 Macro,Precision Macro,Recall Macro
SVM-poly-3,0.70849,0.414668,0.35569,0.497143
SVM-poly-7,0.712571,0.416074,0.356286,0.5
SVM-poly-10,0.692245,0.409017,0.353267,0.485714
SVM-sigmoid,0.647592,0.436161,0.405141,0.48


In [10]:
# Load the unlabeled test features. The file is read with header=None, so
# its first line is treated as data, and the columns are then named to
# match the training frame.
ILDS_test = pd.read_csv("test_data_ILDS.csv", delimiter=',', header = None)
ILDS_test.columns = ['Age', 'Female', 'TB', 'DB', 'Alkphos', 'Sgpt', 'Sgot', 'TP', 'ALB', 'AR']

# Feature matrix: every column up to and including 'AR'.
# NOTE(review): this rebinds X_test, replacing the held-out split created by
# train_test_split earlier in the notebook; y_test no longer matches it.
X_test = ILDS_test.loc[:, :'AR']

# NOTE(review): `svm` is not defined in this cell -- the predictions come
# from whichever estimator the most recently executed cell left bound to
# that name. Confirm it is the intended model before submitting.
ILDS_test['Label'] = svm.predict(X_test)

# Use 1-based row numbers named 'ID' as the index written to the CSV.
ILDS_test.index = (ILDS_test.index + 1).rename('ID')

# Write only the predicted labels, keyed by ID.
ILDS_test['Label'].to_csv('SVM_raw.csv', index=True)