In [45]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

from matplotlib.pyplot import subplots, cm
import sklearn.model_selection as skm
from ISLP import load_data, confusion_table
from sklearn.svm import SVC
from ISLP.svm import plot as plot_svm
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from statsmodels.formula.api import ols
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


roc_curve = RocCurveDisplay.from_estimator

In [21]:
df = pd.read_csv('Obese10.csv')
df['NumberMainMeals'] = df['NumberMainMeals'].round(decimals=0)
df['FreqVegConsump'] = df['FreqVegConsump'].round(decimals=0)
df['WaterIntake'] = df['WaterIntake'].round(decimals=1)
df['PhysicalActivityFreq'] = df['PhysicalActivityFreq'].round(decimals=0)
df['TimeUsineTech'] = df['TimeUsingTech'].round(decimals=1)
df['BMI'] = df['BMI'].round(decimals=1)

x = df.drop(columns=['BMI','Gender', 'Age', 'familyHistoryOverWeight','Height', 'Weight', 'Classification'])
y = df['Classification']
Xtrain, Xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2)
scaler = StandardScaler()
scaler.fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)



In [24]:
svm_linear = SVC(C=10, kernel='linear')
svm_linear.fit(Xtrain, ytrain)

In [27]:
kfold = skm.KFold(5, 
                  random_state=0,
                  shuffle=True)
grid = skm.GridSearchCV(svm_linear,
                        {'C':[0.001,0.01,0.1,1,5,10,100]},
                        refit=True,
                        cv=kfold,
                        scoring='accuracy')
grid.fit(Xtrain, ytrain)
grid.best_params_

{'C': 0.01}

In [30]:
grid.cv_results_[('mean_test_score')]

array([0.61964602, 0.68718417, 0.68304216, 0.68244517, 0.68244693,
       0.68362861, 0.68481204])

In [29]:
best_ = grid.best_estimator_
y_test_hat = best_.predict(Xtest)
confusion_table(y_test_hat, ytest)

Truth,0,1
Predicted,Unnamed: 1_level_1,Unnamed: 2_level_1
0,129,57
1,86,150


In [32]:
# nonlinear SVM, rbf

svm_rbf = SVC(kernel="rbf", gamma=1, C=1)
svm_rbf.fit(Xtrain, ytrain)

In [35]:
kfold = skm.KFold(5, 
                  random_state=0,
                  shuffle=True)
grid = skm.GridSearchCV(svm_rbf,
                        {'C':[0.1,1,10,100,1000],
                         'gamma':[0.5,1,2,3,4]},
                        refit=True,
                        cv=kfold,
                        scoring='accuracy')
grid.fit(Xtrain, ytrain)
grid.best_params_

{'C': 10, 'gamma': 0.5}

In [36]:
grid.cv_results_[('mean_test_score')]

array([0.73518164, 0.71147613, 0.6812635 , 0.66526258, 0.63979773,
       0.83530455, 0.83649149, 0.81634506, 0.81457869, 0.80391551,
       0.84240865, 0.83530104, 0.82049234, 0.81397995, 0.80864397,
       0.82997559, 0.82523133, 0.81752322, 0.81279125, 0.81042087,
       0.8187207 , 0.8193089 , 0.81811143, 0.81220129, 0.812793  ])

In [37]:
best_ = grid.best_estimator_
y_test_hat = best_.predict(Xtest)
confusion_table(y_test_hat, ytest)

Truth,0,1
Predicted,Unnamed: 1_level_1,Unnamed: 2_level_1
0,176,33
1,39,174


In [43]:
y_pred = best_.predict(Xtest)
accuracy = accuracy_score(ytest, y_pred)
classification_report_result = classification_report(ytest, y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report_result)

Accuracy: 0.8293838862559242
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.82      0.83       215
           1       0.82      0.84      0.83       207

    accuracy                           0.83       422
   macro avg       0.83      0.83      0.83       422
weighted avg       0.83      0.83      0.83       422



In [46]:
# nonlinear SVM, poly

svm_poly = SVC(kernel="poly", degree=3, C=1)
svm_poly.fit(Xtrain, ytrain)

kfold = KFold(n_splits=5, random_state=0, shuffle=True)

param_grid = {'C': [0.1, 1, 10, 100, 1000], 'degree': [2, 3, 4], 'coef0': [0, 1, 2]}

grid = GridSearchCV(svm_poly, param_grid, refit=True, cv=kfold, scoring='accuracy')
grid.fit(Xtrain, ytrain)

best_params = grid.best_params_
print(f'Best Parameters: {best_params}')

# Use the best model to make predictions on the test set
best_model = grid.best_estimator_
y_pred = best_model.predict(Xtest)

# Evaluate the model
accuracy = accuracy_score(ytest, y_pred)
classification_report_result = classification_report(ytest, y_pred)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report_result)


Best Parameters: {'C': 1, 'coef0': 2, 'degree': 3}
Accuracy: 0.8080568720379147
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.73      0.80       215
           1       0.76      0.88      0.82       207

    accuracy                           0.81       422
   macro avg       0.82      0.81      0.81       422
weighted avg       0.82      0.81      0.81       422

