# Library Imports

In [70]:
import pandas as pd
import joblib
from sklearn import svm
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

# Data Loading

In [90]:
# Load the BMI dataset; the path is resolved relative to the working directory.
raw_bmi = pd.read_csv(filepath_or_buffer='bmi.csv')

In [91]:
# Show the freshly loaded frame as the cell output.
raw_bmi

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3
...,...,...,...,...
495,Female,150,153,5
496,Female,184,121,4
497,Female,141,136,5
498,Male,150,95,5


# Preprocessing

In [92]:
# Encode Gender as an integer feature (Male -> 0, Female -> 1).
# A per-value lookup is used instead of Series.replace(dict): replace on an
# object column relies on silent downcasting of the result, which pandas 2.2
# deprecates with a FutureWarning. Values that are not keys of the mapping
# pass through unchanged, so re-running this cell on already-encoded data is
# a no-op (the same behaviour replace had).
value_mapping = {'Male': 0, 'Female': 1}
raw_bmi['Gender'] = raw_bmi['Gender'].map(lambda label: value_mapping.get(label, label))
raw_bmi

Unnamed: 0,Gender,Height,Weight,Index
0,0,174,96,4
1,0,189,87,2
2,1,185,110,4
3,1,195,104,3
4,0,149,61,3
...,...,...,...,...
495,1,150,153,5
496,1,184,121,4
497,1,141,136,5
498,0,150,95,5


In [93]:
def min_max_scaling(column):
    """Linearly rescale a numeric pandas Series onto the [0, 1] interval.

    The observed minimum and maximum are printed because the inference cell
    further down re-applies the same bounds by hand to new samples.

    Parameters
    ----------
    column : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        ``(column - min) / (max - min)``. A constant column (max == min)
        maps to all 0.0 instead of NaN.
    """
    min_value = column.min()
    max_value = column.max()
    print(min_value, max_value)
    value_range = max_value - min_value
    if value_range == 0:
        # Constant column: 0 / 0 would silently fill the feature with NaN.
        # Dividing by 1 keeps the result finite (every value becomes 0.0).
        value_range = 1
    return (column - min_value) / value_range

# Rescale every column except the target label onto [0, 1], writing the
# result back into raw_bmi.
# NOTE(review): this runs before the train/test split further down, so the
# min/max bounds are computed from rows that later end up in the test set;
# consider fitting the bounds on the training rows only.
column_to_exclude = 'Index'
columns_to_scale = list(filter(lambda name: name != column_to_exclude, raw_bmi.columns))
raw_bmi[columns_to_scale] = raw_bmi[columns_to_scale].apply(min_max_scaling)
raw_bmi

0 1
140 199
50 160


Unnamed: 0,Gender,Height,Weight,Index
0,0.0,0.576271,0.418182,4
1,0.0,0.830508,0.336364,2
2,1.0,0.762712,0.545455,4
3,1.0,0.932203,0.490909,3
4,0.0,0.152542,0.100000,3
...,...,...,...,...
495,1.0,0.169492,0.936364,5
496,1.0,0.745763,0.645455,4
497,1.0,0.016949,0.781818,5
498,0.0,0.169492,0.409091,5


In [74]:
# Separate the target label from the feature columns.
y = raw_bmi['Index']
X = raw_bmi.drop(columns='Index')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Support Vector Machines (SVM)

In [75]:
# One sub-grid per kernel so that each kernel is only combined with the
# hyper-parameters listed alongside it.
param_grid = [
    dict(kernel=['linear'], C=[0.1, 1, 10, 100, 1000]),
    dict(kernel=['rbf'], C=[0.1, 1, 10, 100, 1000], gamma=[0.001, 0.0001]),
    dict(kernel=['poly'], C=[0.1, 1, 10, 100, 1000], degree=[2, 3, 4]),
]

In [53]:
# Alternative, fully crossed search space kept for reference only.
# It is bound to its own name on purpose: this cell sits below the per-kernel
# `param_grid` list, so assigning to `param_grid` here would silently replace
# that list on a top-to-bottom run. The recorded best_params
# ({'C': 1, 'kernel': 'linear'}) contain only keys of the per-kernel list, so
# that list is the grid the saved results came from.
# Crossing every key also pairs gamma/degree/coef0 with the linear kernel,
# which multiplies the number of fits.
param_grid_exhaustive = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': [0.1, 1, 'scale', 'auto'],
    'degree': [2, 3, 4],
    'coef0': [0.0, 1.0],
}

In [76]:
# Search param_grid with 5-fold cross-validation, using all available cores.
clf = svm.SVC()
grid_search = GridSearchCV(clf, param_grid, cv=5, n_jobs=-1).fit(X_train, y_train)
# fit() returns the search object itself; end on it so the cell still
# displays the fitted search.
grid_search

In [77]:
# Keep the winning hyper-parameters and the estimator fitted with them.
best_params, best_estimator = (
    grid_search.best_params_,
    grid_search.best_estimator_,
)

In [78]:
# Show the hyper-parameter combination selected by the grid search.
best_params

{'C': 1, 'kernel': 'linear'}

In [79]:
# Predict the held-out rows with the estimator chosen by the grid search.
y_pred = best_estimator.predict(X=X_test)

In [80]:
# Fraction of held-out rows whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.95

In [68]:
# Persist the selected estimator to disk with joblib.
# NOTE(review): only the estimator is saved; the Gender encoding and the
# min/max bounds used for scaling are not part of this file.
filename = 'svm_model.sav'
joblib.dump(value=best_estimator, filename=filename)

['svm_model.sav']

In [96]:
# Raw, human-readable sample used for a one-off prediction.
gender, height, weight = 'Male', 176, 90

In [97]:
# Encode the raw sample the same way the training features were encoded.
# Guard: after encoding, `gender` is an int. Without the check, re-running
# this cell would turn gender into 1 (0 != 'Male') and rescale the already
# scaled height and weight a second time.
if isinstance(gender, str):
    gender_codes = {'Male': 0, 'Female': 1}
    if gender not in gender_codes:
        # Previously any value other than 'Male' silently became Female (1).
        raise ValueError(f"Unknown gender {gender!r}; expected 'Male' or 'Female'")
    # These bounds must match the min/max printed by min_max_scaling for the
    # dataset (Height: 140-199, Weight: 50-160).
    height_min, height_max = 140, 199
    weight_min, weight_max = 50, 160
    gender = gender_codes[gender]
    height = (height - height_min) / (height_max - height_min)
    weight = (weight - weight_min) / (weight_max - weight_min)

In [98]:
# Show the encoded sample in the order Gender, Height, Weight.
[gender, height, weight]

[0, 0.6101694915254238, 0.36363636363636365]

In [102]:
# Inspect the held-out feature rows.
# NOTE(review): the saved output of this cell lists Height/Weight in raw
# units (e.g. 161, 103) although the scaling cell maps them onto [0, 1], and
# the execution counts are out of order. This X_test presumably came from a
# split made on unscaled data; restart the kernel, run all cells and confirm.
X_test

Unnamed: 0,Gender,Height,Weight
361,0,161,103
73,0,180,75
374,0,174,95
155,1,179,103
104,1,192,140
...,...,...,...
347,0,162,58
86,1,175,141
75,1,197,154
438,0,188,65


In [104]:
# Wrap the encoded sample in a one-row DataFrame that carries the training
# column names. The estimator was fitted on a DataFrame, and passing a bare
# nested list makes scikit-learn warn that X has no valid feature names.
sample = pd.DataFrame([[gender, height, weight]], columns=X_train.columns)
# .item() unwraps the single-element prediction array into a plain scalar.
best_estimator.predict(sample).item()



5

In [105]:
# Human-readable label for each value of the dataset's `Index` column.
# The dataset defines six classes, 0 through 5. Class 0 was missing here, so
# looking up a predicted 0 raised KeyError, and class 1 carried the label
# that belongs to class 0.
index = {
    0: 'Extremely Weak',
    1: 'Weak',
    2: 'Normal',
    3: 'Overweight',
    4: 'Obese',
    5: 'Extremely Obese',
}

In [108]:
# Spot-check the label lookup for class 3.
index[3]

'Overweight'

In [37]:
# Baseline for comparison: an SVC with default hyper-parameters.
# Note that this rebinds `clf`, the name also used by the grid-search cell.
clf = svm.SVC().fit(X_train, y_train)
# fit() returns the estimator itself; end on it so the cell still displays it.
clf

In [40]:
# Predict the held-out rows with the baseline model; this overwrites the
# y_pred produced earlier by the grid-search estimator.
y_pred = clf.predict(X=X_test)

In [41]:
# Element-wise comparison: True where the predicted label equals the true one.
y_pred == y_test

361    False
73      True
374     True
155     True
104     True
       ...  
347     True
86      True
75      True
438     True
15      True
Name: Index, Length: 100, dtype: bool

In [43]:
# Accuracy of the default-parameter baseline on the held-out rows.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.93

# K-Nearest Neighbors (KNN)

# Decision Trees

# Random Forests

# XGBoost