In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
data = pd.read_csv('Preprocessed_Data.csv')
predictors = data.iloc[:, :7]
target = data.iloc[:, 7]
data.head()

Unnamed: 0,Sex,Age,Height,Weight,Low Income,Lower Middle Income,Upper Middle Income,Status
0,1,5,75,17,1,0,0,Stunting
1,0,4,101,13,1,0,0,Stunting
2,0,4,71,17,1,0,0,Stunting
3,0,3,81,13,0,0,1,Stunting
4,0,1,79,16,0,0,1,Stunting


In [3]:
print(predictors.head())
print(target.head())

   Sex  Age  Height  Weight  Low Income  Lower Middle Income  \
0    1    5      75      17           1                    0   
1    0    4     101      13           1                    0   
2    0    4      71      17           1                    0   
3    0    3      81      13           0                    0   
4    0    1      79      16           0                    0   

   Upper Middle Income  
0                    0  
1                    0  
2                    0  
3                    1  
4                    1  
0    Stunting
1    Stunting
2    Stunting
3    Stunting
4    Stunting
Name: Status, dtype: object


In [4]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(predictors, target, test_size = 0.3)

In [5]:
from sklearn.svm import SVC
model = SVC()
model.fit(xtrain, ytrain)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [6]:
predict = model.predict(xtest)

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
print(confusion_matrix(ytest, predict))
print(accuracy_score(ytest, predict))
print(classification_report(ytest, predict))

[[  0  22   0   0]
 [  0 209   0   0]
 [  0  18   0   0]
 [  0   3   0   0]]
0.8293650793650794
              precision    recall  f1-score   support

  Overweight       0.00      0.00      0.00        22
    Stunting       0.83      1.00      0.91       209
 Underweight       0.00      0.00      0.00        18
     Wasting       0.00      0.00      0.00         3

   micro avg       0.83      0.83      0.83       252
   macro avg       0.21      0.25      0.23       252
weighted avg       0.69      0.83      0.75       252



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [8]:
from sklearn.model_selection import GridSearchCV
parameter = {'C' : [0.1,1,10,100, 1000], 'gamma': [1,0.1,0.01,0.001, 0.00001]}
grid = GridSearchCV(SVC(), param_grid=parameter, verbose=10)
grid.fit(xtrain, ytrain)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.8316326530612245, total=   0.1s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.8358974358974359, total=   0.1s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......... C=0.1, gamma=1, score=0.8402061855670103, total=   0.1s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8316326530612245, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....... C=0.1, gamma=0.1, score=0.8358974358974359, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.4s remaining:    0.0s


[CV] ....... C=0.1, gamma=0.1, score=0.8402061855670103, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8316326530612245, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8358974358974359, total=   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...... C=0.1, gamma=0.01, score=0.8402061855670103, total=   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..... C=0.1, gamma=0.001, score=0.8316326530612245, total=   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..... C=0.1, gamma=0.001, score=0.8358974358974359, total=   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..... C=0.1, gamma=0.001, score=0.8402061855670103, total=   0.0s
[CV] C=0.1, gamma=1e-05 ..............................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.6s remaining:    0.0s


[CV] ..... C=0.1, gamma=1e-05, score=0.8316326530612245, total=   0.0s
[CV] C=0.1, gamma=1e-05 ..............................................
[CV] ..... C=0.1, gamma=1e-05, score=0.8358974358974359, total=   0.0s
[CV] C=0.1, gamma=1e-05 ..............................................
[CV] ..... C=0.1, gamma=1e-05, score=0.8402061855670103, total=   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ........... C=1, gamma=1, score=0.8316326530612245, total=   0.1s
[CV] C=1, gamma=1 ....................................................
[CV] ........... C=1, gamma=1, score=0.8358974358974359, total=   0.1s
[CV] C=1, gamma=1 ....................................................
[CV] ........... C=1, gamma=1, score=0.8402061855670103, total=   0.1s
[CV] C=1, gamma=0.1 ..................................................
[CV] ......... C=1, gamma=0.1, score=0.8316326530612245, total=   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] .

[CV] .... C=1000, gamma=0.001, score=0.8769230769230769, total=   0.3s
[CV] C=1000, gamma=0.001 .............................................
[CV] .... C=1000, gamma=0.001, score=0.8762886597938144, total=   0.4s
[CV] C=1000, gamma=1e-05 .............................................
[CV] .... C=1000, gamma=1e-05, score=0.8316326530612245, total=   0.1s
[CV] C=1000, gamma=1e-05 .............................................
[CV] .... C=1000, gamma=1e-05, score=0.8358974358974359, total=   0.1s
[CV] C=1000, gamma=1e-05 .............................................
[CV] .... C=1000, gamma=1e-05, score=0.8402061855670103, total=   0.1s


[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    6.6s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 1e-05]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=10)

In [9]:
grid.best_params_

{'C': 1000, 'gamma': 0.001}

In [10]:
grid.best_estimator_

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [11]:
grid.best_score_

0.8769230769230769

In [12]:
gpredict = grid.predict(xtest)

In [13]:
print(confusion_matrix(ytest, gpredict))
print(classification_report(ytest, gpredict))

[[ 12  15   0   0]
 [ 12 195   0   0]
 [  0  17   0   0]
 [  1   0   0   0]]
              precision    recall  f1-score   support

  Overweight       0.48      0.44      0.46        27
    Stunting       0.86      0.94      0.90       207
 Underweight       0.00      0.00      0.00        17
     Wasting       0.00      0.00      0.00         1

   micro avg       0.82      0.82      0.82       252
   macro avg       0.33      0.35      0.34       252
weighted avg       0.76      0.82      0.79       252



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [8]:
from sklearn.svm import SVC
model_final = SVC(C=1000, gamma=0.001)
model_final.fit(xtrain, ytrain)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
predictions = model_final.predict(xtest)

In [11]:
print(classification_report(ytest, predictions))
print(accuracy_score(ytest, predictions))

              precision    recall  f1-score   support

  Overweight       0.71      0.77      0.74        22
    Stunting       0.89      0.98      0.93       209
 Underweight       0.00      0.00      0.00        18
     Wasting       0.00      0.00      0.00         3

   micro avg       0.88      0.88      0.88       252
   macro avg       0.40      0.44      0.42       252
weighted avg       0.80      0.88      0.84       252

0.876984126984127


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
