In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [2]:
# Load the person gender/height/weight/index CSV and print summary statistics
# for its numeric columns.
# NOTE(review): this is an absolute, machine-specific path — the cell only runs
# where that exact file exists; consider a configurable/relative data directory.
csv_path = r'C:\Users\dell\Desktop\MyDocs\Docs\MK\500_Person_Gender_Height_Weight_Index.csv'
data = pd.read_csv(csv_path)
summary_stats = data.describe()
print(summary_stats)

           Height      Weight       Index
count  500.000000  500.000000  500.000000
mean   169.944000  106.000000    3.748000
std     16.375261   32.382607    1.355053
min    140.000000   50.000000    0.000000
25%    156.000000   80.000000    3.000000
50%    170.500000  106.000000    4.000000
75%    184.000000  136.000000    5.000000
max    199.000000  160.000000    5.000000


In [3]:
def give_names_to_indices(ind):
    """Translate a numeric index code (0-5) into its weight-category label.

    Returns None when `ind` does not equal any of the known codes.
    """
    labels = (
        'Extremely Weak',
        'Weak',
        'Normal',
        'OverWeight',
        'Obesity',
        'Extremely Obese',
    )
    # Compare with == (rather than a dict lookup) so any value that compares
    # equal to a code is matched, exactly like an if/elif ladder would.
    for code, label in enumerate(labels):
        if ind == code:
            return label
    return None

In [4]:
# Replace the numeric Index codes with their category names.
# Guarded so that re-running the cell is a no-op: once the column holds strings,
# give_names_to_indices matches no code and would return None for every row,
# silently wiping the labels.
if pd.api.types.is_numeric_dtype(data['Index']):
    data['Index'] = data['Index'].apply(give_names_to_indices)

In [5]:
# Display the frame to inspect the Index column after it was mapped to category names.
data

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,Obesity
1,Male,189,87,Normal
2,Female,185,110,Obesity
3,Female,195,104,OverWeight
4,Male,149,61,OverWeight
...,...,...,...,...
495,Female,150,153,Extremely Obese
496,Female,184,121,Obesity
497,Female,141,136,Extremely Obese
498,Male,150,95,Extremely Obese


In [6]:
# Number of rows per gender.
people = data['Gender'].value_counts()

In [7]:
# Number of rows per weight category.
categories = data['Index'].value_counts()

In [8]:
# Weight-category counts among the male rows
is_male = data['Gender'] == 'Male'
data.loc[is_male, 'Index'].value_counts()

Index
Extremely Obese    105
Obesity             59
OverWeight          32
Normal              28
Weak                15
Extremely Weak       6
Name: count, dtype: int64

In [9]:
# Weight-category counts among the female rows
is_female = data['Gender'] == 'Female'
data.loc[is_female, 'Index'].value_counts()

Index
Extremely Obese    93
Obesity            71
Normal             41
OverWeight         36
Weak                7
Extremely Weak      7
Name: count, dtype: int64

In [10]:
# One-hot encode Gender, then swap the text column for the indicator columns
# (appended after the remaining columns).
data2 = pd.get_dummies(data['Gender'])
data = pd.concat([data.drop(columns='Gender'), data2], axis=1)

In [11]:
# Separate the target labels (y) from the feature columns (data).
y = data['Index']
data = data.drop(columns=['Index'])

In [12]:
# Standardise every feature column, including the one-hot gender indicators.
# NOTE(review): the scaler is fitted on the full dataset before train_test_split
# is called, so test-set statistics leak into preprocessing — consider fitting
# on the training split only.
# Wrapping the result in a fresh DataFrame discards the original column names.
scaler = StandardScaler()
data = pd.DataFrame(scaler.fit_transform(data))

In [13]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.3, random_state=101)

In [20]:
# Candidate forest sizes to search over; n_estimators is the only tuned hyper-parameter.
param_grid = {
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 1000],
}
grid_cv = GridSearchCV(
    estimator=RandomForestClassifier(random_state=101),
    param_grid=param_grid,
    verbose=3,
)

In [22]:
# Run the grid search on the training split; verbose=3 (set when grid_cv was built) logs every CV fit.
grid_cv.fit(X_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ..................n_estimators=100;, score=0.857 total time=   0.1s
[CV 2/5] END ..................n_estimators=100;, score=0.886 total time=   0.1s
[CV 3/5] END ..................n_estimators=100;, score=0.829 total time=   0.1s
[CV 4/5] END ..................n_estimators=100;, score=0.786 total time=   0.1s
[CV 5/5] END ..................n_estimators=100;, score=0.900 total time=   0.1s
[CV 1/5] END ..................n_estimators=200;, score=0.843 total time=   0.3s
[CV 2/5] END ..................n_estimators=200;, score=0.900 total time=   0.4s
[CV 3/5] END ..................n_estimators=200;, score=0.843 total time=   0.3s
[CV 4/5] END ..................n_estimators=200;, score=0.829 total time=   0.4s
[CV 5/5] END ..................n_estimators=200;, score=0.886 total time=   0.3s
[CV 1/5] END ..................n_estimators=300;, score=0.843 total time=   0.4s
[CV 2/5] END ..................n_estimators=300;,

In [23]:
# Show the n_estimators value selected by the grid search.
print(grid_cv.best_params_)

{'n_estimators': 200}


In [32]:
# Predict the weight category for every row of the held-out test split.
pred = grid_cv.predict(X_test)

# Report per-class precision/recall/F1, the confusion matrix, and overall
# accuracy as a percentage.
print(classification_report(y_test,pred))
print('\n')
print(confusion_matrix(y_test,pred))
print('\n')
# Label spelling fixed (was 'Acuuracy').
print('Accuracy is --> ',accuracy_score(y_test,pred)*100)
print('\n')

def lp(details):
    """Predict the weight category for a single person ("live predictor").

    Parameters
    ----------
    details : sequence
        ``[gender, height, weight]``; gender must be ``'Male'`` or ``'Female'``
        and height/weight must be convertible to float.

    Returns
    -------
    The first (and only) label returned by ``grid_cv.predict`` for the
    scaled feature row.

    Raises
    ------
    ValueError
        If gender is neither ``'Male'`` nor ``'Female'``. Previously such input
        fell through both branches and the raw list was handed to the scaler,
        failing with an unrelated error.
    """
    gender, height, weight = details[0], details[1], details[2]

    # Gender is encoded as two indicator values placed after height and weight.
    # NOTE(review): the (female, male) order is assumed to mirror the column
    # order produced by pd.get_dummies(data['Gender']) at training time — confirm.
    if gender == 'Male':
        female_flag, male_flag = 0.0, 1.0
    elif gender == 'Female':
        female_flag, male_flag = 1.0, 0.0
    else:
        raise ValueError(f"gender must be 'Male' or 'Female', got {gender!r}")

    features = np.array([[np.float64(height), np.float64(weight), female_flag, male_flag]])

    # Apply the same fitted scaler used on the training features before predicting.
    y_pred = grid_cv.predict(scaler.transform(features))
    return y_pred[0]
    


                 precision    recall  f1-score   support

Extremely Obese       0.91      0.97      0.94        63
 Extremely Weak       1.00      1.00      1.00         1
         Normal       0.92      0.96      0.94        23
        Obesity       0.78      0.82      0.79        38
     OverWeight       0.92      0.58      0.71        19
           Weak       0.83      0.83      0.83         6

       accuracy                           0.87       150
      macro avg       0.89      0.86      0.87       150
   weighted avg       0.88      0.87      0.87       150



[[61  0  0  2  0  0]
 [ 0  1  0  0  0  0]
 [ 0  0 22  0  0  1]
 [ 6  0  0 31  1  0]
 [ 0  0  1  7 11  0]
 [ 0  0  1  0  0  5]]


Acuuracy is -->  87.33333333333333




In [34]:
# Live predictor — sample query: male, height 175, weight 80
your_details = ['Male', 175, 80]
predicted_category = lp(your_details)
print(predicted_category)

OverWeight




In [38]:
# Live predictor — sample query: female, height 75, weight 40
# NOTE(review): both values are below the dataset minimums reported by
# describe() (Height 140, Weight 50), so this prediction is an extrapolation.
your_details = ['Female', 75, 40]
predicted_category = lp(your_details)
print(predicted_category)

Normal


