# **Load Dataset**

In [3]:
import pandas as pd 
import numpy as np

In [4]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [5]:
# Location of the raw CSV on the mounted Google Drive.
dataset_path = "/content/drive/MyDrive/Batch 23/Weight Category Prediction Dataset.csv"
# Read the whole dataset into a DataFrame.
data = pd.read_csv(dataset_path)

In [6]:
data.head()

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3


In [7]:
data.tail()

Unnamed: 0,Gender,Height,Weight,Index
495,Female,150,153,5
496,Female,184,121,4
497,Female,141,136,5
498,Male,150,95,5
499,Male,173,131,5


# **One Hot Encoding**

In [8]:
from sklearn.preprocessing import LabelBinarizer

In [9]:
gender_category=data['Gender']
gender_category

0        Male
1        Male
2      Female
3      Female
4        Male
        ...  
495    Female
496    Female
497    Female
498      Male
499      Male
Name: Gender, Length: 500, dtype: object

In [10]:
# Learn the label set from the gender column first, then encode that same
# column with the fitted binarizer.
LB = LabelBinarizer()
LB.fit(gender_category)
gen_result = LB.transform(gender_category)

In [None]:
gen_result

In [12]:
LB.classes_

array(['Female', 'Male'], dtype='<U6')

# **Handle Imbalanced Dataset**

In [13]:
# Feature frame: everything except the target ('Index') and the raw text
# 'Gender' column.
df = data.drop(columns=['Index', 'Gender'])

In [None]:
df['gender_new']=gen_result
df

In [15]:
# Features are the prepared frame; the target is the 'Index' column of the
# original data.
x, y = df, data['Index']

In [None]:
y.value_counts()

In [None]:
y.value_counts().plot(kind='bar')

**SMOTE**

In [None]:
!pip install imbalanced-learn

In [19]:
from imblearn.over_sampling import SMOTE

In [20]:
# SMOTE generates synthetic samples using random draws; pin the seed so the
# resampled dataset (and every number derived from it) is the same on each run.
# 452 matches the seed already used for the train/test split in this notebook.
smote = SMOTE(random_state=452)
x_smote, y_smote = smote.fit_resample(x, y)

In [None]:
y_smote.value_counts()

In [None]:
y_smote.value_counts().plot(kind='bar')

# **Train-Test Split**

In [23]:
from sklearn.model_selection import train_test_split

In [24]:
# Hold out 20% of the resampled data for evaluation; the fixed seed keeps the
# partition repeatable.
# NOTE(review): the split is done on the SMOTE output, so the held-out part
# may contain synthetic rows - consider splitting before resampling.
split_parts = train_test_split(
    x_smote,
    y_smote,
    test_size=0.2,
    random_state=452,
)
x_train, x_test, y_train, y_test = split_parts

In [25]:
len(y_train)

950

In [26]:
len(y_test)

238

In [27]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((950, 3), (238, 3), (950,), (238,))

# **Bagging & Boosting**

In [None]:
!pip install catboost

In [29]:
from sklearn.ensemble import RandomForestClassifier
import xgboost
import catboost
import lightgbm

In [30]:
# One shared seed for every ensemble so that the accuracies reported below can
# be reproduced after a kernel restart. 452 matches the train/test split seed.
MODEL_SEED = 452

model_RF = RandomForestClassifier(n_estimators=100, random_state=MODEL_SEED)
model_xgboost = xgboost.XGBClassifier(n_estimators=150, random_state=MODEL_SEED)
model_catboost = catboost.CatBoostClassifier(random_seed=MODEL_SEED)
model_lgbm = lightgbm.LGBMClassifier(random_state=MODEL_SEED)

In [None]:
# Train each of the four classifiers on the same training split.
model_RF.fit(x_train, y_train)
model_xgboost.fit(x_train, y_train)
model_catboost.fit(x_train, y_train)
# Last expression of the cell: its return value is what the cell displays.
model_lgbm.fit(x_train, y_train)

In [32]:
# Predict the held-out split with every trained model, in the same order as
# the names on the left-hand side.
RF_pred, xgboost_pred, catboost_pred, lgbm_pred = (
    fitted_model.predict(x_test)
    for fitted_model in (model_RF, model_xgboost, model_catboost, model_lgbm)
)

**Accuracy**

In [33]:
from sklearn.metrics import accuracy_score

In [53]:
# Report the test accuracy (as a percentage) of each model, one per line.
for model_label, model_pred in (
    ("RF: ", RF_pred),
    ("XGBM: ", xgboost_pred),
    ("CatBoost: ", catboost_pred),
    ("LGBM: ", lgbm_pred),
):
    print(model_label, accuracy_score(y_test, model_pred)*100)

RF:  94.9579831932773
XGBM:  91.59663865546219
CatBoost:  94.53781512605042
LGBM:  94.53781512605042


# **Confusion Matrix**

In [35]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
confusion_matrix(y_test, catboost_pred)

In [None]:
RF_report = pd.DataFrame(classification_report(y_test, RF_pred,output_dict=True))
RF_report 

In [None]:
xgb_report = pd.DataFrame(classification_report(y_test, xgboost_pred,output_dict=True))
xgb_report

In [None]:
cb_report = pd.DataFrame(classification_report(y_test, catboost_pred,output_dict=True))
cb_report 

In [None]:
lgbm_report = pd.DataFrame(classification_report(y_test, lgbm_pred,output_dict=True))
lgbm_report

# **Hyperparameter Tuning**

**GridSearch CV**

In [41]:
from sklearn.model_selection import GridSearchCV

In [42]:
# Search space for the grid search: tree depth and number of boosting rounds.
# NOTE(review): depth 3 is absent from the list - kept as in the original;
# confirm whether that omission is intentional.
param_grid = dict(
    max_depth=[1, 2] + list(range(4, 12)),
    n_estimators=[50, 100],
)

In [43]:
# Exhaustive search over param_grid using the CatBoost classifier as the base
# estimator; all other GridSearchCV settings are left at their defaults.
grid_search = GridSearchCV(
    estimator=model_catboost,
    param_grid=param_grid,
)

In [None]:
Model_fit= grid_search.fit(x_train,y_train)

In [45]:
Best_Model = Model_fit.best_estimator_
Best_Model

<catboost.core.CatBoostClassifier at 0x7f21c38ad390>

In [46]:
Best_Model.score(x_test,y_test)*100

95.37815126050421

# **Check Overfitting and Underfitting**

In [48]:
# Compare accuracy on the training and held-out splits; a large gap between
# the two numbers points at overfitting.
for split_label, split_x, split_y in (
    ('Train Accuracy - :', x_train, y_train),
    ('Test Accuracy - :', x_test, y_test),
):
    print(split_label, Best_Model.score(split_x, split_y)*100)

Train Accuracy - : 99.47368421052632
Test Accuracy - : 95.37815126050421


# **Save Model**

In [49]:
import pickle

In [50]:
# Persist the tuned model to Drive. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way through.
with open('/content/drive/MyDrive/Batch 23/Weight Category Predictor.pkl', 'wb') as model_file:
    pickle.dump(Best_Model, model_file)

# **Live Prediction**

In [None]:
pred=Best_Model.predict(x_test)
pred

In [52]:
# Human-readable name for each value of the target index (0-5).
CATEGORY_NAMES = {
    0: 'Extremely Weak',
    1: 'Weak',
    2: 'Normal',
    3: 'OverWeight',
    4: 'Obese',
    5: 'Extremely Obese',
}


def give_names_to_indices(Cate):
    """Translate a predicted index into its weight-category name.

    Parameters
    ----------
    Cate : scalar or array-like
        The predicted class. The model's ``predict`` output may be wrapped in
        an array, so the first element is extracted before the lookup.

    Returns
    -------
    str or None
        The category name, or ``None`` when the index is not one of 0-5.
    """
    # np.ravel accepts both a bare scalar and a nested array, so the lookup
    # always sees one plain integer instead of comparing an array to a scalar.
    label = int(np.ravel(Cate)[0])
    return CATEGORY_NAMES.get(label)


# Ask for two people's details and print the predicted category for each.
for i in range(2):
    print("\nGENDER--->>>> \n   female= 0/ male= 1\n")
    gender = int(input("Enter Your Gender -: "))
    Height = int(input("Enter Your Height  -: "))
    Weight= int(input("Enter Your Weight  -: "))
    print('\n')

    # Feature order must match the training frame: Height, Weight, gender_new.
    Cate = Best_Model.predict([[Height, Weight, gender]])

    print ('Weight Category - :' , give_names_to_indices(Cate))
    print('=====================================================')






GENDER--->>>> 
   female= 0/ male= 1

Enter Your Gender -: 0
Enter Your Height  -: 170
Enter Your Weight  -: 63


Weight Category - : Normal

GENDER--->>>> 
   female= 0/ male= 1

Enter Your Gender -: 1
Enter Your Height  -: 070
Enter Your Weight  -: 55


Weight Category - : OverWeight
