In [82]:
##Predicting Test data

In [83]:
#importing standard libraries
import pandas as pd
import numpy as np
import joblib as jb
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))
import config as cfg
import warnings
warnings.filterwarnings('ignore')

In [84]:
#Define the preprocessing class that re-applies the saved encoder and scaler
class DataPreprocessor():
    """Re-apply the saved preprocessing (feature engineering, encoder, scaler) to raw data.

    Parameters
    ----------
    num_cols : list of str
        Names of the numeric columns. Stored on the instance; ``transform``
        itself does not read it.
    cat_cols : list of str
        Names of the categorical columns passed to the saved encoder.

    Attributes
    ----------
    scaler, encoder
        Objects loaded with joblib from ``cfg.models + 'scaler.joblib'`` and
        ``cfg.models + 'encoder.joblib'``.
    """
    def __init__(self,num_cols,cat_cols):
        self.num_cols=num_cols
        self.cat_cols=cat_cols
        # NOTE: joblib.load unpickles the file, so only load artifacts you trust.
        self.scaler=jb.load(cfg.models+'scaler.joblib')
        self.encoder=jb.load(cfg.models+'encoder.joblib')

    def transform(self,X):
        """Turn a raw feature frame into the input expected by the saved scaler.

        Steps, in order:
        1. add ``BMI = Weight / Height**2`` and drop ``Weight`` / ``Height``;
        2. encode ``cat_cols`` with the saved encoder;
        3. replace ``Age`` with ``log_age = log(Age)``;
        4. concatenate the remaining columns, the encoded columns and
           ``log_age`` (in that order) and pass the result to the saved scaler.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``Weight``, ``Height``, ``Age`` and every column in
            ``cat_cols``. The caller's frame is not modified.

        Returns
        -------
        Whatever ``self.scaler.transform`` returns for the assembled frame.
        """
        X=X.copy()
        # Vectorised BMI: same values as a row-wise apply, but it also works on
        # an empty frame and avoids one Python call per row.
        X['BMI']=X['Weight']/X['Height']**2
        X=X.drop(['Weight','Height'],axis=1)

        encoded=self.encoder.transform(X[self.cat_cols])
        # Some encoders return a scipy sparse matrix; densify before framing it.
        if hasattr(encoded,'toarray'):
            encoded=encoded.toarray()
        # Reuse X's index: with a fresh RangeIndex the log_age assignment and the
        # concat below would align on mismatched labels and inject NaN rows
        # whenever X is not indexed 0..n-1 (e.g. a filtered or split frame).
        X_cat=pd.DataFrame(
            np.asarray(encoded),
            index=X.index,
            columns=list(self.encoder.get_feature_names_out()),
        )
        X_cat['log_age']=np.log(X['Age'])

        # Column order matters here: the frame is handed to the scaler as-is.
        X=X.drop(list(self.cat_cols)+['Age'],axis=1)
        X=pd.concat([X,X_cat],axis=1)
        return self.scaler.transform(X)

In [85]:
# Build the preprocessor used for the test set: three numeric columns and the
# eight categorical columns that go through the saved encoder.
preprocessor = DataPreprocessor(
    num_cols=['BMI', 'CH2O', 'FCVC'],
    cat_cols=[
        'Gender',
        'family_history_with_overweight',
        'FAVC',
        'CAEC',
        'SMOKE',
        'SCC',
        'CALC',
        'MTRANS',
    ],
)


In [86]:
# Load the raw test set from the CSV path configured in cfg.raw_test_data.
test_data=pd.read_csv(cfg.raw_test_data)
# Preview the first five rows.
test_data.head(5)

Unnamed: 0,id,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS
0,20758,Male,26.899886,1.848294,120.644178,yes,yes,2.938616,3.0,Sometimes,no,2.825629,no,0.8554,0.0,Sometimes,Public_Transportation
1,20759,Female,21.0,1.6,66.0,yes,yes,2.0,1.0,Sometimes,no,3.0,no,1.0,0.0,Sometimes,Public_Transportation
2,20760,Female,26.0,1.643355,111.600553,yes,yes,3.0,3.0,Sometimes,no,2.621877,no,0.0,0.250502,Sometimes,Public_Transportation
3,20761,Male,20.979254,1.553127,103.669116,yes,yes,2.0,2.977909,Sometimes,no,2.786417,no,0.094851,0.0,Sometimes,Public_Transportation
4,20762,Female,26.0,1.627396,104.835346,yes,yes,3.0,3.0,Sometimes,no,2.653531,no,0.0,0.741069,Sometimes,Public_Transportation


In [87]:
# Keep the row ids aside for the prediction files; every other column is a model input.
# NOTE(review): `id` shadows the Python builtin of the same name. It is left as-is
# because the prediction cells further down read this variable.
id = test_data['id']
test_data_X = test_data.drop(columns=['id'])
test_data_X.head(5)

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS
0,Male,26.899886,1.848294,120.644178,yes,yes,2.938616,3.0,Sometimes,no,2.825629,no,0.8554,0.0,Sometimes,Public_Transportation
1,Female,21.0,1.6,66.0,yes,yes,2.0,1.0,Sometimes,no,3.0,no,1.0,0.0,Sometimes,Public_Transportation
2,Female,26.0,1.643355,111.600553,yes,yes,3.0,3.0,Sometimes,no,2.621877,no,0.0,0.250502,Sometimes,Public_Transportation
3,Male,20.979254,1.553127,103.669116,yes,yes,2.0,2.977909,Sometimes,no,2.786417,no,0.094851,0.0,Sometimes,Public_Transportation
4,Female,26.0,1.627396,104.835346,yes,yes,3.0,3.0,Sometimes,no,2.653531,no,0.0,0.741069,Sometimes,Public_Transportation


In [88]:
# Run the test features through DataPreprocessor.transform; both models below
# predict on this transformed result.
transformed_data=preprocessor.transform(test_data_X)

#### Modelling: 

In [89]:
# Integer class id -> label name. The ids are simply the positions of the
# labels in the list below, which is in alphabetical order.
# NOTE(review): presumably this mirrors the label encoding used at training time — confirm.
result_map = dict(enumerate([
    'Insufficient_Weight',
    'Normal_Weight',
    'Obesity_Type_I',
    'Obesity_Type_II',
    'Obesity_Type_III',
    'Overweight_Level_I',
    'Overweight_Level_II',
]))

In [90]:
def map_res_val(val):
    """Translate a predicted class id into its label name.

    Looks ``val`` up in the module-level ``result_map``; an id that is not a
    key of that dict raises ``KeyError``.
    """
    label = result_map[val]
    return label

In [112]:
#predicting using bgm
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
bgm_clf=jb.load(cfg.models+'bgm.joblib')
bgm_pred_data=pd.DataFrame(bgm_clf.predict(transformed_data),columns=['NObeyesdad'])
# Map class ids to label names with Series.map: DataFrame.applymap is deprecated
# since pandas 2.1, and the warning is hidden by the global filterwarnings('ignore').
bgm_pred_data['NObeyesdad']=bgm_pred_data['NObeyesdad'].map(map_res_val)
# concat(axis=1) aligns on the index, so a row-count mismatch would silently
# produce NaN rows in the output file; fail loudly instead.
assert len(bgm_pred_data)==len(id), "number of predictions does not match number of ids"
bgm=pd.concat([id,bgm_pred_data],axis=1)
bgm.head(7)



Unnamed: 0,id,NObeyesdad
0,20758,Obesity_Type_II
1,20759,Overweight_Level_I
2,20760,Obesity_Type_III
3,20761,Obesity_Type_I
4,20762,Obesity_Type_III
5,20763,Insufficient_Weight
6,20764,Insufficient_Weight


In [114]:
# Save the bgm predictions (id, NObeyesdad) as CSV without the index column.
# NOTE(review): the file is written under cfg.input — confirm that is the intended location.
bgm.to_csv(cfg.input+'bgm.csv',index=False)

In [121]:
#predicting using ann
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
ann_clf=jb.load(cfg.models+'ann.joblib')
ann_pred=ann_clf.predict(transformed_data)
# Pick the highest-scoring class per row in one vectorised call.
# Assumes ann_pred is 2-D (one row of class scores per sample), which is what the
# original per-row argmax implied — TODO confirm.
ann_pred_data=pd.DataFrame(np.argmax(ann_pred,axis=1),columns=['NObeyesdad'])
# Map class ids to label names with Series.map: DataFrame.applymap is deprecated
# since pandas 2.1, and the warning is hidden by the global filterwarnings('ignore').
ann_pred_data['NObeyesdad']=ann_pred_data['NObeyesdad'].map(map_res_val)
# concat(axis=1) aligns on the index, so a row-count mismatch would silently
# produce NaN rows in the output file; fail loudly instead.
assert len(ann_pred_data)==len(id), "number of predictions does not match number of ids"
ann=pd.concat([id,ann_pred_data],axis=1)
ann.head(7)



Unnamed: 0,id,NObeyesdad
0,20758,Obesity_Type_II
1,20759,Overweight_Level_I
2,20760,Obesity_Type_III
3,20761,Obesity_Type_II
4,20762,Obesity_Type_III
5,20763,Insufficient_Weight
6,20764,Insufficient_Weight


In [122]:
# Save the ann predictions (id, NObeyesdad) as CSV without the index column.
# NOTE(review): the file is written under cfg.input — confirm that is the intended location.
ann.to_csv(cfg.input+'ann.csv',index=False)