
<div style="display:block;
           background-color:#ddc89caa;
           letter-spacing:0.5px;border-bottom: 2px solid black;">
<img src="https://images.unsplash.com/photo-1460306855393-0410f61241c7?q=80&h=500&w=2000&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D">
    
<H1 style="padding: 10px; color:black; font-weight:600;font-family: 'Garamond', 'Lucida Sans', sans-serif; text-align: center; font-size: 42px;">Multi-Class Prediction of Obesity Risk</H1>
</div>


In [None]:
import numpy as np 
import pandas as pd 
import os
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import cross_val_score, KFold
from keras.models import Sequential
from keras.layers import Dense, Dropout, LeakyReLU, GaussianNoise 
from sklearn.pipeline import Pipeline
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/keras/seaborn —
# consider narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")
# Register pandas' formatters/converters with matplotlib.
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import seaborn as sns
# Use seaborn's "rocket" palette as the default for the plots below.
sns.set_palette("rocket")

<div style="background-color: #ddc89caa; padding: 20px; border-radius: 20px; border: 2px solid black;">
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: black; font-weight: bold; font-size: 42px;">
    Table of Contents
    </h1>
</div>

<a href="#1" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 22px;"> 1. Dataset Overview </a><br>
<a href="#2" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 22px;"> 2. Data Processing </a> <br>
<a href="#3" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 22px;"> 3. Exploratory Data Analysis & Visualization </a> <br>
<a href="#4" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 22px;"> 4. Training Models </a><br>
<a href="#4.1" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 16px;padding-left: 25px;"> 4.1 Defining DL Network  </a><br>
<a href="#4.2" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 16px;padding-left: 25px;"> 4.2 Training DL Network </a><br>
<a href="#4.3" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 16px;padding-left: 25px;"> 4.3 Plotting Training Curves</a><br>
<a href="#7" style="font-family: 'Lucida Sans', 'Lucida Sans', sans-serif; text-align: left; color: #323232;font-size: 22px;"> 5. Creating 'submission.csv' </a><br>


<div id="1" style="background-color: #ddc89caa; padding: 20px; border-radius: 20px; border: 2px solid black;">
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #000000; font-weight: bold; font-size: 42px;">
    Dataset Overview
    </h1>
</div>

In [None]:
# Shorter alias for the verbose family-history column; applied to both splits.
column_aliases = {"family_history_with_overweight": "FamilyHistory"}

# Original (non-synthetic) dataset plus the competition train/test files.
orig_data = pd.read_csv("/kaggle/input/obesity-or-cvd-risk-classifyregressorcluster/ObesityDataSet.csv")
competition_train = pd.read_csv("/kaggle/input/playground-series-s4e2/train.csv", index_col="id")

# Stack the original rows ahead of the competition rows and renumber them 0..n-1,
# so the training frame ends up with a plain positional index.
train_data = (
    pd.concat([orig_data, competition_train], ignore_index=True)
    .rename(columns=column_aliases)
)

# The test split keeps its "id" index; it is written out with the submission.
test_data = (
    pd.read_csv("/kaggle/input/playground-series-s4e2/test.csv", index_col="id")
    .rename(columns=column_aliases)
)

In [None]:
# Peek at the first ten rows of the combined training frame.
train_data.head(n=10)

In [None]:
# Peek at the first ten rows of the test frame.
test_data.head(n=10)

<div id="2" style="background-color: #ddc89caa; padding: 20px; border-radius: 20px; border: 2px solid black;">
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #000000; font-weight: bold; font-size: 42px;">
    Data Processing
    </h1>
</div>

In [None]:
# Derive a BMI feature (Weight / Height^2) on both splits so that they keep
# identical feature columns.
for frame in (train_data, test_data):
    frame['BMI'] = frame['Weight'] / (frame['Height'] ** 2)

In [None]:
# Integer-encode the categorical feature columns of both splits.
categorical_cols = ["CAEC","MTRANS",'Gender', 'FamilyHistory', 'FAVC', 'SMOKE', 'SCC','CALC']

le = LabelEncoder()
for col in categorical_cols:
    # Fit on the labels seen in either split: LabelEncoder.transform raises
    # ValueError on a label it was not fitted on, so a category that only occurs
    # in the test set would otherwise abort the notebook. When every test label
    # also occurs in train, the resulting codes are the same as fitting on train alone.
    le.fit(pd.concat([train_data[col], test_data[col]], ignore_index=True))
    train_data[col] = le.transform(train_data[col])
    test_data[col] = le.transform(test_data[col])
    print(f"{col} Encoded")

In [None]:
# One-hot encode the target column and append one indicator column per class.
ohe = OneHotEncoder(sparse_output=False,dtype="int64")
target_matrix = ohe.fit_transform(train_data[["NObeyesdad"]])
target_classes = list(ohe.categories_[0])

# Name the indicator columns after their classes up front (rather than patching
# the column list positionally afterwards) and reuse train_data's index so the
# concat below lines rows up regardless of how train_data is indexed.
target_onehot = pd.DataFrame(target_matrix, columns=target_classes, index=train_data.index)

# Drop indicator columns left over from a previous run of this cell so that
# re-running it does not append duplicates.
train_data = pd.concat(
    [train_data.drop(columns=target_classes, errors="ignore"), target_onehot],
    axis=1,
)

In [None]:
# Check the encoded features and the appended one-hot target columns.
train_data.head(n=5)

<div id="3" style="background-color: #ddc89caa; padding: 20px; border-radius: 20px; border: 2px solid black;">
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #000000; font-weight: bold; font-size: 42px;">
    Exploratory Data Analysis & Visualization 
    </h1>
</div>

In [None]:
# One count plot per categorical feature, laid out on a 4x2 grid.
count_cols = ["CAEC","MTRANS",'Gender', 'FamilyHistory', 'FAVC', 'SMOKE', 'SCC','CALC']

fig, axes = plt.subplots(4, 2, figsize=(15, 20), dpi=400)

# axes.ravel() walks the grid row by row, matching plt.subplot's 1-based numbering.
for ax, col in zip(axes.ravel(), count_cols):
    sns.countplot(x=train_data[col], ax=ax)
    ax.set_title(f"Countplot of {col}")

In [None]:
# One histogram (with KDE overlay) per numeric feature, laid out on a 5x2 grid.
numeric_cols = ['Age', 'Height', 'Weight', 'BMI', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']

fig, axes = plt.subplots(5, 2, figsize=(15, 25), dpi=400)
axes = axes.ravel()  # row-major order, same as plt.subplot's numbering

for ax, col in zip(axes, numeric_cols):
    sns.histplot(x=train_data[col], kde=True, ax=ax)
    ax.set_title(f"Histplot of {col}")

# The grid has ten slots but there are only nine features; hide the leftover
# axes so the figure does not end with an empty framed panel.
for ax in axes[len(numeric_cols):]:
    ax.set_visible(False)

<div id="4" style="background-color: #ddc89caa; padding: 20px; border-radius: 20px; border: 2px solid black;">
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #000000; font-weight: bold; font-size: 42px;">
   Training Models
    </h1>
</div>

In [None]:
# Split the training frame into the feature matrix X and the one-hot target y.
target_cols = list(ohe.categories_[0])

# Build X from a dropped *copy* instead of mutating train_data in place: the
# in-place drop made this cell raise KeyError when run a second time.
# errors="ignore" keeps the cell working even if the raw label column has
# already been removed from train_data.
X = train_data.drop(columns=["NObeyesdad", *target_cols], errors="ignore").values
y = train_data[target_cols].values

<div id="4.1" >
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #263A29; font-weight: bold; font-size: 36px;">
   4.1 Defining DL Network
    </h1>
</div>
<hr>

In [None]:
# Small fully-connected classifier: three LeakyReLU hidden layers (16 -> 32 -> 32),
# with Gaussian noise after the second and dropout after the third.
# NOTE(review): the 7-unit softmax output assumes the target has seven classes —
# confirm against len(ohe.categories_[0]).
model = Sequential([
    Dense(16, input_dim=X.shape[1], activation=LeakyReLU(alpha=0.5)),
    Dense(32, activation=LeakyReLU(alpha=0.5)),
    GaussianNoise(0.2),
    Dense(32, activation=LeakyReLU(alpha=0.5)),
    Dropout(0.2),
    Dense(7, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot encoded target matrix y.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

<div id="4.2" >
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #263A29; font-weight: bold; font-size: 36px;">
   4.2 Training DL Network
    </h1>
</div>
<hr>

In [None]:
# Training callbacks; all three are handed to model.fit in the next cell.

# Write the model to the Kaggle working directory, keeping only the best one.
modelCheckpoint = ModelCheckpoint(
    filepath='/kaggle/working/model/',
    save_best_only=True,
)

# Stop when val_loss has not improved for 40 epochs (not checked before epoch 5)
# and roll back to the best weights seen.
earlyStopping = EarlyStopping(
    monitor="val_loss",
    patience=40,
    start_from_epoch=5,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.8 after 4 epochs without val_loss improvement,
# never going below 1e-5.
reduceLR = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.8,
    patience=4,
    min_lr=0.00001,
)

In [None]:
# Train for up to 250 epochs; early stopping normally ends the run sooner.
# NOTE(review): Keras carves validation_split off the *tail* of X/y before
# shuffling, so the hold-out is the last 30% of rows — confirm that is intended.
training_callbacks = [earlyStopping, reduceLR, modelCheckpoint]

history = model.fit(
    X,
    y,
    epochs=250,
    batch_size=128,
    validation_split=0.3,
    callbacks=training_callbacks,
)

<div id="4.3" >
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #263A29; font-weight: bold; font-size: 36px;">
   4.3 Plotting Training Curves
    </h1>
</div>
<hr>

In [None]:
# Per-epoch metrics recorded by fit(): one column per metric, one row per epoch.
histPlot = pd.DataFrame.from_dict(history.history)

In [None]:
# Three stacked panels: loss, accuracy and the learning-rate schedule per epoch.
fig, (loss_ax, acc_ax, lr_ax) = plt.subplots(3, 1, figsize=(20, 15), dpi=400)

# (axes, history columns to draw, panel title)
panels = [
    (loss_ax, ["loss", "val_loss"], "Loss vs Val Loss"),
    (acc_ax, ["accuracy", "val_accuracy"], "Accuracy vs Val Accuracy"),
    (lr_ax, ["lr"], "Learning Rate"),
]

for ax, metric_cols, panel_title in panels:
    sns.lineplot(data=histPlot[metric_cols], ax=ax)
    ax.set_title(panel_title)

<div id="7" style="background-color: #e1d9ce; padding: 20px; border-radius: 20px; border: 2px solid black;">
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #000000; font-weight: bold; font-size: 42px;">
   Creating 'submission.csv'
    </h1>
</div>

In [None]:
# Predict class probabilities for the test set and map each row back to its label.
preds = model.predict(test_data)

# Class names in the column order produced by the one-hot encoder.
cols = ohe.categories_[0]

# Vectorised replacement for the per-row `list(row).index(max(row))` loop.
# np.argmax returns the first maximum on ties, the same choice list.index made.
# .tolist() keeps `ans` a plain Python list, as before.
ans = cols[np.argmax(preds, axis=1)].tolist()

In [None]:
# Attach the predicted labels to a copy of the test frame (test_data itself is
# left untouched) and preview the column that will be submitted.
submission = test_data.assign(NObeyesdad=ans)
submission.loc[:, ["NObeyesdad"]].head()

In [None]:
# Write only the prediction column; the frame's index is written alongside it.
submission.to_csv("submission.csv", columns=["NObeyesdad"], index=True, header=True)