In [None]:
import sys
sys.path.append('../')  # Add the parent folder to the system path

from sklearn.tree import DecisionTreeClassifier
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from util import preprocess

# Load the raw obesity dataset.
data = pd.read_csv("ObesityDataSet_raw_and_data_sinthetic.csv")

# Categorical columns handed to the project's one-hot encoding helper.
columns_to_encode = [
    'Gender',
    'family_history_with_overweight',
    'FAVC',
    'CAEC',
    'SMOKE',
    'SCC',
    'CALC',
    'MTRANS',
]

# Integer code assigned to each obesity-level label of the target column.
feature_mapping = {
    'Insufficient_Weight': 0,
    'Normal_Weight': 1,
    'Overweight_Level_I': 2,
    'Overweight_Level_II': 3,
    'Obesity_Type_I': 4,
    'Obesity_Type_II': 5,
    'Obesity_Type_III': 6,
}

df_final = preprocess.one_hot_encode(data, columns_to_encode)

# Take the target out and re-attach it, already integer-coded, as the last column.
column_to_move = df_final.pop('NObeyesdad')
df_final['NObeyesdad'] = column_to_move.map(feature_mapping)

# Features : X , Labels : y
X, y = preprocess.sep_column(df_final, "NObeyesdad")

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=462,
)

In [None]:
# Fit a decision tree on the training split in order to rank the features by
# the importance scores the fitted tree exposes.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn permutes the candidate features at every split, so ties between
# equally good splits could otherwise be broken differently on each run and
# change the ranking.
clf = DecisionTreeClassifier(random_state=462)

# Train the decision tree model
clf.fit(X_train, y_train)

# Get feature importances and feature names
importances = clf.feature_importances_
feature_names = X_train.columns

# Sort feature importances in descending order
indices = np.argsort(importances)[::-1]

# Print the feature ranking, most important feature first
print("Feature ranking:")
for rank, feature_idx in enumerate(indices, start=1):
    print(f"{rank}. Feature '{feature_names[feature_idx]}': {importances[feature_idx]}")

# Bar chart of the same ranking; tick labels are rotated so long names stay readable
plt.figure()
plt.title("Feature Importances")
plt.bar(range(X_train.shape[1]), importances[indices], align="center")
plt.xticks(range(X_train.shape[1]), feature_names[indices], rotation=90)
plt.xlabel("Feature Name")
plt.ylabel("Feature Importance")
plt.show()

In [None]:
# Column selections for this experiment (the target 'NObeyesdad' is kept in the subset).
columns_to_use = [
    'Weight', 'Height', 'Gender', 'Age',
    'CALC', 'FAVC', 'FCVC', 'MTRANS',
    'NObeyesdad',
]
one_hot_encoded_columns = ['CALC', 'MTRANS', 'FAVC']
word_embedded_columns = ['Gender']      # left un-encoded here; consumed later via word embeddings
column_to_be_normalized = ['Age', 'Height', 'Weight', 'FCVC']

# Integer code assigned to each obesity-level label of the target column.
feature_mapping = {
    'Insufficient_Weight': 0,
    'Normal_Weight': 1,
    'Overweight_Level_I': 2,
    'Overweight_Level_II': 3,
    'Obesity_Type_I': 4,
    'Obesity_Type_II': 5,
    'Obesity_Type_III': 6,
}

extracted_df = data[columns_to_use]
df_encoded_onehot = preprocess.one_hot_encode(extracted_df, one_hot_encoded_columns)

# Take the target out and re-attach it, already integer-coded, as the last column.
column_to_move = df_encoded_onehot.pop('NObeyesdad')
df_encoded_onehot['NObeyesdad'] = column_to_move.map(feature_mapping)

# Features : X , Labels : y
X, y = preprocess.sep_column(df_encoded_onehot, "NObeyesdad")

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=462,
)

# Scaling bounds come from the training split only and are applied to both splits.
min_vals, max_vals = X_train.min(), X_train.max()

# The helper's return value is discarded, so it presumably rescales the frames
# in place — TODO confirm against util.preprocess.
preprocess.min_max_norm([X_train, X_test], column_to_be_normalized, min_vals, max_vals)

In [None]:
from torchtext.vocab import GloVe
from artificial_nn import ANN

# Load pre-trained GloVe word vectors (the '6B' set, 100-dimensional).
# NOTE: GloVe is a different embedding method from Word2Vec.
# NOTE(review): torchtext presumably downloads and caches the vectors on first
# use — confirm before running this cell offline.
word_embeddings = GloVe(name='6B', dim=100)

def replace_gender_with_vectors(value, value_dict):
    """Look up the replacement stored for a gender label.

    Args:
        value: Gender label to translate; only 'Male' and 'Female' are recognised.
        value_dict: Mapping with 'Male' and 'Female' keys holding the
            replacement values.

    Returns:
        The ``value_dict`` entry for the matching label, or ``None`` when
        ``value`` is neither 'Male' nor 'Female'.
    """
    for known_label in ('Male', 'Female'):
        if value == known_label:
            return value_dict[known_label]
    return None  # unrecognised label: handle other cases here if necessary

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Seed PyTorch (same seed as the train/test split) so that weight
# initialisation and DataLoader shuffling are repeatable across runs.
torch.manual_seed(462)

# Replace the word-embedded columns (Gender) using the loaded word embeddings.
# NOTE: X_train / X_test are overwritten here, so this cell is not idempotent —
# re-run the preprocessing cell first if this cell has to be executed again.
X_train = preprocess.replace_with_word_embeddings(X_train, word_embeddings, word_embedded_columns)
X_test = preprocess.replace_with_word_embeddings(X_test, word_embeddings, word_embedded_columns)

# Convert features to float32 tensors; labels keep the dtype of the label arrays.
X_train_tensor = torch.tensor(X_train.values.astype(np.float32))
X_test_tensor = torch.tensor(X_test.values.astype(np.float32))
y_train_tensor = torch.tensor(y_train.values)
y_test_tensor = torch.tensor(y_test.values)

# Mini-batches of 64 samples, reshuffled every epoch.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

input_size = X_train_tensor.shape[1]
hidden_sizes = [64,64,64]               # hidden layer size is hyperparameter
output_size = len(y.unique())           # one output per distinct label value

model = ANN.ArtificialNeuralNetwork(input_size, hidden_sizes, output_size)

criterion = nn.CrossEntropyLoss()                       # cross entropy value is used as loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)    # learning rate is hyperparameter

num_epochs = 1000

model.train()                               # make sure the module is in training mode
for epoch in range(num_epochs):
    running_loss = 0.0                      # sum of per-sample losses seen in this epoch
    for inputs, labels in train_loader:
        optimizer.zero_grad()               # clears the gradients before new back prop (new batch)
        outputs = model(inputs)             # call the module (not .forward) so registered hooks run
        loss = criterion(outputs, labels)   # calculate loss value of predictions
        loss.backward()                     # perform back prop to compute gradient w.r.t model params
        optimizer.step()                    # update the model params (weights) according to LR and gradient
        # criterion returns the batch mean, so weight it by the batch size
        running_loss += loss.item() * inputs.size(0)

    # Report the mean loss over the whole epoch rather than the last mini-batch only.
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

In [None]:
# Evaluate the trained model on the held-out test split.
model.eval()  # evaluation mode (matters for layers such as dropout/batch-norm, if the model has any)
with torch.no_grad():  # gradients are not needed for evaluation
    outputs = model(X_test_tensor)  # call the module (not .forward) so registered hooks run
    # Predicted class = index of the largest output per row. Taking `.indices`
    # avoids assigning to `_`, which IPython uses for the last cell output.
    predicted = torch.max(outputs, 1).indices
    # Fraction of test samples whose predicted class equals the label.
    accuracy = (predicted == y_test_tensor).sum().item() / y_test_tensor.size(0)

print(f"\nTest Accuracy: {accuracy:.4f}")