In [22]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

In [23]:
# Load the dataset
data = pd.read_csv('obesity_data.csv')

In [24]:
# Separate features and target variable
X = data[['Age', 'Gender', 'Height', 'Weight', 'BMI', 'PhysicalActivityLevel']]
y = data['ObesityCategory']

In [25]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [26]:
# Standardize the data (important for algorithms like SVM and KNN)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [27]:
from sklearn.decomposition import PCA

In [28]:
# Apply PCA
pca = PCA(n_components=2)  # reduce to 2 components
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [29]:
# Train a model using the PCA-transformed data (Example: RandomForest)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score , confusion_matrix

In [30]:
# Initialize the model
rf_pca = RandomForestClassifier(n_estimators=100, random_state=42)

In [31]:
# Train the model
rf_pca.fit(X_train_pca, y_train)

In [32]:
# Make predictions
y_pred_pca = rf_pca.predict(X_test_pca)

In [33]:
# Evaluate the model
print("RandomForest with PCA Accuracy:", accuracy_score(y_test, y_pred_pca))
print("Classification Report:\n", classification_report(y_test, y_pred_pca))

RandomForest with PCA Accuracy: 0.9033333333333333
Classification Report:
               precision    recall  f1-score   support

           1       0.89      0.89      0.89        45
           2       0.91      0.90      0.91       114
           3       0.89      0.90      0.90        91
           4       0.92      0.92      0.92        50

    accuracy                           0.90       300
   macro avg       0.90      0.90      0.90       300
weighted avg       0.90      0.90      0.90       300



In [34]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_pca)
conf_matrix = confusion_matrix(y_test, y_pred_pca)
class_report = classification_report(y_test, y_pred_pca)

In [35]:
print(f'Accuracy: {accuracy:.4f}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)


Accuracy: 0.9033
Confusion Matrix:
[[ 40   5   0   0]
 [  5 103   6   0]
 [  0   5  82   4]
 [  0   0   4  46]]
Classification Report:
              precision    recall  f1-score   support

           1       0.89      0.89      0.89        45
           2       0.91      0.90      0.91       114
           3       0.89      0.90      0.90        91
           4       0.92      0.92      0.92        50

    accuracy                           0.90       300
   macro avg       0.90      0.90      0.90       300
weighted avg       0.90      0.90      0.90       300

