In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [8]:
df = pd.read_csv('../csv_files/body_measurements_cleaned.csv')
df.head()

Unnamed: 0,Gender,Age,HeadCircumference,ShoulderWidth,ChestWidth,Belly,Waist,Hips,ArmLength,ShoulderToWaist,WaistToKnee,LegLength,TotalHeight,Shoulder_Hip_Ratio,Waist_Hip_Ratio,BodyType
0,1.0,30,22,18,20,18,14,22,22,25,25,22,52,0.818182,0.636364,Triangle
1,1.0,28,19,22,17,18,21,25,28,23,25,20,56,0.88,0.84,Triangle
2,2.0,27,21,18,16,14,10,15,21,18,14,18,53,1.2,0.666667,Inverted Triangle
3,1.0,29,20,20,18,11,19,14,24,21,20,21,45,1.428571,1.357143,Inverted Triangle
4,2.0,28,16,14,18,13,11,30,25,22,32,13,47,0.466667,0.366667,Triangle


In [9]:
# Encode BodyType labels
le = LabelEncoder()
df['BodyType_Label'] = le.fit_transform(df['BodyType'])

# Define features and target
X = df[['Gender', 'Age', 'ShoulderWidth', 'Waist', 'Hips']]
y = df['BodyType_Label']

# First split: 90% Train+Test, 10% Unseen
X_temp, X_unseen, y_temp, y_unseen = train_test_split(X, y, test_size=0.1, random_state=42)

# Second split: From 90%, get 70% Train and 20% Test
X_train, X_test, y_train, y_test = train_test_split(X_temp, y_temp, test_size=2/9, random_state=42)

# Save unseen data
unseen_data = X_unseen.copy()
unseen_data['BodyType_Label'] = y_unseen
unseen_data.to_csv('unseen_data.csv', index=False)

# Initialize and train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict on test set
y_pred = rf.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\\nClassification Report:\\n", classification_report(y_test, y_pred))
print("\\nConfusion Matrix:\\n", confusion_matrix(y_test, y_pred))

# Save the model and label encoder
joblib.dump(rf, 'body_type_rf_model.pkl')
joblib.dump(le, 'body_type_label_encoder.pkl')

Accuracy: 0.9166666666666666
\nClassification Report:\n               precision    recall  f1-score   support

           0       1.00      0.43      0.60         7
           1       0.89      0.94      0.92        35
           2       0.67      0.25      0.36         8
           3       0.93      1.00      0.96        94

    accuracy                           0.92       144
   macro avg       0.87      0.66      0.71       144
weighted avg       0.91      0.92      0.90       144

\nConfusion Matrix:\n [[ 3  2  0  2]
 [ 0 33  1  1]
 [ 0  2  2  4]
 [ 0  0  0 94]]


['body_type_label_encoder.pkl']

In [None]:
# Save the notebook
final_cleaned_notebook = '/mnt/data/body_type_rf_from_cleaned.ipynb'
with open(final_cleaned_notebook, 'w') as f:
    nbformat.write(nb, f)

final_cleaned_notebook