In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
import joblib

In [15]:
# Read the cleaned body-measurement table and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was written into the CSV — confirm whether it should be
# dropped or read with index_col=0.
df = pd.read_csv('../csv_files/body_measurements_cleaned.csv')
df.head(n=5)

Unnamed: 0,Gender,Age,HeadCircumference,ShoulderWidth,ChestWidth,Belly,Waist,Hips,ArmLength,ShoulderToWaist,WaistToKnee,LegLength,TotalHeight,Shoulder_Hip_Ratio,Waist_Hip_Ratio,BodyType
0,1.0,30,22,18,20,18,14,22,22,25,25,22,52,0.818182,0.636364,Triangle
1,1.0,28,19,22,17,18,21,25,28,23,25,20,56,0.88,0.84,Triangle
2,2.0,27,21,18,16,14,10,15,21,18,14,18,53,1.2,0.666667,Inverted Triangle
3,1.0,29,20,20,18,11,19,14,24,21,20,21,45,1.428571,1.357143,Inverted Triangle
4,2.0,28,16,14,18,13,11,30,25,22,32,13,47,0.466667,0.366667,Triangle


In [18]:

# Switch between the two class-balancing strategies used by the training cell:
#   True  -> oversample the training split with SMOTE
#   False -> train on the original split with class weights instead
USE_SMOTE = True

# imbalanced-learn is only imported when SMOTE is actually requested.
if USE_SMOTE:
    from imblearn.over_sampling import SMOTE

# Re-read the cleaned measurements so `df` is a fresh copy of the CSV.
df = pd.read_csv('../csv_files/body_measurements_cleaned.csv')

In [None]:
# ======== Label Encoding =========
# Map the string body-type names to integer codes. The fitted encoder is saved
# at the end of this cell so predictions can be decoded back to names later.
le = LabelEncoder()
df['BodyType_Label'] = le.fit_transform(df['BodyType'])

# Integer codes and their readable names, in encoder order. Passing both to the
# metric functions keeps report rows / matrix axes stable and human-readable
# even if a class happens to be absent from the test split.
class_ids = np.arange(len(le.classes_))
class_names = [str(name) for name in le.classes_]

# ======== Features & Target =========
X = df[['Gender', 'Age', 'ShoulderWidth', 'Waist', 'Hips']]
y = df['BodyType_Label']

# ======== Stratified Splits =========
# First hold out 10% of the rows as X_unseen / y_unseen (not used again in this
# cell). Then take 2/9 of the remaining 90% as the test split, i.e. 20% of the
# full data, which leaves 70% train / 20% test / 10% unseen.
X_temp, X_unseen, y_temp, y_unseen = train_test_split(X, y, test_size=0.1, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X_temp, y_temp, test_size=2/9, stratify=y_temp, random_state=42)

# ======== Class Balancing & Training =========
if USE_SMOTE:
    # Resampling is applied to the training split only; X_test is left untouched.
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)
    rf_class_weight = None
else:
    # 'balanced' weights each class by n_samples / (n_classes * class_count),
    # computed from the y passed to fit() — the same values the explicit
    # compute_class_weight('balanced', ...) dict would contain.
    rf_class_weight = 'balanced'

# One construction/fit path for both strategies (class_weight=None is the default).
rf = RandomForestClassifier(n_estimators=100, class_weight=rf_class_weight, random_state=42)
rf.fit(X_train, y_train)

# ======== Evaluate =========
y_pred = rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=class_names),
)
# Rows are true classes and columns are predicted classes, in `class_names` order.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))

# ======== Save Outputs =========
# Written to the current working directory.
joblib.dump(rf, "body_type_rf_model.pkl")
joblib.dump(le, "body_type_label_encoder.pkl")
print("\nModel and label encoder saved.")

FileNotFoundError: [Errno 2] No such file or directory: 'your_dataset.csv'