In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split



In [13]:
df = pd.read_csv('../csv_files/body_measurements.csv')
df.head()

Unnamed: 0,Gender,Age,HeadCircumference,ShoulderWidth,ChestWidth,Belly,Waist,Hips,ArmLength,ShoulderToWaist,WaistToKnee,LegLength,TotalHeight
0,1.0,30,22,18,20,18,14,22,22,25,25,22,52
1,1.0,28,19,22,17,18,21,25,28,23,25,20,56
2,2.0,27,21,18,16,14,10,15,21,18,14,18,53
3,1.0,29,20,20,18,11,19,14,24,21,20,21,45
4,2.0,28,16,14,18,13,11,30,25,22,32,13,47


In [14]:

# Clean column names
df.columns = df.columns.str.strip()

# Fill missing Gender value with mode
df['Gender'].fillna(df['Gender'].mode()[0], inplace=True)

# Create ratios
df['Shoulder_Hip_Ratio'] = df['ShoulderWidth'] / df['Hips']
df['Waist_Hip_Ratio'] = df['Waist'] / df['Hips']

# Define function to classify body type
def classify_body_type(row):
    shr = row['Shoulder_Hip_Ratio']
    whr = row['Waist_Hip_Ratio']
    waist = row['Waist']
    shoulders = row['ShoulderWidth']
    hips = row['Hips']
    
    if abs(shoulders - hips) <= 2 and whr < 0.75:
        return 'Hourglass'
    elif shr < 0.95 and hips > shoulders:
        return 'Triangle'
    elif shr > 1.05 and shoulders > hips:
        return 'Inverted Triangle'
    elif abs(shoulders - hips) <= 2 and whr >= 0.75:
        return 'Rectangle'
    elif waist > shoulders and waist > hips:
        return 'Oval'
    else:
        return 'Undefined'

# Apply classification
df['BodyType'] = df.apply(classify_body_type, axis=1)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Gender'].fillna(df['Gender'].mode()[0], inplace=True)


In [5]:
# Keep only relevant columns
columns_to_keep = [
    'Gender', 'Age', 
    'ShoulderWidth', 'Waist', 'Hips', 
    'Shoulder_Hip_Ratio', 'Waist_Hip_Ratio', 
    'BodyType'
]

df = df[columns_to_keep]

# Display the first few rows of the cleaned dataframe
df.head()


Unnamed: 0,Gender,Age,ShoulderWidth,Waist,Hips,Shoulder_Hip_Ratio,Waist_Hip_Ratio,BodyType
0,1.0,30,18,14,22,0.818182,0.636364,Triangle
1,1.0,28,22,21,25,0.88,0.84,Triangle
2,2.0,27,18,10,15,1.2,0.666667,Inverted Triangle
3,1.0,29,20,19,14,1.428571,1.357143,Inverted Triangle
4,2.0,28,14,11,30,0.466667,0.366667,Triangle


In [15]:
csv_path = '../csv_files/body_measurements_cleaned.csv'
df.to_csv(csv_path, index=False)

### Random Forest Classifier

In [None]:
# Add preprocessing cell
nb.cells.append(nbformat.v4.new_code_cell(preprocessing_code))

# Add Random Forest model cell
rf_model_code = """
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Encode BodyType labels
le = LabelEncoder()
df['BodyType_Label'] = le.fit_transform(df['BodyType'])

# Define features and target
X = df[['Gender', 'Age', 'ShoulderWidth', 'Waist', 'Hips']]
y = df['BodyType_Label']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict on test set
y_pred = rf.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\\nClassification Report:\\n", classification_report(y_test, y_pred))
print("\\nConfusion Matrix:\\n", confusion_matrix(y_test, y_pred))

# Save the model and label encoder
joblib.dump(rf, 'body_type_rf_model.pkl')
joblib.dump(le, 'body_type_label_encoder.pkl')
"""

nb.cells.append(nbformat.v4.new_code_cell(rf_model_code))

# Save the fully updated notebook
final_notebook_path = '/mnt/data/data_preprocessing_with_model.ipynb'
with open(final_notebook_path, 'w') as f:
    nbformat.write(nb, f)

final_notebook_path
Result
'/mnt/data/data_preprocessing_with_model.ipynb'