In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:
data = pd.read_csv('GHW_Index2.csv')

In [4]:
# Function to map index numbers to category names
def map_indices_to_categories(ind):
    index_to_category = {
        0: 'Extremely Weak',
        1: 'Weak',
        2: 'Normal',
        3: 'Overweight',
        4: 'Obesity',
        5: 'Extremely Obese'
    }
    return index_to_category[ind]

In [5]:
# Apply mapping to 'Index' column
data['Index'] = data['Index'].apply(map_indices_to_categories)

In [6]:
# Get value counts for 'Gender' and 'Index' columns
gender_counts = data['Gender'].value_counts()
index_counts = data['Index'].value_counts()

In [7]:
# Create dummy variables for 'Gender'
gender_dummies = pd.get_dummies(data['Gender'])
data.drop('Gender', axis=1, inplace=True)
data = pd.concat([data, gender_dummies], axis=1)

In [8]:
# Separate target variable and features
y = data['Index']
X = data.drop(['Index'], axis=1)

In [9]:
# Standardize features using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled)

In [10]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=101)

In [12]:
# Define parameter grid for hyperparameter tuning
param_grid = {'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 1000]}

In [13]:
# Perform GridSearchCV for hyperparameter tuning
grid_cv = GridSearchCV(RandomForestClassifier(random_state=101), param_grid, verbose=3)
grid_cv.fit(X_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ..................n_estimators=100;, score=0.850 total time=   0.3s
[CV 2/5] END ..................n_estimators=100;, score=0.832 total time=   0.2s
[CV 3/5] END ..................n_estimators=100;, score=0.832 total time=   0.2s
[CV 4/5] END ..................n_estimators=100;, score=0.836 total time=   0.2s
[CV 5/5] END ..................n_estimators=100;, score=0.850 total time=   0.2s
[CV 1/5] END ..................n_estimators=200;, score=0.857 total time=   0.5s
[CV 2/5] END ..................n_estimators=200;, score=0.821 total time=   0.6s
[CV 3/5] END ..................n_estimators=200;, score=0.825 total time=   1.4s
[CV 4/5] END ..................n_estimators=200;, score=0.829 total time=   0.7s
[CV 5/5] END ..................n_estimators=200;, score=0.839 total time=   0.6s
[CV 1/5] END ..................n_estimators=300;, score=0.854 total time=   0.7s
[CV 2/5] END ..................n_estimators=300;,

In [14]:
# Display the best parameters obtained from grid search
print("Best Parameters:", grid_cv.best_params_)

Best Parameters: {'n_estimators': 100}


In [17]:
# Predict weight categories on the test set
y_pred = grid_cv.predict(X_test)

In [18]:
# Evaluate model performance
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred) * 100)

Classification Report:
                  precision    recall  f1-score   support

Extremely Obese       0.93      0.90      0.92       241
 Extremely Weak       1.00      0.59      0.74        17
         Normal       0.81      0.88      0.84        85
        Obesity       0.78      0.81      0.80       155
     Overweight       0.80      0.73      0.76        82
           Weak       0.62      0.80      0.70        20

       accuracy                           0.84       600
      macro avg       0.82      0.79      0.79       600
   weighted avg       0.85      0.84      0.84       600

Confusion Matrix:
 [[218   0   0  23   0   0]
 [  0  10   1   0   0   6]
 [  0   0  75   2   4   4]
 [ 17   0   1 126  11   0]
 [  0   0  12  10  60   0]
 [  0   0   4   0   0  16]]
Accuracy: 84.16666666666667


In [19]:
# Function to predict weight category for live input
def predict_weight_category(details):
    gender = details[0]
    height = details[1]
    weight = details[2]

    if gender == 'Male':
        input_data = np.array([[np.float(height), np.float(weight), 0.0, 1.0]])
    elif gender == 'Female':
        input_data = np.array([[np.float(height), np.float(weight), 1.0, 0.0]])

    y_pred = grid_cv.predict(scaler.transform(input_data))
    return y_pred[0]

In [23]:
# Live predictor example
your_details = ['Female', 178, 85]
print("Predicted Weight Category:", predict_weight_category(your_details))

Predicted Weight Category: Overweight


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  input_data = np.array([[np.float(height), np.float(weight), 1.0, 0.0]])
