In [32]:
import pandas as pd

# Load the datasets
megagym_df = pd.read_csv("MegaGymDataBase.csv")
gymrec_df = pd.read_csv("GymRecommendation.csv")

# Display basic info
megagym_df.head() 



Title          object
Desc           object
Type           object
BodyPart       object
Equipment      object
Level          object
Rating        float64
RatingDesc     object
dtype: object

In [33]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Fill missing values (if any)
gymrec_df.fillna(method='ffill', inplace=True)  

# Encode categorical variables
label_encoders = {}
categorical_columns = ["Sex", "Fitness Goal", "Fitness Type", "Exercises", "Equipment", "Diet"]

for col in categorical_columns:
    le = LabelEncoder()
    gymrec_df[col] = le.fit_transform(gymrec_df[col])
    label_encoders[col] = le  # Save encoder for later use

# Normalize numerical columns
scaler = StandardScaler()
numerical_columns = ["Age", "Height", "Weight", "BMI"]
gymrec_df[numerical_columns] = scaler.fit_transform(gymrec_df[numerical_columns])

print("Preprocessing complete!")

Preprocessing complete!


  gymrec_df.fillna(method='ffill', inplace=True)


In [34]:
X = gymrec_df[["Sex", "Age", "Height", "Weight", "BMI", "Hypertension", "Diabetes", "Level", "Fitness Goal"]]
y = gymrec_df[["Exercises", "Diet"]]

In [35]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
print(X_train.dtypes)
print(X_train.head())

Sex               int64
Age             float64
Height          float64
Weight          float64
BMI             float64
Hypertension     object
Diabetes         object
Level            object
Fitness Goal      int64
dtype: object
       Sex       Age    Height    Weight       BMI Hypertension Diabetes  \
13082    1  0.791765  0.612985  1.172944  0.876941          Yes      Yes   
5502     0 -0.193665  1.428268  1.722122  0.974504          Yes       No   
1570     1 -1.027491 -0.508029 -1.148580 -1.029957           No      Yes   
9159     0  0.261149  0.918716 -1.523019 -1.764630          Yes      Yes   
9581     1  1.246579 -1.527133 -0.574439  0.021054          Yes       No   

             Level  Fitness Goal  
13082        Obuse             1  
5502         Obuse             1  
1570   Underweight             0  
9159   Underweight             0  
9581        Normal             0  


In [37]:
print("Columns in X_train:", X_train.columns)

Columns in X_train: Index(['Sex', 'Age', 'Height', 'Weight', 'BMI', 'Hypertension', 'Diabetes',
       'Level', 'Fitness Goal'],
      dtype='object')


In [38]:
X_train.columns = X_train.columns.str.strip()  # Remove extra spaces

In [39]:
categorical_columns = [col for col in ["Sex", "Fitness Goal", "Fitness Type", "Exercises", "Equipment", "Diet"] if col in X_train.columns]
print("Final categorical columns:", categorical_columns)

Final categorical columns: ['Sex', 'Fitness Goal']


In [40]:
from sklearn.preprocessing import LabelEncoder

label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col] = le.transform(X_test[col])
    label_encoders[col] = le  # Store encoders for later use

print("Categorical features encoded successfully!")


Categorical features encoded successfully!


In [41]:
# Encoding target variables
label_enc_exercise = LabelEncoder()
label_enc_diet = LabelEncoder()

y_train_exercise = label_enc_exercise.fit_transform(y_train["Exercises"])
y_train_diet = label_enc_diet.fit_transform(y_train["Diet"])

y_test_exercise = label_enc_exercise.transform(y_test["Exercises"])
y_test_diet = label_enc_diet.transform(y_test["Diet"])

print("Target encoding complete!")


Target encoding complete!


In [42]:
print("X_train column data types:\n", X_train.dtypes)
print("\nUnique values in each column:\n")
for col in X_train.columns:
    print(f"{col}: {X_train[col].unique()}")

X_train column data types:
 Sex               int64
Age             float64
Height          float64
Weight          float64
BMI             float64
Hypertension     object
Diabetes         object
Level            object
Fitness Goal      int64
dtype: object

Unique values in each column:

Sex: [1 0]
Age: [ 0.79176539 -0.19366516 -1.027491    0.26114894  1.24657948 -0.95168865
 -1.1790957   1.77719593 -0.64847925  1.32238183 -0.34526986  0.33695129
  1.01917243 -0.11786281  1.54978888 -0.49687455 -0.5726769  -0.8758863
  0.10954424  1.62559123 -1.4823051   0.03374189 -0.7242816   1.39818418
  1.70139358  1.47398653 -1.10329335 -0.80008395  0.71596304 -0.42107221
  0.56435834 -0.04206046  0.64016069 -1.25489805 -1.6339098   0.86756774
  0.94337009  1.17077713 -1.3307004  -1.40650275  0.18534659  0.48855599
 -0.26946751  0.41275364 -1.55810745  1.09497478]
Height: [ 6.12985049e-01  1.42826802e+00 -5.08029032e-01  9.18716162e-01
 -1.52713274e+00  1.52282274e-03 -4.06118661e-01 -1.01758089e

In [43]:
from sklearn.preprocessing import LabelEncoder

# Identify columns that are still categorical
categorical_columns = X_train.select_dtypes(include=['object']).columns
print("Categorical columns to encode:", categorical_columns)

# Apply Label Encoding
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col] = le.transform(X_test[col])  # Ensure test data is encoded the same way
    label_encoders[col] = le  # Store for decoding later

print("✅ All categorical features encoded successfully!\n")
print(X_train.head())  # Verify that all values are numeric

Categorical columns to encode: Index(['Hypertension', 'Diabetes', 'Level'], dtype='object')
✅ All categorical features encoded successfully!

       Sex       Age    Height    Weight       BMI  Hypertension  Diabetes  \
13082    1  0.791765  0.612985  1.172944  0.876941             1         1   
5502     0 -0.193665  1.428268  1.722122  0.974504             1         0   
1570     1 -1.027491 -0.508029 -1.148580 -1.029957             0         1   
9159     0  0.261149  0.918716 -1.523019 -1.764630             1         1   
9581     1  1.246579 -1.527133 -0.574439  0.021054             1         0   

       Level  Fitness Goal  
13082      1             1  
5502       1             1  
1570       3             0  
9159       3             0  
9581       0             0  


In [44]:
# Encode target variables (Exercises and Diet)
label_enc_exercise = LabelEncoder()
label_enc_diet = LabelEncoder()

y_train_exercise = label_enc_exercise.fit_transform(y_train["Exercises"])
y_train_diet = label_enc_diet.fit_transform(y_train["Diet"])

y_test_exercise = label_enc_exercise.transform(y_test["Exercises"])
y_test_diet = label_enc_diet.transform(y_test["Diet"])

print("✅ Target variables encoded successfully!")

✅ Target variables encoded successfully!


In [45]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Encode target variables (Exercises and Diet)
label_enc_exercise = LabelEncoder()
label_enc_diet = LabelEncoder()

y_train_exercise = label_enc_exercise.fit_transform(y_train["Exercises"])
y_train_diet = label_enc_diet.fit_transform(y_train["Diet"])

y_test_exercise = label_enc_exercise.transform(y_test["Exercises"])
y_test_diet = label_enc_diet.transform(y_test["Diet"])

# Train separate models
exercise_model = RandomForestClassifier(n_estimators=100, random_state=42)
diet_model = RandomForestClassifier(n_estimators=100, random_state=42)

exercise_model.fit(X_train, y_train_exercise)
diet_model.fit(X_train, y_train_diet)

# Predictions
y_pred_exercise = exercise_model.predict(X_test)
y_pred_diet = diet_model.predict(X_test)

# Evaluate accuracy
exercise_accuracy = accuracy_score(y_test_exercise, y_pred_exercise)
diet_accuracy = accuracy_score(y_test_diet, y_pred_diet)

print(f"Exercise Model Accuracy: {exercise_accuracy}")
print(f"Diet Model Accuracy: {diet_accuracy}")

Exercise Model Accuracy: 0.997943797121316
Diet Model Accuracy: 0.9479095270733379


In [46]:
from sklearn.neighbors import NearestNeighbors

# Train a recommendation model
knn = NearestNeighbors(n_neighbors=5, metric='euclidean')
knn.fit(X_train)

# Predict recommendations for a sample user
sample_user = X_test.iloc[0].values.reshape(1, -1)
distances, indices = knn.kneighbors(sample_user)

# Show recommendations
print("Recommended Exercises and Diet:")
print(y_train.iloc[indices[0]])

Recommended Exercises and Diet:
       Exercises  Diet
14295          0     1
14039          0     1
14262          0     1
14550          0     1
14552          0     1




In [47]:
import joblib

joblib.dump(model, "fitness_recommender.pkl")

['fitness_recommender.pkl']

In [49]:
import joblib

# Load the model
model = joblib.load("fitness_recommender.pkl")
print("Loaded Model:", model)

Loaded Model: RandomForestClassifier(random_state=42)


In [50]:
from sklearn.ensemble import RandomForestClassifier
import joblib

# Train the model again (assuming X_train and y_train are available)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train_exercise)  # Ensure correct dataset

# Save properly
joblib.dump(model, "fitness_recommender.pkl")
print("Model saved successfully!")

Model saved successfully!


In [52]:
print("X_train shape:", X_train.shape)
print("Feature Names:", X_train.columns)

X_train shape: (11671, 9)
Feature Names: Index(['Sex', 'Age', 'Height', 'Weight', 'BMI', 'Hypertension', 'Diabetes',
       'Level', 'Fitness Goal'],
      dtype='object')


In [53]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load encoders used during training
label_enc_goal = LabelEncoder()
label_enc_goal.fit(X_train["Fitness Goal"])  # Fit with training data

# Convert categorical input values using the same encoding
fitness_goal_encoded = label_enc_goal.transform([1])  # Example input: "1"

# Ensure input is correctly formatted
sample_input = np.array([[25, 175, 70, fitness_goal_encoded[0], 0]])

print("Transformed input:", sample_input)


Transformed input: [[ 25 175  70   1   0]]


In [55]:
print("X_train shape:", X_train.shape)
print("Feature Names:", list(X_train.columns))


X_train shape: (11671, 9)
Feature Names: ['Sex', 'Age', 'Height', 'Weight', 'BMI', 'Hypertension', 'Diabetes', 'Level', 'Fitness Goal']


In [56]:
sample_input = np.array([[25, 175, 70, 1, 0, 0, 0, 22.8, 2]])  
# Example values: Age, Height, Weight, Fitness Goal, Duration, Hypertension, Diabetes, BMI, Level


In [57]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load the encoders (assuming they were used in training)
label_enc_goal = LabelEncoder()
label_enc_level = LabelEncoder()

# Fit with the original training data categories
label_enc_goal.fit(X_train["Fitness Goal"])
label_enc_level.fit(X_train["Level"])

# Transform categorical input values
fitness_goal_encoded = label_enc_goal.transform([1])[0]  
level_encoded = label_enc_level.transform([2])[0]  

# Corrected input with all features
sample_input = np.array([[25, 175, 70, fitness_goal_encoded, 0, 0, 0, 22.8, level_encoded]])

print("Updated input shape:", sample_input.shape)


Updated input shape: (1, 9)


In [58]:
import joblib

# Load the saved model
model = joblib.load("fitness_recommender.pkl")

# Make the prediction
prediction = model.predict(sample_input)

print("Recommended Plan:", prediction)


Recommended Plan: [0]




Expected input shape: (1, 5)




ValueError: X has 5 features, but RandomForestClassifier is expecting 9 features as input.

In [48]:
import joblib
import numpy as np

# Load the saved model
model = joblib.load("fitness_recommender.pkl")

# Sample input (Replace with real user data)
sample_input = np.array([[25, 175, 70, 1, 0]])  # Example: Age, Height, Weight, FitnessGoal, Duration

# Predict recommendation
prediction = model.predict(sample_input)
print("Recommended Plan:", prediction)

AttributeError: 'RandomForestClassifier' object has no attribute 'estimators_'

In [None]:
# Assuming `label_enc_exercise` was used for training
recommended_plan = label_enc_exercise.inverse_transform(prediction)
print("Recommended Plan:", recommended_plan)


Recommended Plan: [0]
