In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [3]:
df = pd.read_csv(r"diet_exercise_calorise_recommandation Dataset .csv")
df.head()

Unnamed: 0,Age,Gender,Height,Weight,BMI,Fitness Level,Fitness Goal,Medical History,Diet Recommended,BMR,Calories,Exercise
0,59,F,4.9,42,18.928228,Underweight,weight gain,none,High calorie,1051.38,1997.622,Lunge
1,58,F,4.9,42,18.928228,Underweight,weight gain,none,High calorie,1056.08,2006.552,Lunge
2,59,F,5.0,42,18.17867,Underweight,weight gain,none,High calorie,1057.02,2008.338,Lunge
3,57,F,4.9,42,18.928228,Underweight,weight gain,none,High calorie,1060.78,2015.482,Lunge
4,59,F,4.9,43,19.3789,Normal weight,muscle building,none,High Protein Diet,1060.84,1644.302,Resistance training


In [4]:
le = LabelEncoder()

In [5]:
df['Gender'] = le.fit_transform(df['Gender'])
df['Fitness Level'] = le.fit_transform(df['Fitness Level'])
df['Fitness Goal'] = le.fit_transform(df['Fitness Goal'])
df['Medical History'] = le.fit_transform(df['Medical History'])

In [6]:
features = ['Age','Gender','Height','Weight','BMI','Fitness Level','Fitness Goal','Medical History','Calories']
target = 'Diet Recommended'

In [7]:
x=df[features]
y=df[target]

In [8]:
# Encode target labels
y = le.fit_transform(y)

In [9]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [10]:
dt_model = DecisionTreeClassifier()

In [11]:
dt_model.fit(X_train, y_train)

In [12]:
dt_pred_train =  dt_model.predict(X_train)

In [13]:
# Model accuracy
accuracy = accuracy_score(dt_pred_train, y_train)
print(f"Train Accuracy: {accuracy:.2f}")

Train Accuracy: 1.00


In [14]:
dt_pred_test =  dt_model.predict(X_test)
# Model accuracy
accuracy = accuracy_score(dt_pred_test, y_test)
print(f"Train Accuracy: {accuracy:.2f}")

Train Accuracy: 1.00


In [15]:
X_train.columns

Index(['Age', 'Gender', 'Height', 'Weight', 'BMI', 'Fitness Level',
       'Fitness Goal', 'Medical History', 'Calories'],
      dtype='object')

In [16]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Initialize label encoders for each categorical feature
le_gender = LabelEncoder()
le_fitness_level = LabelEncoder()
le_fitness_goal = LabelEncoder()
le_medical_history = LabelEncoder()

# Fit the label encoders with all possible categories
categories_gender = ["M", "F"]
categories_fitness_level = ["Underweight", "Normal", "Overweight"]
categories_fitness_goal = ["gain", "lose", "maintain"]
categories_medical_history = ["none", "diabetes", "heart disease"]

le_gender.fit(categories_gender)
le_fitness_level.fit(categories_fitness_level)
le_fitness_goal.fit(categories_fitness_goal)
le_medical_history.fit(categories_medical_history)

# Sample input data
xinput = np.array([[59, 'F', 4.9, 42, 18.928228, 'Underweight', 'gain', 'none', 1997.622]])

# Encode the categorical values
xinput[:, 1] = le_gender.transform(xinput[:, 1])
xinput[:, 5] = le_fitness_level.transform(xinput[:, 5])
xinput[:, 6] = le_fitness_goal.transform(xinput[:, 6])
xinput[:, 7] = le_medical_history.transform(xinput[:, 7])

# Convert xinput to float type for prediction
xinput = xinput.astype(float)

# Assuming dt_model is your trained DecisionTree model
predictions = dt_model.predict(xinput)


decoded_prediction = le.inverse_transform(predictions)

print(f"The prediction for the sample input is: {predictions}")


The prediction for the sample input is: [23]




In [20]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Initialize label encoders
le_gender = LabelEncoder()
le_fitness_level = LabelEncoder()
le_fitness_goal = LabelEncoder()
le_medical_history = LabelEncoder()

# Fit label encoders with predefined categories
categories_gender = ["M", "F"]
categories_fitness_level = ["Underweight", "Normal", "Overweight"]
categories_fitness_goal = ["gain", "lose", "maintain"]
categories_medical_history = ["none", "diabetes", "heart disease"]

le_gender.fit(categories_gender)
le_fitness_level.fit(categories_fitness_level)
le_fitness_goal.fit(categories_fitness_goal)
le_medical_history.fit(categories_medical_history)

def predict_diet(dt_model, le, input_data):
    """
    Predicts diet recommendations based on input features.
    
    Parameters:
    - dt_model: Trained Decision Tree model
    - le: Label encoder for the target variable
    - input_data: List containing input feature values
    
    Returns:
    - Predicted diet recommendation
    """
    xinput = np.array([input_data], dtype=object)  # Ensure it's a 2D array

    # Encode categorical values
    xinput[:, 1] = le_gender.transform(xinput[:, 1])
    xinput[:, 5] = le_fitness_level.transform(xinput[:, 5])
    xinput[:, 6] = le_fitness_goal.transform(xinput[:, 6])
    xinput[:, 7] = le_medical_history.transform(xinput[:, 7])

    # Convert to float type
    xinput = xinput.astype(float)

    # Make prediction
    prediction = dt_model.predict(xinput)

    # Decode prediction
    decoded_prediction = le.inverse_transform(prediction)

    return decoded_prediction[0]

# Example usage:
predicted_diet = predict_diet(dt_model, le, [59, 'F', 4.9, 42, 18.928228, 'Underweight', 'gain', 'none', 1997.622])
print(f"Predicted diet: {predicted_diet}")


Predicted diet: Purine Care & Weight Gain Harmony




In [None]:
decoded_prediction

array(['Purine Care & Weight Gain Harmony'], dtype=object)

In [19]:
import joblib

# Save the model to a file
joblib.dump(dt_model, 'Diet Recommandation.pkl')

['Diet Recommandation.pkl']