In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

df = pd.read_csv("data/diet_recommendations_dataset.csv")

# Minimal features
features = ['Age', 'Weight_kg', 'Height_cm', 'BMI', 'Daily_Caloric_Intake']
target = 'Diet_Recommendation'

df = df[features + [target]].dropna()

le = LabelEncoder()
df[target] = le.fit_transform(df[target])
joblib.dump(le, 'diet_label_encoder.pkl')

X = df[features]
y = df[target]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, 'diet_scaler.pkl')

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Logistic Regression
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred_lr = logreg.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))
joblib.dump(logreg, "logreg_model.pkl")

# Random Forest
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))
joblib.dump(rf, "rf_model.pkl")


Logistic Regression Accuracy: 0.415
              precision    recall  f1-score   support

           0       0.44      0.93      0.59        86
           1       0.00      0.00      0.00        47
           2       0.18      0.04      0.07        67

    accuracy                           0.41       200
   macro avg       0.20      0.33      0.22       200
weighted avg       0.25      0.41      0.28       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest Accuracy: 0.335
              precision    recall  f1-score   support

           0       0.41      0.57      0.48        86
           1       0.14      0.11      0.12        47
           2       0.30      0.19      0.23        67

    accuracy                           0.34       200
   macro avg       0.28      0.29      0.28       200
weighted avg       0.31      0.34      0.31       200



['rf_model.pkl']

In [9]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

df_diets = pd.read_csv("data/All_Diets.csv")
df_diets = df_diets[['Protein(g)', 'Carbs(g)', 'Fat(g)']].dropna()

scaler_diets = StandardScaler()
X_diets = scaler_diets.fit_transform(df_diets)
joblib.dump(scaler_diets, 'diet_macro_scaler.pkl')

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_diets)
df_diets['Cluster'] = kmeans.labels_

print("KMeans clustering done for All_Diets.csv")
joblib.dump(kmeans, "kmeans_diets_model.pkl")


KMeans clustering done for All_Diets.csv


['kmeans_diets_model.pkl']

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

df_recipes = pd.read_csv("data/Food_Recipe.csv")
df_recipes = df_recipes[['name', 'description', 'cuisine']].dropna()

tfidf = TfidfVectorizer(stop_words='english')
X_recipes = tfidf.fit_transform(df_recipes['description'])
joblib.dump(tfidf, "tfidf_recipes.pkl")

nn = NearestNeighbors(n_neighbors=5, metric='cosine')
nn.fit(X_recipes)
joblib.dump(nn, "recipe_nn_model.pkl")
df_recipes.to_csv("processed_recipes.csv", index=False)  # needed for querying later

print("TF-IDF based recipe recommendation model trained.")


TF-IDF based recipe recommendation model trained.


In [11]:
df_fitness = pd.read_csv("data/Fitness_Routine.csv")
df_fitness = df_fitness[['Title', 'Desc', 'Type', 'BodyPart', 'Equipment', 'Level']]
df_fitness.dropna(inplace=True)
df_fitness.to_csv("processed_fitness.csv", index=False)
print("Fitness dataset cleaned and saved.")


Fitness dataset cleaned and saved.
