<a href="https://colab.research.google.com/github/MohamedSci/AI-Powered-Personalized-Fitness-and-Nutrition-Recommendation-Engine/blob/main/AI_Powered_Personalized_Fitness_and_Nutrition_Recommendation_Engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Production Mode**

# **Production Mode with Google Colab**

In [4]:
# -*- coding: utf-8 -*-
"""
AI-Powered Personalized Fitness and Nutrition Recommendation Engine - Production-Ready Code for Google Colab

This script implements a comprehensive AI model for generating personalized fitness and
nutrition plans, specifically designed to run on Google Colab with data files stored
in the user's Google Drive.

Author: Mohamed Said Ibrahim
Date: April 1, 2025
Version: 1.1 (Colab Specific)
"""

# --- 1. Mount Google Drive ---
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, r2_score
import joblib
import logging
import os
from typing import Dict, List, Union, Tuple
from dataclasses import dataclass

# --- 2. Configuration Management ---
@dataclass
class Configuration:
    """Configuration class to manage file paths and hyperparameters."""
    # Update this path to your Google Drive data folder
    DRIVE_ROOT: str = '/content/drive/MyDrive'
    PROJECT_FOLDER: str = 'Projects/Get_Fit_App/Ai_Model'
    DATA_FOLDER: str = 'Data_Source'
    DATA_DIR: str = os.path.join(DRIVE_ROOT, PROJECT_FOLDER, DATA_FOLDER)
    Generated_Models_FOLDER: str = 'Generated_Models'
    MODELS_DIR: str = os.path.join(DRIVE_ROOT, PROJECT_FOLDER, Generated_Models_FOLDER)
    FITNESS_LEVEL_DATA_FILE: str = 'fitness_level_data_example.csv'
    TRAINING_PARAMS_DATA_FILE: str = 'training_params_data_example.csv'
    DIETARY_NEEDS_DATA_FILE: str = 'dietary_needs_data_example.csv'
    EXERCISE_DATABASE_FILE: str = 'exercise_database_example.csv'
    FITNESS_LEVEL_MODEL_NAME: str = 'fitness_level_model.pkl'
    TRAINING_PARAMS_MODEL_NAME: str = 'training_params_model.pkl'
    DIETARY_NEEDS_MODEL_NAME: str = 'dietary_needs_model.pkl'
    N_SPLITS_CV: int = 5
    RANDOM_STATE: int = 42
    LOG_LEVEL: int = logging.INFO

# Initialize configuration and logging
config = Configuration()
os.makedirs(config.MODELS_DIR, exist_ok=True)

logging.basicConfig(level=config.LOG_LEVEL, format='%(asctime)s - %(levelname)s - %(module)s - %(message)s')

# --- 3. Data Loading and Preprocessing Module ---
class DataPreprocessor:
    """Handles loading, validation, and preprocessing of data."""
    def load_data(self, file_path: str) -> pd.DataFrame:
        """Loads data from a CSV file and performs basic validation."""
        try:
            df = pd.read_csv(file_path)
            if df.empty:
                logging.warning(f"Loaded data from {file_path} is empty.")
            return df
        except FileNotFoundError:
            logging.error(f"Data file not found at: {file_path}")
            raise
        except Exception as e:
            logging.error(f"Error loading data from {file_path}: {e}")
            raise

    def preprocess_user_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Preprocesses user data for model training or inference."""
        logging.info("Preprocessing user data...")
        # Basic data cleaning (more specific cleaning might be needed based on data)
        df.replace([np.inf, -np.inf], np.nan, inplace=True)

        # Handle missing values (impute numerical with mean, categorical with mode)
        for col in df.select_dtypes(include=np.number).columns:
            df[col].fillna(df[col].mean(), inplace=True)
        for col in df.select_dtypes(include='object').columns:
            df[col].fillna(df[col].mode()[0], inplace=True)

        # Feature Engineering (example: BMI calculation)
        if 'weight' in df.columns and 'height' in df.columns:
            df['bmi'] = df['weight'] / (df['height'] / 100)**2

        return df

    def split_data(self, df: pd.DataFrame, target_column: Union[str, List[str]], test_size: float = 0.2) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
        """Splits data into training and testing sets."""
        X = df.drop(columns=target_column, errors='ignore')
        y = df[target_column] if isinstance(target_column, str) and target_column in df.columns else df[target_column] if isinstance(target_column, list) and all(col in df.columns for col in target_column) else None
        if y is None:
            raise ValueError(f"Target column(s) '{target_column}' not found in DataFrame.")
        return train_test_split(X, y, test_size=test_size, random_state=config.RANDOM_STATE)

    def create_preprocessing_pipeline(self, X: pd.DataFrame) -> ColumnTransformer:
        """Creates a preprocessing pipeline for numerical and categorical features."""
        numerical_features = X.select_dtypes(include=np.number).columns.tolist()
        categorical_features = X.select_dtypes(include='object').columns.tolist()

        numerical_transformer = StandardScaler()
        categorical_transformer = OneHotEncoder(handle_unknown='ignore')

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numerical_transformer, numerical_features),
                ('cat', categorical_transformer, categorical_features)])
        return preprocessor

# --- 4. Model Training Module ---
class ModelTrainer:
    """Trains and evaluates machine learning models."""
    def __init__(self, model_type: str):
        self.model_type = model_type
        self.model = self._initialize_model()

    def _initialize_model(self):
        """Initializes the appropriate model based on the model type."""
        if self.model_type == 'fitness_level':
            return RandomForestClassifier(random_state=config.RANDOM_STATE)
        elif self.model_type == 'training_params':
            return RandomForestRegressor(random_state=config.RANDOM_STATE)
        elif self.model_type == 'dietary_needs':
            return LinearRegression()
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")

    def train_model(self, X_train: pd.DataFrame, y_train: pd.Series):
        """Trains the specified model."""
        logging.info(f"Training the {self.model_type} model...")
        self.model.fit(X_train, y_train)
        logging.info(f"{self.model_type} model training complete.")

    def evaluate_model(self, X_test: pd.DataFrame, y_test: pd.Series):
        """Evaluates the trained model based on the model type."""
        logging.info(f"Evaluating the {self.model_type} model...")
        if self.model_type == 'fitness_level':
            y_pred = self.model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred)
            logging.info(f"{self.model_type} model accuracy: {accuracy:.4f}")
            logging.info(f"{self.model_type} model classification report:\n{report}")
            return accuracy
        elif self.model_type == 'training_params':
            y_pred = self.model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            logging.info(f"{self.model_type} model mean squared error: {mse:.4f}")
            logging.info(f"{self.model_type} model R-squared: {r2:.4f}")
            return mse
        elif self.model_type == 'dietary_needs':
            y_pred = self.model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            logging.info(f"{self.model_type} model mean squared error: {mse:.4f}")
            logging.info(f"{self.model_type} model R-squared: {r2:.4f}")
            return mse
        return None

    def save_model(self, filename: str):
        """Saves the trained model to the specified path."""
        model_path = os.path.join(config.MODELS_DIR, filename)
        try:
            joblib.dump(self.model, model_path)
            logging.info(f"{self.model_type} model saved to: {model_path}")
        except Exception as e:
            logging.error(f"Error saving {self.model_type} model: {e}")

    def load_model(self, filename: str):
        """Loads a trained model from the specified path."""
        model_path = os.path.join(config.MODELS_DIR, filename)
        try:
            self.model = joblib.load(model_path)
            logging.info(f"{self.model_type} model loaded from: {model_path}")
        except FileNotFoundError:
            logging.error(f"Model file not found at: {model_path}")
            raise
        except Exception as e:
            logging.error(f"Error loading {self.model_type} model: {e}")

# --- 5. Hyperparameter Tuning Module ---
class HyperparameterTuner:
    """Tunes the hyperparameters of the AI models using GridSearchCV."""
    def __init__(self, model_type: str, model, param_grid: Dict):
        self.model_type = model_type
        self.model = model
        self.param_grid = param_grid
        self.best_model = None

    def tune_hyperparameters(self, X_train: pd.DataFrame, y_train: pd.Series, scoring: str = None, cv: int = config.N_SPLITS_CV):
        """Performs hyperparameter tuning using GridSearchCV."""
        logging.info(f"Tuning hyperparameters for the {self.model_type} model...")
        grid_search = GridSearchCV(estimator=self.model, param_grid=self.param_grid, cv=cv, scoring=scoring, n_jobs=-1)
        grid_search.fit(X_train, y_train)
        self.best_model = grid_search.best_estimator_
        logging.info(f"Best hyperparameters for {self.model_type}: {grid_search.best_params_}")

    def get_best_model(self):
        """Returns the best performing model after tuning."""
        return self.best_model

# --- 6. Prediction Module ---
class PredictionEngine:
    """Handles loading trained models and making predictions."""
    def __init__(self):
        self.fitness_level_model = None
        self.training_params_model = None
        self.dietary_needs_model = None
        self.preprocessor = DataPreprocessor()
        self.feature_encoders = {} # Store fitted preprocessors

    def load_models(self):
        """Loads all necessary trained models."""
        try:
            trainer_fitness = ModelTrainer(model_type='fitness_level')
            trainer_fitness.load_model(os.path.join(config.MODELS_DIR,config.FITNESS_LEVEL_MODEL_NAME))
            self.fitness_level_model = trainer_fitness.model

            trainer_training_params = ModelTrainer(model_type='training_params')
            trainer_training_params.load_model(os.path.join(config.MODELS_DIR,config.TRAINING_PARAMS_MODEL_NAME))
            self.training_params_model = trainer_training_params.model

            trainer_dietary_needs = ModelTrainer(model_type='dietary_needs')
            trainer_dietary_needs.load_model(os.path.join(config.MODELS_DIR,config.DIETARY_NEEDS_MODEL_NAME))
            self.dietary_needs_model = trainer_dietary_needs.model

            # Load the fitted preprocessors
            self.feature_encoders['fitness_level'] = joblib.load(os.path.join(config.MODELS_DIR, 'fitness_preprocessor.pkl'))
            self.feature_encoders['training_params'] = joblib.load(os.path.join(config.MODELS_DIR, 'training_preprocessor.pkl'))
            self.feature_encoders['dietary_needs'] = joblib.load(os.path.join(config.MODELS_DIR, 'dietary_preprocessor.pkl'))

            logging.info("All models loaded successfully.")

        except FileNotFoundError as e:
            logging.error(f"Error loading models: {e}")
            raise
        except Exception as e:
            logging.error(f"An unexpected error occurred while loading models: {e}")
            raise

    def predict_fitness_level(self, user_profile: Dict) -> str:
        """Predicts the fitness level of a user."""
        if self.fitness_level_model is None or 'fitness_level' not in self.feature_encoders:
            logging.error("Fitness level model or preprocessor not loaded.")
            return "Error"
        try:
            user_df = pd.DataFrame([user_profile])
            # Ensure only features used during training are selected and ordered correctly
            feature_names = joblib.load(os.path.join(config.MODELS_DIR, 'fitness_feature_names.pkl'))
            user_df = user_df[feature_names]
            processed_data = self.feature_encoders['fitness_level'].transform(user_df)
            prediction = self.fitness_level_model.predict(processed_data)[0]
            return prediction
        except Exception as e:
            logging.error(f"Error predicting fitness level: {e}")
            return "Error"

    def predict_training_plan(self, user_profile: Dict) -> Dict:
        """Predicts the training parameters for a user."""
        if self.training_params_model is None or 'training_params' not in self.feature_encoders:
            logging.error("Training parameters model or preprocessor not loaded.")
            return {"error": "Model not loaded"}
        try:
            user_df = pd.DataFrame([user_profile])
            feature_names = joblib.load(os.path.join(config.MODELS_DIR, 'training_params_feature_names.pkl'))
            user_df = user_df[feature_names]
            processed_data = self.feature_encoders['training_params'].transform(user_df)
            prediction = self.training_params_model.predict(processed_data)[0] # Assuming output is an array
            # Map the predicted parameters to a structured training plan
            # This mapping needs to be consistent with how the model was trained
            plan = {
                "workout_frequency": "3 days per week (example)",
                "exercises": [
                    {"name": "Barbell Squat", "sets": round(prediction[0]), "repetitions": round(prediction[1])},
                    {"name": "Bench Press", "sets": round(prediction[2]), "repetitions": round(prediction[3])},
                    # ... map other predicted parameters to exercises
                ]
            }
            return plan
        except Exception as e:
            logging.error(f"Error predicting training plan: {e}")
            return {"error": str(e)}

    def predict_dietary_needs(self, user_profile: Dict) -> Dict:
        """Predicts the dietary needs (calories and macros) for a user."""
        if self.dietary_needs_model is None or 'dietary_needs' not in self.feature_encoders:
            logging.error("Dietary needs model or preprocessor not loaded.")
            return {"error": "Model not loaded"}
        try:
            user_df = pd.DataFrame([user_profile])
            feature_names = joblib.load(os.path.join(config.MODELS_DIR, 'dietary_needs_feature_names.pkl'))
            user_df = user_df[feature_names]
            processed_data = self.feature_encoders['dietary_needs'].transform(user_df)
            prediction = self.dietary_needs_model.predict(processed_data)[0] # Assuming output is [calories, protein, carbs, fat]
            needs = {
                "daily_calories": round(prediction[0]),
                "macronutrient_targets": {
                    "protein": round(prediction[1]),
                    "carbs": round(prediction[2]),
                    "fat": round(prediction[3])
                }
            }
            return needs
        except Exception as e:
            logging.error(f"Error predicting dietary needs: {e}")
            return {"error": str(e)}

# --- 7. Training Function ---
def train_models(config: Configuration):
    """Trains and saves the AI models."""
    preprocessor = DataPreprocessor()

    # --- Train Fitness Level Classification Model ---
    try:
        fitness_data = preprocessor.load_data(os.path.join(config.DATA_DIR, config.FITNESS_LEVEL_DATA_FILE))
        fitness_data = preprocessor.preprocess_user_data(fitness_data)
        X_fitness, y_fitness = fitness_data.drop('fitness_level', axis=1), fitness_data['fitness_level']
        X_train_fitness, X_test_fitness, y_train_fitness, y_test_fitness = preprocessor.split_data(fitness_data, 'fitness_level')

        fitness_preprocessor = preprocessor.create_preprocessing_pipeline(X_train_fitness)
        X_train_processed_fitness = fitness_preprocessor.fit_transform(X_train_fitness)
        X_test_processed_fitness = fitness_preprocessor.transform(X_test_fitness)

        trainer_fitness = ModelTrainer(model_type='fitness_level')
        trainer_fitness.train_model(X_train_processed_fitness, y_train_fitness)
        trainer_fitness.evaluate_model(X_test_processed_fitness, y_test_fitness)
        trainer_fitness.save_model(os.path.join(config.MODELS_DIR,config.FITNESS_LEVEL_MODEL_NAME))
        joblib.dump(fitness_preprocessor, os.path.join(config.MODELS_DIR, 'fitness_preprocessor.pkl'))
        joblib.dump(X_train_fitness.columns.tolist(), os.path.join(config.MODELS_DIR, 'fitness_feature_names.pkl'))

    except FileNotFoundError:
        logging.warning("Fitness level training data not found. Skipping training.")
    except Exception as e:
        logging.error(f"Error training fitness level model: {e}")

    # --- Train Training Parameters Regression Model ---
    try:
        training_params_data = preprocessor.load_data(os.path.join(config.DATA_DIR, config.TRAINING_PARAMS_DATA_FILE))
        training_params_data = preprocessor.preprocess_user_data(training_params_data)
        target_columns_tp = ['squat_sets', 'squat_reps', 'bench_sets', 'bench_reps'] # Example targets
        X_training_params, y_training_params = training_params_data.drop(target_columns_tp, axis=1, errors='ignore'), training_params_data[target_columns_tp]
        X_train_tp, X_test_tp, y_train_tp, y_test_tp = preprocessor.split_data(training_params_data, target_columns_tp)

        training_preprocessor = preprocessor.create_preprocessing_pipeline(X_train_tp)
        X_train_processed_tp = training_preprocessor.fit_transform(X_train_tp)
        X_test_processed_tp = training_preprocessor.transform(X_test_tp)

        trainer_training_params = ModelTrainer(model_type='training_params')
        trainer_training_params.train_model(X_train_processed_tp, y_train_tp)
        trainer_training_params.evaluate_model(X_test_processed_tp, y_test_tp)
        trainer_training_params.save_model(os.path.join(config.MODELS_DIR,config.TRAINING_PARAMS_MODEL_NAME))
        joblib.dump(training_preprocessor, os.path.join(config.MODELS_DIR, 'training_preprocessor.pkl'))
        joblib.dump(X_train_tp.columns.tolist(), os.path.join(config.MODELS_DIR, 'training_params_feature_names.pkl'))

    except FileNotFoundError:
        logging.warning("Training parameters data not found. Skipping training.")
    except Exception as e:
        logging.error(f"Error training training parameters model: {e}")

    # --- Train Dietary Needs Regression Model ---
    try:
        dietary_needs_data = preprocessor.load_data(os.path.join(config.DATA_DIR, config.DIETARY_NEEDS_DATA_FILE))
        dietary_needs_data = preprocessor.preprocess_user_data(dietary_needs_data)
        target_columns_dn = ['calories', 'protein', 'carbs', 'fat']
        X_dietary_needs, y_dietary_needs = dietary_needs_data.drop(target_columns_dn, axis=1, errors='ignore'), dietary_needs_data[target_columns_dn]
        X_train_dn, X_test_dn, y_train_dn, y_test_dn = preprocessor.split_data(dietary_needs_data, target_columns_dn)

        dietary_preprocessor = preprocessor.create_preprocessing_pipeline(X_train_dn)
        X_train_processed_dn = dietary_preprocessor.fit_transform(X_train_dn)
        X_test_processed_dn = dietary_preprocessor.transform(X_test_dn)

        trainer_dietary_needs = ModelTrainer(model_type='dietary_needs')
        trainer_dietary_needs.train_model(X_train_processed_dn, y_train_dn)
        trainer_dietary_needs.evaluate_model(X_test_processed_dn, y_test_dn)
        trainer_dietary_needs.save_model(os.path.join(config.MODELS_DIR,config.DIETARY_NEEDS_MODEL_NAME))
        joblib.dump(dietary_preprocessor, os.path.join(config.MODELS_DIR, 'dietary_preprocessor.pkl'))
        joblib.dump(X_train_dn.columns.tolist(), os.path.join(config.MODELS_DIR, 'dietary_needs_feature_names.pkl'))

    except FileNotFoundError:
        logging.warning("Dietary needs training data not found. Skipping training.")
    except Exception as e:
        logging.error(f"Error training dietary needs model: {e}")

# --- 8. Main Function to Run Predictions ---
def main():
    """Loads trained models and demonstrates prediction for a sample user."""
    prediction_engine = PredictionEngine()
    try:
        prediction_engine.load_models()

        # Example user profile (replace with actual user input)
        sample_user_profile = {
            "age": 30,
            "gender": "male",
            "weight": 80,  # kg
            "height": 180, # cm
            "exercise_capabilities": {"pushups": 10, "squats": 20, "bench_press_weight": 60},
            "country_of_residence": "Egypt",
            "target_goal": "building mass and strength"
        }

        fitness_level = prediction_engine.predict_fitness_level(sample_user_profile)
        print(f"\nPredicted Fitness Level: {fitness_level}")

        training_plan = prediction_engine.predict_training_plan(sample_user_profile)
        print("\n--- Predicted Training Plan ---")
        print(training_plan)

        dietary_needs = prediction_engine.predict_dietary_needs(sample_user_profile)
        print("\n--- Predicted Dietary Needs ---")
        print(dietary_needs)

    except Exception as e:
        logging.error(f"Error during prediction: {e}")

# --- 9. Conceptual Deployment (Illustrative) ---
def deploy_model_conceptual(config: Configuration):
    """
    Conceptual deployment function. In a real scenario, this would involve:
    - Containerization (e.g., using Docker)
    - Setting up an API endpoint (e.g., using Flask or FastAPI)
    - Deploying to a cloud platform (e.g., AWS, Google Cloud, Azure)
    """
    logging.info("Conceptual model deployment started...")
    logging.info(f"Trained models are saved in: {config.MODELS_DIR}")
    # In a real deployment, you would load these models in your API service
    # and use them to serve prediction requests.
    logging.info("Conceptual model deployment finished.")

# --- 10. Execution Block ---
if __name__ == "__main__":
    # To train the models, uncomment the following line:
    train_models(config)

    # To run predictions using the trained models:
    main()

    # To simulate deployment (conceptual):
    deploy_model_conceptual(config)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values 


Predicted Fitness Level: Error

--- Predicted Training Plan ---
{'error': '"[\'pushups\', \'squats\', \'weight_lifted_squat_max\', \'weight_lifted_bench_max\', \'bmi\'] not in index"'}

--- Predicted Dietary Needs ---
{'error': '"[\'activity_level\', \'pushups\', \'squats\', \'bmi\'] not in index"'}
