In [None]:
# Phase 3: Machine Learning Models

# ==========================
# 1. Import libraries
# ==========================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error, r2_score

# ==========================
# 2. Load dataset
# ==========================
# Read the CSV into `df`, the DataFrame every later section works on.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="fitness_and_workout_dataset_cleaned.csv")

# ==========================
# 3. Encode categorical variables
# ==========================
# Every object-dtype column of `df` is overwritten in place with integer
# codes. The fitted encoder for each column is stored in `label_encoders`
# (column name -> LabelEncoder) so the codes can later be mapped back with
# `inverse_transform`.
#
# NOTE(review): the encoders are fitted on the whole DataFrame, i.e. before
# any train/test split below. scikit-learn documents LabelEncoder as a target
# encoder; the integer codes impose an arbitrary order on feature columns.
label_encoders = {}
object_columns = df.select_dtypes(include=["object"]).columns
for col in object_columns:
    # Learn the column's distinct values first, then map the column to codes.
    le = LabelEncoder().fit(df[col])
    df[col] = le.transform(df[col])
    label_encoders[col] = le

# ==========================
# 4. Classification Model: Predict goal_type
# ==========================
# Trains a random forest that predicts `goal_type` from all other columns of
# `df`, then prints accuracy and a per-class report for a 20% hold-out split.
# Leaves `clf` (the fitted classifier) defined for the save step.
if "goal_type" in df.columns:
    # Features & Target
    X = df.drop(columns=["goal_type"])
    y = df["goal_type"]

    # Split Data (fixed random_state keeps the split reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train Model
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Predictions
    y_pred = clf.predict(X_test)

    # Evaluation
    print("\n🔹 Classification Model Performance:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print("\nClassification Report:")
    # If the target was label-encoded in section 3, report the original class
    # names instead of opaque integer codes. `labels` is passed explicitly so
    # the names stay aligned even when a class is absent from the test split.
    goal_encoder = label_encoders.get("goal_type")
    if goal_encoder is not None:
        print(
            classification_report(
                y_test,
                y_pred,
                labels=list(range(len(goal_encoder.classes_))),
                target_names=[str(name) for name in goal_encoder.classes_],
            )
        )
    else:
        # Target was already numeric in the CSV; nothing to map back.
        print(classification_report(y_test, y_pred))
else:
    # Say so explicitly: a silent skip leaves `clf` undefined, which only
    # surfaces later as a confusing failure or an unexpectedly missing model.
    print("\n⚠️ Column 'goal_type' not found; classification model was not trained.")
    print(f"Available columns: {list(df.columns)}")

# ==========================
# 5. Regression Model: Predict time_per_workout
# ==========================
# Trains a random forest that predicts `time_per_workout` from all other
# columns of `df`, then prints MAE and R² for a 20% hold-out split.
# Leaves `reg` (the fitted regressor) defined for the save step.
#
# NOTE(review): the output recorded with this notebook shows R² = 0.99, which
# is unusually high. Check whether another column is derived from
# `time_per_workout` (target leakage) before trusting the score.
# NOTE(review): if `time_per_workout` is stored as text in the CSV, section 3
# has replaced it with arbitrary label codes and this regression is not
# meaningful. Confirm the column is numeric.
if "time_per_workout" in df.columns:
    # Features & Target
    X = df.drop(columns=["time_per_workout"])
    y = df["time_per_workout"]

    # Split Data (fixed random_state keeps the split reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train Model
    reg = RandomForestRegressor(n_estimators=100, random_state=42)
    reg.fit(X_train, y_train)

    # Predictions
    y_pred = reg.predict(X_test)

    # Evaluation
    print("\n🔹 Regression Model Performance:")
    print(f"Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred):.2f}")
    print(f"R² Score: {r2_score(y_test, y_pred):.2f}")
else:
    # Say so explicitly instead of skipping silently, so a missing or renamed
    # column is noticed here rather than when `reg` turns out to be undefined.
    print("\n⚠️ Column 'time_per_workout' not found; regression model was not trained.")
    print(f"Available columns: {list(df.columns)}")

# ==========================
# 6. Save Models (Optional)
# ==========================
import joblib

# Persist whichever models exist in this session, plus the label encoders.
# `clf` / `reg` are only defined when their target column was present, hence
# the globals() checks.
# NOTE(review): in a long-lived notebook kernel these names may also be left
# over from an earlier run. Restart the kernel if that matters.
saved_models = []
if "clf" in globals():
    joblib.dump(clf, "goal_type_classifier.pkl")
    saved_models.append("goal_type_classifier.pkl")
if "reg" in globals():
    joblib.dump(reg, "time_per_workout_regressor.pkl")
    saved_models.append("time_per_workout_regressor.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

# Report what was actually written rather than claiming success unconditionally.
if saved_models:
    print("\n✅ Models saved successfully.")
    print(f"Saved: {', '.join(saved_models + ['label_encoders.pkl'])}")
else:
    print("\n⚠️ No trained model found; only label_encoders.pkl was saved.")



🔹 Regression Model Performance:
Mean Absolute Error (MAE): 0.70
R² Score: 0.99


NameError: name 'clf' is not defined