In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# 1 Load Preprocessed Data
file_path = "gym_recommendation_processed_deduped.csv"
df = pd.read_csv(file_path)

# 2 Separate Features & Target Variable
# Features are Age, Height and Weight; the target is the "Fitness Type" column.
input_features = ["Age", "Height", "Weight"]
target_column = "Fitness Type"
X, y = df[input_features], df[target_column]

# 3 Split Data into Training (80%) and Testing (20%)
# NOTE: X_test / y_test are created here but the evaluation below
# cross-validates on the full X, y instead of scoring this hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4 Train Decision Tree Classifier (entropy criterion)
# NOTE(review): the original comment called this "ID3"; only the entropy
# split criterion is selected here - confirm the algorithm name against the
# scikit-learn documentation before relying on it.
dt_model = DecisionTreeClassifier(criterion="entropy", random_state=42).fit(X_train, y_train)

# 5 Train Support Vector Machine (SVM - Linear Kernel)
svm_model = SVC(kernel="linear", random_state=42).fit(X_train, y_train)

# 6 Cross-Validation Evaluation (5 folds over the whole dataset)
dt_cv_scores = cross_val_score(estimator=dt_model, X=X, y=y, cv=5)
svm_cv_scores = cross_val_score(estimator=svm_model, X=X, y=y, cv=5)

print("\nDecision Tree Cross-Validation Scores:", dt_cv_scores)
print("SVM Cross-Validation Scores:", svm_cv_scores)

print(f"Decision Tree Average Cross-Validation Accuracy: {dt_cv_scores.mean():.2f}")
print(f"SVM Average Cross-Validation Accuracy: {svm_cv_scores.mean():.2f}")

# 7 Generate Cross-Validated Predictions for Classification Reports
y_pred_dt_cv = cross_val_predict(estimator=dt_model, X=X, y=y, cv=5)
y_pred_svm_cv = cross_val_predict(estimator=svm_model, X=X, y=y, cv=5)

# 8 Classification Reports for Cross-Validated Predictions
print(
    "\nDecision Tree Cross-Validation Classification Report:\n",
    classification_report(y, y_pred_dt_cv),
)
print(
    "\nSVM Cross-Validation Classification Report:\n",
    classification_report(y, y_pred_svm_cv),
)

# 9 Save the Best Model for Future Predictions
# The Decision Tree is kept only when its mean cross-validation accuracy is
# strictly higher; on a tie the SVM is saved.
best_model = dt_model if dt_cv_scores.mean() > svm_cv_scores.mean() else svm_model
joblib.dump(best_model, "best_fitness_model.pkl")

# Print the best model that was saved
# Identity check: we want to know *which object* was selected, not whether
# two estimators happen to compare equal.
if best_model is dt_model:
    print("✅ Decision Tree model saved for future predictions.")
else:
    print("✅ SVM model saved for future predictions.")

# 10 Function to Predict Fitness Type from User Input
def predict_fitness(user_input):
    """Predict the fitness type for a single person with the saved model.

    Args:
        user_input: One record of feature values; the script passes a dict
            keyed by the names in the module-level ``input_features`` list.

    Returns:
        The first (and only) element of the model's prediction for that record.
    """
    # The persisted model is re-read from disk on every call.
    fitted_model = joblib.load("best_fitness_model.pkl")
    # Build a one-row frame whose columns follow the ``input_features`` order.
    single_row = pd.DataFrame([user_input], columns=input_features)
    return fitted_model.predict(single_row)[0]

# 11 Get User Input
# Only the parsing/validation of the three numbers lives inside ``try`` so
# that a ValueError raised later by the prediction step is not misreported
# as a typing mistake by the user.
try:
    age = float(input("Enter Age: "))
    height = float(input("Enter Height: "))
    weight = float(input("Enter Weight: "))

    # float() happily parses "nan" and "inf"; treat those as invalid input
    # here so they still produce the friendly message below.
    if not np.isfinite([age, height, weight]).all():
        raise ValueError("Age, Height and Weight must be finite numbers.")
except ValueError:
    print("Invalid input. Please enter numerical values for Age, Height, and Weight.")
else:
    user_input = {
        "Age": age,
        "Height": height,
        "Weight": weight
    }

    # 12 Predict Fitness Type
    predicted_fitness_num = predict_fitness(user_input)
    # Map Numerical Prediction to Labels
    # NOTE(review): any prediction other than 0 is reported as "Muscular";
    # this assumes the target column is encoded as 0/1 - confirm against the data.
    if predicted_fitness_num == 0:
        predicted_fitness_label = "Cardio"
    else:
        predicted_fitness_label = "Muscular"

    print("Predicted Fitness Type:", predicted_fitness_label)

#note:
We trained both a Decision Tree and an SVM model and chose the SVM for its better accuracy. The fitness label predicted by the SVM is passed to the generative models (TinyLlama and DistilGPT2) to create explanations and warnings, so both parts are kept together in the same folder.

---------------------------------------------------------------------------------------------------------------------------------------------

#Generative AI code :

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder

# ==========================
# 🔢 Load & Train SVM Model
# ==========================
# The SVM is refitted on the full dataset each time this cell runs.
df = pd.read_csv("gym_recommendation_processed_deduped.csv")
features = ["Age", "Height", "Weight"]
X = df[features]
y = df["Fitness Type"]

# Encode the target so the prediction can be mapped back to the original
# value of the "Fitness Type" column afterwards.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

svm_model = SVC(kernel="linear")
svm_model.fit(X, y_encoded)

# ===============================
# 📥 Get User Input
# ===============================
try:
    age = float(input("Enter Age: "))
    height = float(input("Enter Height (cm): "))
    weight = float(input("Enter Weight (kg): "))
except ValueError:
    print("Invalid input. Please enter numeric values.")
    # Raise SystemExit directly instead of calling the interactive-shell
    # helper exit(); a non-zero status marks the run as failed.
    raise SystemExit(1)

# Predict for a one-row frame that uses the same column names as training.
user_input_df = pd.DataFrame([[age, height, weight]], columns=features)
encoded_prediction = svm_model.predict(user_input_df)[0]
# NOTE(review): predicted_label is whatever value the CSV stores in
# "Fitness Type"; if that column is numeric, the prompts built from it will
# contain a number rather than a readable name - confirm against the data.
predicted_label = label_encoder.inverse_transform([encoded_prediction])[0]

print(f"\n🎯 Predicted Fitness Type: {predicted_label}")

# ================================
# 📝 Prompt Templates
# ================================
def template_explanation(age, height, weight, label):
    """Build the prompt asking why these metrics lead to the given fitness type.

    Args:
        age: Age value inserted into the prompt (years).
        height: Height value inserted into the prompt (cm).
        weight: Weight value inserted into the prompt (kg).
        label: Predicted fitness type, quoted inside the prompt.

    Returns:
        The prompt as a single string.
    """
    profile = f"I'm {age} years old, {height} cm tall, and weigh {weight} kg. "
    question = f"My fitness type is '{label}'. Can you explain how physical metrics like these contribute to such a classification?"
    return profile + question

def template_warning(age, height, weight, label):
    """Build the prompt asking about health risks for the given fitness type.

    The text is framed as a "User: ... / AI:" exchange and ends with "AI:"
    so that a completion model continues with the answer.

    Args:
        age: Age value inserted into the prompt (years).
        height: Height value inserted into the prompt (cm).
        weight: Weight value inserted into the prompt (kg).
        label: Predicted fitness type, quoted inside the prompt.

    Returns:
        The prompt as a single string.
    """
    profile = f"User: I'm {age} years old, {height} cm tall, and weigh {weight} kg. "
    question = f"My fitness type is '{label}'. Are there any potential health risks or warnings I should be aware of?\nAI:"
    return profile + question

# Build both prompts from the same measurements and predicted label.
prompt_explanation = template_explanation(
    age, height, weight, predicted_label
)
prompt_warning = template_warning(
    age, height, weight, predicted_label
)

# ===============================
# 🧙🏼 TinyLlama Setup
# ===============================
tiny_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tiny_tokenizer = AutoTokenizer.from_pretrained(tiny_model_id)
tiny_model = AutoModelForCausalLM.from_pretrained(tiny_model_id)
# Place the model on the GPU when CUDA is available, otherwise on the CPU.
tiny_model = tiny_model.to("cuda" if torch.cuda.is_available() else "cpu")

def format_chat_prompt(user_prompt):
    """Wrap a user message in the TinyLlama chat (Zephyr-style) prompt layout.

    Each turn is closed with the end-of-sequence marker ``</s>`` followed by
    a newline, and the prompt ends with the ``<|assistant|>`` header so the
    model continues with the assistant's reply.

    Args:
        user_prompt: The user's message text.

    Returns:
        The full prompt string: system turn, user turn, assistant header.
    """
    return (
        "<|system|>\nYou are a helpful fitness assistant.</s>\n"
        f"<|user|>\n{user_prompt}</s>\n"
        "<|assistant|>\n"
    )

def generate_llama_response(prompt):
    """Generate TinyLlama's reply to a user prompt.

    The prompt is wrapped with ``format_chat_prompt``, tokenised, and passed
    to ``tiny_model.generate`` with sampling disabled.

    Args:
        prompt: The user's message text.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    inputs = tiny_tokenizer(format_chat_prompt(prompt), return_tensors="pt").to(tiny_model.device)
    outputs = tiny_model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=False,
        repetition_penalty=1.2
    )
    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them instead of splitting the decoded text on "<|assistant|>",
    # which would cut the reply short if the model emits that marker itself.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tiny_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# Ask TinyLlama about both prompts before printing anything.
llama_response_explanation = generate_llama_response(prompt_explanation)
llama_response_warning = generate_llama_response(prompt_warning)

# Each group prints: heading, prompt, heading, response - one item per line.
print(
    "\n💬 🧙🏼 TinyLlama Prompt (Explanation):",
    prompt_explanation,
    "\n💬 🧙🏼 TinyLlama Response:",
    llama_response_explanation,
    sep="\n",
)

print("\n" + 70 * "=")
print(
    "\n💬 🧙🏼 TinyLlama Prompt (Warning):",
    prompt_warning,
    "\n💬 🧙🏼 TinyLlama Response:",
    llama_response_warning,
    sep="\n",
)

# ===============================
# 🤖 DistilGPT2 Setup (No approval needed)
# ===============================
distilgpt2_model_id = "distilgpt2"
distilgpt2_tokenizer = AutoTokenizer.from_pretrained(distilgpt2_model_id)
distilgpt2_model = AutoModelForCausalLM.from_pretrained(distilgpt2_model_id)
# Place the model on the GPU when CUDA is available, otherwise on the CPU.
distilgpt2_model = distilgpt2_model.to("cuda" if torch.cuda.is_available() else "cpu")

def generate_distilgpt2_response(prompt):
    """Generate a sampled DistilGPT2 continuation of ``prompt``.

    Args:
        prompt: Text the model should continue.

    Returns:
        The generated continuation with surrounding whitespace stripped, or
        a fixed placeholder message when the continuation is empty.
    """
    # Tokenise through __call__ so the attention mask is produced and passed
    # to generate(); the pad token is set to EOS below.
    encoded = distilgpt2_tokenizer(prompt, return_tensors="pt").to(distilgpt2_model.device)
    outputs = distilgpt2_model.generate(
        **encoded,
        max_length=320,  # counts prompt tokens plus generated tokens
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=distilgpt2_tokenizer.eos_token_id,
        no_repeat_ngram_size=3
    )
    # The output starts with the prompt tokens. Slice them off rather than
    # string-replacing the prompt, which depends on an exact decode
    # round-trip and would also delete any later repetition of the prompt.
    prompt_length = encoded["input_ids"].shape[1]
    response_clean = distilgpt2_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()
    return response_clean if response_clean else "[⚠️ No meaningful response generated.]"

# Ask DistilGPT2 about both prompts before printing anything.
gpt2_response_explanation = generate_distilgpt2_response(prompt_explanation)
gpt2_response_warning = generate_distilgpt2_response(prompt_warning)

# Each group prints: heading, prompt, heading, response - one item per line.
print("\n" + 70 * "=")
print(
    "\n💬 🤖 DistilGPT2 Prompt (Explanation):",
    prompt_explanation,
    "\n💬 🤖 DistilGPT2 Response:",
    gpt2_response_explanation,
    sep="\n",
)

print("\n" + 70 * "=")
print(
    "\n💬 🤖 DistilGPT2 Prompt (Warning):",
    prompt_warning,
    "\n💬 🤖 DistilGPT2 Response:",
    gpt2_response_warning,
    sep="\n",
)

# ===============================
# 📌 Template Justification
# ===============================
print("\n" + 70 * "=")
print(
    "\n📌 Explanation of Template Differences & Justification:\n",
    "- Template 1 (Explanation): Helps the user understand how input features affect classification.",
    "- Template 2 (Warning): Informs the user about potential health risks based on their fitness profile.",
    "🧠 Preferred: Warning template, because it helps users stay aware of important health considerations.",
    sep="\n",
)
