# Supervised learning


In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# 1 Read the preprocessed, de-duplicated dataset
file_path = "gym_recommendation_processed_deduped.csv"
df = pd.read_csv(file_path)

# 2 Split the frame into the label column and the predictor columns
target_column = "Fitness Type"
y = df[target_column]               # label to predict
X = df.drop(target_column, axis=1)  # every remaining column is a feature

# 3 Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4 Decision tree using the entropy (information-gain) split criterion
# NOTE: scikit-learn's trees are CART-based; criterion="entropy" is not a literal ID3.
dt_model = DecisionTreeClassifier(criterion="entropy", random_state=42).fit(X_train, y_train)

# 5 Support vector machine with a linear kernel
svm_model = SVC(kernel="linear", random_state=42).fit(X_train, y_train)

# 6 Score both models on the held-out rows
y_pred_dt = dt_model.predict(X_test)
y_pred_svm = svm_model.predict(X_test)

accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Per-class precision / recall / f1 for each model
print("\nDecision Tree Classification Report:\n", classification_report(y_test, y_pred_dt))
print("\nSVM Classification Report:\n", classification_report(y_test, y_pred_svm))

# 7 Persist whichever model scored strictly higher on the test set (the SVM wins ties)
if accuracy_dt > accuracy_svm:
    best_model = dt_model
else:
    best_model = svm_model
joblib.dump(best_model, "best_fitness_model.pkl")
print("✅ Best model saved for future predictions.")

# 8 Function to Predict Fitness Type from User Input
def predict_fitness(user_input):
    """Predict the fitness type for a single user with the saved best model.

    Parameters
    ----------
    user_input : dict or sequence
        Either a mapping of feature name -> value (extra keys are ignored),
        or a sequence of values given in the model's feature order.

    Returns
    -------
    The first (only) element of ``model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``user_input`` is a dict that lacks one or more required features.
        Without this check the missing columns would silently become NaN.
    """
    model = joblib.load("best_fitness_model.pkl")  # Load saved model

    # Prefer the feature names stored on the fitted estimator so the input
    # always matches the pickled model, even if the notebook-level ``X`` was
    # redefined by another cell. Fall back to ``X.columns`` otherwise.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    if isinstance(user_input, dict):
        missing = [name for name in feature_names if name not in user_input]
        if missing:
            raise ValueError(f"Missing feature(s) for prediction: {missing}")

    # One-row frame with columns selected and ordered as the model expects
    input_df = pd.DataFrame([user_input], columns=feature_names)
    return model.predict(input_df)[0]

# Example: run the saved model on one hand-written sample record
sample_user = {
    "Sex": 1,
    "Level": 2,
    "Fitness Goal": 0,
    "Hypertension": 0,
    "Diabetes": 0,
    "Age": 0.5,
    "Height": 0.7,
    "Weight": 0.6,
    "BMI": 0.4,
}
print("Predicted Fitness Type:", predict_fitness(user_input=sample_user))

Decision Tree Accuracy: 1.00
SVM Accuracy: 1.00

Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1213
           1       1.00      1.00      1.00      1196

    accuracy                           1.00      2409
   macro avg       1.00      1.00      1.00      2409
weighted avg       1.00      1.00      1.00      2409


SVM Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1213
           1       1.00      1.00      1.00      1196

    accuracy                           1.00      2409
   macro avg       1.00      1.00      1.00      2409
weighted avg       1.00      1.00      1.00      2409

✅ Best model saved for future predictions.
Predicted Fitness Type: 1


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib



# 1 Read the preprocessed, de-duplicated dataset
file_path = "gym_recommendation_processed_deduped.csv"
df = pd.read_csv(file_path)

# 2 Drop the columns treated as leaking the target
df = df.drop(
    columns=[
        "Hypertension",
        "Diabetes",
        "Level",
        "Fitness Goal",
        "Sex",
        "Height",
        "BMI",
    ]
)

# 3 Split the frame into the label column and the predictor columns
target_column = "Fitness Type"
y = df[target_column]               # label to predict
X = df.drop(target_column, axis=1)  # every remaining column is a feature

# 4 Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5 Decision tree using the entropy (information-gain) split criterion
# NOTE: scikit-learn's trees are CART-based; criterion="entropy" is not a literal ID3.
dt_model = DecisionTreeClassifier(criterion="entropy", random_state=42).fit(X_train, y_train)

# 6 Support vector machine with a linear kernel
svm_model = SVC(kernel="linear", random_state=42).fit(X_train, y_train)

# 7 Score both models on the held-out rows
y_pred_dt = dt_model.predict(X_test)
y_pred_svm = svm_model.predict(X_test)

accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Per-class precision / recall / f1 for each model
print("\nDecision Tree Classification Report:\n", classification_report(y_test, y_pred_dt))
print("\nSVM Classification Report:\n", classification_report(y_test, y_pred_svm))

# 8 Persist whichever model scored strictly higher on the test set (the SVM wins ties)
if accuracy_dt > accuracy_svm:
    best_model = dt_model
else:
    best_model = svm_model
joblib.dump(best_model, "best_fitness_model.pkl")
print("✅ Best model saved for future predictions.")

# 9 Function to Predict Fitness Type from User Input
def predict_fitness(user_input):
    """Predict the fitness type for a single user with the saved best model.

    Parameters
    ----------
    user_input : dict or sequence
        Either a mapping of feature name -> value (extra keys are ignored),
        or a sequence of values given in the model's feature order.

    Returns
    -------
    The first (only) element of ``model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``user_input`` is a dict that lacks one or more required features.
        Without this check the missing columns would silently become NaN.
    """
    model = joblib.load("best_fitness_model.pkl")  # Load saved model

    # Prefer the feature names stored on the fitted estimator so the input
    # always matches the pickled model, even if the notebook-level ``X`` was
    # redefined by another cell. Fall back to ``X.columns`` otherwise.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    if isinstance(user_input, dict):
        missing = [name for name in feature_names if name not in user_input]
        if missing:
            raise ValueError(f"Missing feature(s) for prediction: {missing}")

    # One-row frame with columns selected and ordered as the model expects
    input_df = pd.DataFrame([user_input], columns=feature_names)
    return model.predict(input_df)[0]

# Function to check for overfitting
def check_overfitting(model, X_train, y_train, X_test, y_test, threshold=0.1):
    """Print whether a fitted model's train/test accuracy gap exceeds a threshold.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels.
    threshold : float, default 0.1
        Largest ``train_accuracy - test_accuracy`` still reported as
        "no overfitting". The default keeps the original behaviour.

    Returns
    -------
    None. The verdict and both accuracies are printed.
    """
    # Accuracy on the data the model was fitted on
    train_accuracy = accuracy_score(y_train, model.predict(X_train))

    # Accuracy on the held-out data
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    # Only a positive gap (train better than test) above the threshold is
    # flagged; a test score at or above the train score never triggers it.
    if train_accuracy - test_accuracy > threshold:
        print(f"Warning: Overfitting detected! Train accuracy: {train_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}")
    else:
        print(f"No overfitting detected. Train accuracy: {train_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}")
    
# 10 Compare train vs. test accuracy for each fitted model
check_overfitting(model=dt_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
check_overfitting(model=svm_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

# Example: run the saved model on one hand-written sample record
sample_user = {
    "Weight": 0.4,
    "Age": 0.6,
}
print("Predicted Fitness Type:", predict_fitness(user_input=sample_user))



Logistic Regression Accuracy: 0.93

Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.92      0.93      1213
           1       0.92      0.93      0.93      1196

    accuracy                           0.93      2409
   macro avg       0.93      0.93      0.93      2409
weighted avg       0.93      0.93      0.93      2409

Decision Tree Accuracy: 0.97
SVM Accuracy: 0.93

Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.97      0.97      1213
           1       0.97      0.98      0.97      1196

    accuracy                           0.97      2409
   macro avg       0.97      0.97      0.97      2409
weighted avg       0.97      0.97      0.97      2409


SVM Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.95      0.93      1213
           1       0.94      0.91      0.

In [2]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  # Import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# 1 Read the preprocessed, de-duplicated dataset
file_path = "gym_recommendation_processed_deduped.csv"
df = pd.read_csv(file_path)

# 2 Drop the columns treated as leaking the target
# NOTE(review): Height and Weight are both kept, so a BMI-like signal may still
# be recoverable by the models — TODO confirm the leakage is actually removed.
df = df.drop(
    columns=[
        "Hypertension",
        "Diabetes",
        "Level",
        "Fitness Goal",
        "BMI",
    ]
)

# 3 Split the frame into the label column and the predictor columns
target_column = "Fitness Type"
y = df[target_column]               # label to predict
X = df.drop(target_column, axis=1)  # every remaining column is a feature

# 4 Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5 Logistic regression (used here in place of the decision tree);
#   max_iter is raised to 1000 so the solver has room to converge
logreg_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

# 6 Support vector machine with a linear kernel
svm_model = SVC(kernel="linear", random_state=42).fit(X_train, y_train)

# 7 Score both models on the held-out rows
y_pred_logreg = logreg_model.predict(X_test)
y_pred_svm = svm_model.predict(X_test)

accuracy_logreg = accuracy_score(y_test, y_pred_logreg)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"Logistic Regression Accuracy: {accuracy_logreg:.2f}")
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Per-class precision / recall / f1 for each model
print("\nLogistic Regression Classification Report:\n", classification_report(y_test, y_pred_logreg))
print("\nSVM Classification Report:\n", classification_report(y_test, y_pred_svm))

# 8 Persist whichever model scored strictly higher on the test set (the SVM wins ties)
if accuracy_logreg > accuracy_svm:
    best_model = logreg_model
else:
    best_model = svm_model
joblib.dump(best_model, "best_fitness_model.pkl")
print("✅ Best model saved for future predictions.")

# 9 Function to Predict Fitness Type from User Input
def predict_fitness(user_input):
    """Predict the fitness type for a single user with the saved best model.

    Parameters
    ----------
    user_input : dict or sequence
        Either a mapping of feature name -> value (extra keys are ignored),
        or a sequence of values given in the model's feature order.

    Returns
    -------
    The first (only) element of ``model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``user_input`` is a dict that lacks one or more required features.
        Without this check the missing columns would silently become NaN.
    """
    model = joblib.load("best_fitness_model.pkl")  # Load saved model

    # Prefer the feature names stored on the fitted estimator so the input
    # always matches the pickled model, even if the notebook-level ``X`` was
    # redefined by another cell. Fall back to ``X.columns`` otherwise.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    if isinstance(user_input, dict):
        missing = [name for name in feature_names if name not in user_input]
        if missing:
            raise ValueError(f"Missing feature(s) for prediction: {missing}")

    # One-row frame with columns selected and ordered as the model expects
    input_df = pd.DataFrame([user_input], columns=feature_names)
    return model.predict(input_df)[0]

# Function to check for overfitting
def check_overfitting(model, X_train, y_train, X_test, y_test, threshold=0.1):
    """Print whether a fitted model's train/test accuracy gap exceeds a threshold.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels.
    threshold : float, default 0.1
        Largest ``train_accuracy - test_accuracy`` still reported as
        "no overfitting". The default keeps the original behaviour.

    Returns
    -------
    None. The verdict and both accuracies are printed.
    """
    # Accuracy on the data the model was fitted on
    train_accuracy = accuracy_score(y_train, model.predict(X_train))

    # Accuracy on the held-out data
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    # Only a positive gap (train better than test) above the threshold is
    # flagged; a test score at or above the train score never triggers it.
    if train_accuracy - test_accuracy > threshold:
        print(f"Warning: Overfitting detected! Train accuracy: {train_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}")
    else:
        print(f"No overfitting detected. Train accuracy: {train_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}")

# 10 Compare train vs. test accuracy for each fitted model
check_overfitting(model=logreg_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
check_overfitting(model=svm_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

# Example: run the saved model on one hand-written sample record
sample_user = {
    "Weight": 0.4,
    "Age": 0.6,
    "Sex": 0,
    "Height": 0.8,
}
print("Predicted Fitness Type:", predict_fitness(user_input=sample_user))


Logistic Regression Accuracy: 0.99
SVM Accuracy: 0.99

Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      1213
           1       1.00      0.99      0.99      1196

    accuracy                           0.99      2409
   macro avg       0.99      0.99      0.99      2409
weighted avg       0.99      0.99      0.99      2409


SVM Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      1213
           1       1.00      0.98      0.99      1196

    accuracy                           0.99      2409
   macro avg       0.99      0.99      0.99      2409
weighted avg       0.99      0.99      0.99      2409

✅ Best model saved for future predictions.
No overfitting detected. Train accuracy: 0.99, Test accuracy: 0.99
No overfitting detected. Train accuracy: 0.99, Test accuracy: 0.99
Predicted Fitness Type: 1


In [1]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier  # Import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# 1 Read the preprocessed, de-duplicated dataset
file_path = "gym_recommendation_processed_deduped.csv"
df = pd.read_csv(file_path)

# 2 Drop the columns treated as leaking the target
# NOTE(review): Height and Weight are both kept, so a BMI-like signal may still
# be recoverable by the models — TODO confirm the leakage is actually removed.
df = df.drop(
    columns=[
        "Hypertension",
        "Diabetes",
        "Level",
        "Fitness Goal",
        "BMI",
    ]
)

# 3 Split the frame into the label column and the predictor columns
target_column = "Fitness Type"
y = df[target_column]               # label to predict
X = df.drop(target_column, axis=1)  # every remaining column is a feature

# 4 Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5 Gradient boosting classifier (used here in place of logistic regression)
gb_model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# 6 Support vector machine with a linear kernel
svm_model = SVC(kernel="linear", random_state=42).fit(X_train, y_train)

# 7 Score both models on the held-out rows
y_pred_gb = gb_model.predict(X_test)
y_pred_svm = svm_model.predict(X_test)

accuracy_gb = accuracy_score(y_test, y_pred_gb)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"Gradient Boosting Accuracy: {accuracy_gb:.2f}")
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Per-class precision / recall / f1 for each model
print("\nGradient Boosting Classification Report:\n", classification_report(y_test, y_pred_gb))
print("\nSVM Classification Report:\n", classification_report(y_test, y_pred_svm))

# 8 Persist whichever model scored strictly higher on the test set (the SVM wins ties)
if accuracy_gb > accuracy_svm:
    best_model = gb_model
else:
    best_model = svm_model
joblib.dump(best_model, "best_fitness_model.pkl")
print("✅ Best model saved for future predictions.")

# 9 Function to Predict Fitness Type from User Input
def predict_fitness(user_input):
    """Predict the fitness type for a single user with the saved best model.

    Parameters
    ----------
    user_input : dict or sequence
        Either a mapping of feature name -> value (extra keys are ignored),
        or a sequence of values given in the model's feature order.

    Returns
    -------
    The first (only) element of ``model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``user_input`` is a dict that lacks one or more required features.
        Without this check the missing columns would silently become NaN.
    """
    model = joblib.load("best_fitness_model.pkl")  # Load saved model

    # Prefer the feature names stored on the fitted estimator so the input
    # always matches the pickled model, even if the notebook-level ``X`` was
    # redefined by another cell. Fall back to ``X.columns`` otherwise.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    if isinstance(user_input, dict):
        missing = [name for name in feature_names if name not in user_input]
        if missing:
            raise ValueError(f"Missing feature(s) for prediction: {missing}")

    # One-row frame with columns selected and ordered as the model expects
    input_df = pd.DataFrame([user_input], columns=feature_names)
    return model.predict(input_df)[0]

# Function to check for overfitting
def check_overfitting(model, X_train, y_train, X_test, y_test, threshold=0.1):
    """Print whether a fitted model's train/test accuracy gap exceeds a threshold.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels.
    threshold : float, default 0.1
        Largest ``train_accuracy - test_accuracy`` still reported as
        "no overfitting". The default keeps the original behaviour.

    Returns
    -------
    None. The verdict and both accuracies are printed.
    """
    # Accuracy on the data the model was fitted on
    train_accuracy = accuracy_score(y_train, model.predict(X_train))

    # Accuracy on the held-out data
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    # Only a positive gap (train better than test) above the threshold is
    # flagged; a test score at or above the train score never triggers it.
    if train_accuracy - test_accuracy > threshold:
        print(f"Warning: Overfitting detected! Train accuracy: {train_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}")
    else:
        print(f"No overfitting detected. Train accuracy: {train_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}")

# 10 Compare train vs. test accuracy for each fitted model
check_overfitting(model=gb_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
check_overfitting(model=svm_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

# Example: run the saved model on one hand-written sample record
sample_user = {
    "Weight": 0.4,
    "Age": 0.6,
    "Sex": 0,
    "Height": 0.8,
}
print("Predicted Fitness Type:", predict_fitness(user_input=sample_user))


Gradient Boosting Accuracy: 1.00
SVM Accuracy: 0.99

Gradient Boosting Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1213
           1       1.00      1.00      1.00      1196

    accuracy                           1.00      2409
   macro avg       1.00      1.00      1.00      2409
weighted avg       1.00      1.00      1.00      2409


SVM Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      1213
           1       1.00      0.98      0.99      1196

    accuracy                           0.99      2409
   macro avg       0.99      0.99      0.99      2409
weighted avg       0.99      0.99      0.99      2409

✅ Best model saved for future predictions.
No overfitting detected. Train accuracy: 1.00, Test accuracy: 1.00
No overfitting detected. Train accuracy: 0.99, Test accuracy: 0.99
Predicted Fitness Type: 1


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# 1 Read the preprocessed, de-duplicated dataset
file_path = "gym_recommendation_processed_deduped.csv"
df = pd.read_csv(file_path)

# Drop the columns excluded from this experiment
# NOTE(review): Height and Weight are both kept, so a BMI-like signal may still
# be recoverable by the models — TODO confirm no target leakage remains.
df = df.drop(
    columns=[
        "Hypertension",
        "Diabetes",
        "Level",
        "Fitness Goal",
        "BMI",
        "Sex",
    ]
)

# 2 Split the frame into the label column and the predictor columns
target_column = "Fitness Type"
y = df[target_column]               # label to predict
X = df.drop(target_column, axis=1)  # every remaining column is a feature

# 3 Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4 Decision tree using the entropy (information-gain) split criterion
# NOTE: scikit-learn's trees are CART-based; criterion="entropy" is not a literal ID3.
dt_model = DecisionTreeClassifier(criterion="entropy", random_state=42).fit(X_train, y_train)

# 5 Support vector machine with a linear kernel
svm_model = SVC(kernel="linear", random_state=42).fit(X_train, y_train)

# 6 Score both models on the held-out rows
y_pred_dt = dt_model.predict(X_test)
y_pred_svm = svm_model.predict(X_test)

accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Per-class precision / recall / f1 for each model
print("\nDecision Tree Classification Report:\n", classification_report(y_test, y_pred_dt))
print("\nSVM Classification Report:\n", classification_report(y_test, y_pred_svm))

# 7 Persist whichever model scored strictly higher on the test set (the SVM wins ties)
if accuracy_dt > accuracy_svm:
    best_model = dt_model
else:
    best_model = svm_model
joblib.dump(best_model, "best_fitness_model.pkl")
print("✅ Best model saved for future predictions.")

# 8 Function to Predict Fitness Type from User Input
def predict_fitness(user_input):
    """Predict the fitness type for a single user with the saved best model.

    Parameters
    ----------
    user_input : dict or sequence
        Either a mapping of feature name -> value (extra keys are ignored),
        or a sequence of values given in the model's feature order.

    Returns
    -------
    The first (only) element of ``model.predict`` for the one-row input.

    Raises
    ------
    ValueError
        If ``user_input`` is a dict that lacks one or more required features.
        Without this check the missing columns would silently become NaN.
    """
    model = joblib.load("best_fitness_model.pkl")  # Load saved model

    # Prefer the feature names stored on the fitted estimator so the input
    # always matches the pickled model, even if the notebook-level ``X`` was
    # redefined by another cell. Fall back to ``X.columns`` otherwise.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    if isinstance(user_input, dict):
        missing = [name for name in feature_names if name not in user_input]
        if missing:
            raise ValueError(f"Missing feature(s) for prediction: {missing}")

    # One-row frame with columns selected and ordered as the model expects
    input_df = pd.DataFrame([user_input], columns=feature_names)
    return model.predict(input_df)[0]

# Example: run the saved model on one hand-written sample record
sample_user = {
    "Weight": 0.4,
    "Age": 0.6,
    "Height": 0.8,
}
print("Predicted Fitness Type:", predict_fitness(user_input=sample_user))

Decision Tree Accuracy: 1.00
SVM Accuracy: 0.99

Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1213
           1       1.00      1.00      1.00      1196

    accuracy                           1.00      2409
   macro avg       1.00      1.00      1.00      2409
weighted avg       1.00      1.00      1.00      2409


SVM Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      1213
           1       1.00      0.98      0.99      1196

    accuracy                           0.99      2409
   macro avg       0.99      0.99      0.99      2409
weighted avg       0.99      0.99      0.99      2409

✅ Best model saved for future predictions.
Predicted Fitness Type: 1
