<a href="https://colab.research.google.com/github/Techbarsha/fitness-tracker/blob/main/model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy==1.24.3
!pip install scikit-learn==1.2.2

Collecting scikit-learn==1.2.2
  Using cached scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Using cached scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.3.0
    Uninstalling scikit-learn-1.3.0:
      Successfully uninstalled scikit-learn-1.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
imbalanced-learn 0.13.0 requires scikit-learn<2,>=1.3.2, but you have scikit-learn 1.2.2 which is incompatible.
mlxtend 0.23.4 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.[0m[31m
[0mSuccessfully installed scikit-learn-1.2.2


In [2]:
# Step 1: Install Required Libraries (Run in Google Colab)
!pip install pandas numpy matplotlib seaborn scikit-learn streamlit



# Data Processing

In [3]:
# Step 2: Data Loading and Preprocessing
#
# Reads the raw CSV, drops the identifier column and incomplete rows, one-hot
# encodes `Workout_Intensity`, and produces an 80/20 train/test split with
# `Calories_Burned` as the regression target.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load dataset
df = pd.read_csv('/content/fitness_tracker.csv')

# Data Cleaning
df = df.drop('User_ID', axis=1)  # Remove user ID
df = df.dropna()  # Remove missing values (surviving rows keep their original labels)

# Feature Engineering
# Encode categorical feature
encoder = OneHotEncoder()
intensity_encoded = encoder.fit_transform(df[['Workout_Intensity']]).toarray()
# Reuse df's index for the encoded frame: pd.concat(axis=1) aligns rows by
# index label, and after dropna() those labels are no longer 0..n-1. With a
# default RangeIndex the one-hot columns would attach to the wrong rows and
# NaN-filled rows would be reintroduced.
intensity_df = pd.DataFrame(
    intensity_encoded,
    columns=encoder.get_feature_names_out(['Workout_Intensity']),
    index=df.index,
)
df = pd.concat([df, intensity_df], axis=1).drop('Workout_Intensity', axis=1)

# Prepare data: every remaining column is a feature, Calories_Burned is the target
X = df.drop('Calories_Burned', axis=1)
y = df['Calories_Burned']

# Split data (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Step 3: Model Training and Evaluation
#
# Fits each candidate regressor on the training split, reports MAE and R² on
# the test split, and saves the model with the highest R² to
# 'calorie_model.pkl'.
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score

# Initialize models
models = {
    # random_state fixed so the forest (and its reported metrics) is reproducible
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Support Vector': SVR()
}

# Train and evaluate models
best_model = None
# R² can be negative (a model worse than always predicting the mean), so the
# running best must start at -inf. Starting at 0 would select no model when
# every candidate scores below zero, and None would be pickled.
best_score = float('-inf')

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    print(f"{name} Results:")
    print(f"MAE: {mae:.2f}")
    print(f"R² Score: {r2:.2f}\n")

    if r2 > best_score:
        best_score = r2
        best_model = model

# Fail loudly instead of writing a pickle of None (possible only if no model
# produced a comparable score, e.g. an empty `models` dict or NaN scores).
if best_model is None:
    raise RuntimeError("No model could be selected; nothing was saved.")

# Save best model
joblib.dump(best_model, 'calorie_model.pkl')

Random Forest Results:
MAE: 196.94
R² Score: -0.16

Support Vector Results:
MAE: 190.92
R² Score: -0.07



['calorie_model.pkl']