In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import seaborn as sns

In [4]:
import matplotlib.pyplot as plt

In [7]:
# Read both raw tables from the working directory in one ordered pass:
# the calories file is read first, then the exercise file.
calories, exercise = (
    pd.read_csv(csv_path) for csv_path in ('calories.csv', 'exercise.csv')
)

In [8]:
# Join the exercise rows with their calorie rows on the shared User_ID key.
# `how='inner'` is the pandas default, spelled out here for the reader.
df = exercise.merge(right=calories, how='inner', on='User_ID')

In [9]:
# Encode Gender as 1 (male) / 0 (female).
# The column is overwritten in place, so re-running this cell on the
# already-encoded values would map every 1/0 to NaN. Only apply the mapping
# while the column is still non-numeric, which keeps the cell idempotent.
if not pd.api.types.is_numeric_dtype(df['Gender']):
    df['Gender'] = df['Gender'].map({'male': 1, 'female': 0})

In [10]:
# Target: the Calories column.
y = df['Calories']
# Features: every remaining column except the User_ID key and the target.
X = df.drop(columns=['User_ID', 'Calories'])

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [14]:
from sklearn.tree import DecisionTreeRegressor

In [15]:
from sklearn.ensemble import RandomForestRegressor

In [16]:
from sklearn.metrics import r2_score, mean_squared_error

In [17]:
# Candidate regressors, keyed by a short label used when printing metrics.
# The tree-based estimators are stochastic, so they are seeded with the same
# value used for train_test_split; without a seed their scores change on
# every run and the notebook's reported metrics cannot be reproduced.
models = {
    'lr': LinearRegression(),
    'rd': Ridge(),
    'ls': Lasso(),
    'dtr': DecisionTreeRegressor(random_state=42),
    'rfr': RandomForestRegressor(random_state=42),
}

In [18]:
# Fit each candidate on the training split and report its test-set MSE and R^2.
for name, mod in models.items():
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = mod.fit(X_train, y_train).predict(X_test)

    print(f"{name}  MSE: {mean_squared_error(y_test, y_pred)}, Score: {r2_score(y_test, y_pred)}")

lr  MSE: 131.99574575081698, Score: 0.9672937151257295
rd  MSE: 131.99625903139344, Score: 0.9672935879435945
ls  MSE: 143.82689461175062, Score: 0.9643621590908397
dtr  MSE: 27.835333333333335, Score: 0.9931028811846033
rfr  MSE: 7.513042466666667, Score: 0.9981383967658233


In [None]:
import pickle

# Refit a random forest on the training split and persist it.
# The seed matches train_test_split so the saved model is reproducible.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(X_train, y_train)
y_pred = rfr.predict(X_test)

# A context manager guarantees the file is flushed and closed; the previous
# pickle.dump(rfr, open(...)) left the handle open.
with open('rfr.pkl', 'wb') as model_file:
    pickle.dump(rfr, model_file)

# Save the training features next to the model. With pandas defaults the
# DataFrame index is written as the first column.
X_train.to_csv('X_train.csv')