In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_absolute_error as mae
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from xgboost import XGBRegressor

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): with no category/module filter this also hides deprecation
# and convergence warnings raised by the libraries used below; consider
# narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

In [None]:
# Read the dataset from a CSV located next to the notebook and preview
# the first rows to confirm it parsed as expected.
DATA_PATH = 'exercises.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Plot Height against Weight on an explicitly created Axes.
fig, ax = plt.subplots()
sns.scatterplot(data=df, x='Height', y='Weight', ax=ax)
plt.show()

In [None]:
# Predictors to compare against the target, one scatter panel each.
scatter_features = ['Age', 'Height', 'Weight', 'Duration']

# Cap the number of plotted points to keep rendering fast. The subsample is
# drawn ONCE, with a fixed seed, so that all four panels show the same rows
# and the figure is identical on every Restart & Run All.
MAX_POINTS = 1000
if len(df) > MAX_POINTS:
    plot_df = df.sample(MAX_POINTS, random_state=22)
else:
    plot_df = df

# 2x2 grid: one Axes per predictor.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
for ax, col in zip(axes.flat, scatter_features):
    sns.scatterplot(x=col, y='Calories', data=plot_df, ax=ax)
    ax.set_title(f'{col} vs Calories')

# Adjust layout to prevent overlap, then render.
fig.tight_layout()
plt.show()

In [None]:
# Every float-typed column gets its own distribution panel.
float_cols = df.select_dtypes(include='float').columns

# Size the grid from the number of columns instead of assuming exactly six:
# three panels per row, as many rows as needed (at least one).
N_COLS = 3
n_rows = max(1, -(-len(float_cols) // N_COLS))  # ceiling division

# Create the grid in a single call so no stray full-figure Axes is left
# underneath the panels; 5 inches of height per row matches the previous
# 15x10 figure for the two-row case.
fig, axes = plt.subplots(n_rows, N_COLS,
                         figsize=(15, 5 * n_rows),
                         squeeze=False)
panels = list(axes.flat)

for ax, col in zip(panels, float_cols):
    # histplot(kde=True, stat='density') replaces the deprecated
    # sns.distplot: density-normalised histogram with a KDE curve on top.
    sns.histplot(df[col], kde=True, stat='density', ax=ax)

# Hide any panels left over when the column count is not a multiple of N_COLS.
for ax in panels[len(float_cols):]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

In [None]:
# Swap the string labels for integer codes wherever they occur in the frame,
# then preview the result.
label_codes = {'male': 0, 'female': 1}
df = df.replace(label_codes)
df.head()

In [None]:
# Boolean matrix marking column pairs whose correlation exceeds 0.9,
# rendered as an annotated heatmap without a colour bar.
high_corr = df.corr() > 0.9

fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(high_corr, annot=True, cbar=False, ax=ax)
plt.show()

In [None]:
# Columns to discard before modelling.
to_remove = ['Weight', 'Duration']

# errors='ignore' skips any label that is not present, so re-running this
# cell after the columns are already gone is a no-op rather than a KeyError.
df = df.drop(columns=to_remove, errors='ignore')

In [None]:
# Model inputs: everything except the identifier and the target column.
features = df.drop(columns=['User_ID', 'Calories'])
# Target as a plain NumPy array.
target = df['Calories'].values

# Hold out 10% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
split_parts = train_test_split(features, target,
                               test_size=0.1,
                               random_state=22)
X_train, X_val, Y_train, Y_val = split_parts

X_train.shape, X_val.shape


In [None]:
# Normalizing the features for stable and fast training.
# Standardise the features for stable and fast training. The scaler learns
# its statistics from the training split only and the same transform is then
# applied to both splits, so nothing from the validation rows leaks into it.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

In [None]:
# Candidate regressors, all with library defaults. The random forest is
# given a fixed seed because its bootstrap/feature sampling is otherwise
# random, which would make the reported errors change on every run.
models = [LinearRegression(), XGBRegressor(),
          Lasso(), RandomForestRegressor(random_state=22), Ridge()]

# Fit each model on the training split and report mean absolute error on
# both splits; a large gap between the two indicates overfitting.
for model in models:
    model.fit(X_train, Y_train)

    print(f'{model} : ')

    train_preds = model.predict(X_train)
    print('Training Error : ', mae(Y_train, train_preds))

    val_preds = model.predict(X_val)
    print('Validation Error : ', mae(Y_val, val_preds))
    print()