In [None]:
# Load Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import pickle

In [None]:
# Load Data from Google Drive
!gdown --id YOUR_GOOGLE_DRIVE_ID

In [None]:
# Exploratory Data Analysis (EDA)
# Read the CSV into the working DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer='data.csv')
# Preview the leading rows; as the cell's last expression it is rendered as the output.
df.head(n=5)

In [None]:
# Box-and-whisker view of the BeatsPerMinute column.
# seaborn returns the Axes it drew on, so the title is set on that object
# instead of going through pyplot's implicit "current axes".
bpm_ax = sns.boxplot(data=df, x='BeatsPerMinute')
bpm_ax.set_title('Boxplot of Beats Per Minute')
plt.show()

In [None]:
# Correlation Matrix
# Restrict to numeric columns before correlating: on pandas >= 2.0,
# DataFrame.corr() raises on non-numeric columns instead of skipping them.
corr = df.select_dtypes(include='number').corr()
# Annotated heatmap of the pairwise correlations.
heatmap_ax = sns.heatmap(corr, annot=True, cmap='coolwarm')
heatmap_ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Outlier Removal using IQR Method
# A row is dropped when ANY numeric column lies more than 1.5 * IQR below Q1
# or above Q3. Quartiles and comparisons are computed on numeric columns only,
# so non-numeric columns neither break quantile() nor the comparisons; they are
# kept in the filtered frame.
#
# NOTE: this cell overwrites `df`. Re-running it recomputes the quartiles on the
# already-filtered frame and trims further; re-run the load cell first to start
# again from the raw data.
numeric_df = df.select_dtypes(include='number')
Q1 = numeric_df.quantile(0.25)
Q3 = numeric_df.quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier_row = ((numeric_df < lower_fence) | (numeric_df > upper_fence)).any(axis=1)
df = df[~is_outlier_row]

In [None]:
# Data Normalization
# Standardize every column of `df` with StandardScaler.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit_transform returns a plain NumPy array by default; wrap it back into a
# DataFrame with the original column labels and index so later cells can
# select columns by name (e.g. df_normalized['BeatsPerMinute']).
df_normalized = pd.DataFrame(
    scaler.fit_transform(df),
    columns=df.columns,
    index=df.index,
)
# NOTE(review): the scaler is fit on the full frame, target column included and
# before any train/test split, so test-set statistics influence the scaling.
# Consider fitting on the training features only.

In [None]:
# Model Setup
# Split the standardized data, fit a random forest regressor on the training
# part, and persist the fitted model to 'model.pkl'.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# StandardScaler.fit_transform returns a NumPy array by default, which has
# neither .drop() nor label-based indexing. Re-attach the column labels of `df`
# (the frame that was scaled) so the target can be selected by name. If
# df_normalized is already a DataFrame with these columns this is a plain
# column selection.
scaled = pd.DataFrame(df_normalized, columns=df.columns)
X = scaled.drop('BeatsPerMinute', axis=1)
y = scaled['BeatsPerMinute']
# NOTE(review): y is the standardized target, so predictions are in scaled units.
# The fitted scaler is not persisted alongside the model; confirm whether
# consumers of model.pkl need it to map predictions back to the original scale.

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest (same seed as the split) so a fresh Run All reproduces the model.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Context manager guarantees the file is flushed and closed even if dump() raises.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)