In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Load data
data = pd.read_csv('gym.csv')
X = data.drop('number_people', axis=1)  # Independent features
y = data['number_people']  # Dependent feature

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Model 1: Raw Data
model1 = LinearRegression()
model1.fit(X_train, y_train)
y_pred1 = model1.predict(X_test)

# Metrics for Model 1
mse1 = mean_squared_error(y_test, y_pred1)
mae1 = mean_absolute_error(y_test, y_pred1)
r2_1 = r2_score(y_test, y_pred1)

# Model 2: Standard Scaling + PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=4)
X_pca = pca.fit_transform(X_scaled)

X_train_pca, X_test_pca, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=0)

model2 = LinearRegression()
model2.fit(X_train_pca, y_train)
y_pred2 = model2.predict(X_test_pca)

# Metrics for Model 2
mse2 = mean_squared_error(y_test, y_pred2)
mae2 = mean_absolute_error(y_test, y_pred2)
r2_2 = r2_score(y_test, y_pred2)

# Print results
print("Model 1 (Raw Data) - MSE:", mse1, "MAE:", mae1, "R²:", r2_1)
print("Model 2 (Scaled + PCA) - MSE:", mse2, "MAE:", mae2, "R²:", r2_2)


Model 1 (Raw Data) - MSE: 257.66784660292143 MAE: 12.303856389460762 R²: 0.49922008138471363
Model 2 (Scaled + PCA) - MSE: 273.05647816182915 MAE: 12.785227004351512 R²: 0.46931212910712017
