In [1]:
# Import libraries for analysis
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Import dataset
dodgers = pd.read_csv(r'dodgers-2022.csv')

In [3]:
# View the first five rows of the data
dodgers.head()

Unnamed: 0,month,day,attend,day_of_week,opponent,temp,skies,day_night,cap,shirt,fireworks,bobblehead
0,APR,10,56000,Tuesday,Pirates,67,Clear,Day,NO,NO,NO,NO
1,APR,11,29729,Wednesday,Pirates,58,Cloudy,Night,NO,NO,NO,NO
2,APR,12,28328,Thursday,Pirates,57,Cloudy,Night,NO,NO,NO,NO
3,APR,13,31601,Friday,Padres,54,Cloudy,Night,NO,NO,YES,NO
4,APR,14,46549,Saturday,Padres,57,Cloudy,Night,NO,NO,NO,NO


In [4]:
# Remove unneeded features
dodgers.drop(columns=['month', 'day', 'day_of_week', 'opponent'], inplace=True)

In [5]:
# View data to ensure features are removed
dodgers.head()

Unnamed: 0,attend,temp,skies,day_night,cap,shirt,fireworks,bobblehead
0,56000,67,Clear,Day,NO,NO,NO,NO
1,29729,58,Cloudy,Night,NO,NO,NO,NO
2,28328,57,Cloudy,Night,NO,NO,NO,NO
3,31601,54,Cloudy,Night,NO,NO,YES,NO
4,46549,57,Cloudy,Night,NO,NO,NO,NO


In [6]:
# Split data into X and y variables
X = dodgers.drop(('attend'), axis=1)
y = dodgers['attend']

In [7]:
# Apply one-hot-encoding to X variable
X = pd.get_dummies(X)

In [8]:
# Convert True and False to 1 and 0
X = X.replace({True:1, False:0})

  X = X.replace({True:1, False:0})


In [9]:
# View the first five rows to ensure one-hot-encoding is properly applied
X.head()

Unnamed: 0,temp,skies_Clear,skies_Cloudy,day_night_Day,day_night_Night,cap_NO,cap_YES,shirt_NO,shirt_YES,fireworks_NO,fireworks_YES,bobblehead_NO,bobblehead_YES
0,67,1,0,1,0,1,0,1,0,1,0,1,0
1,58,0,1,0,1,1,0,1,0,1,0,1,0
2,57,0,1,0,1,1,0,1,0,1,0,1,0
3,54,0,1,0,1,1,0,1,0,0,1,1,0
4,57,0,1,0,1,1,0,1,0,1,0,1,0


<h2><center>Standard Linear Regression</center></h2>

In [10]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10)

In [11]:
# View the dimension of X train
X_train.shape

(64, 13)

In [12]:
# View the dimension of y train
y_train.shape

(64,)

In [13]:
# Create linear regression object
lm = LinearRegression()

In [14]:
# Fit the data to the linear regression
model = lm.fit(X_train, y_train)

In [15]:
# Create prediction object
y_pred = model.predict(X_test)

In [16]:
r_2 = r2_score(y_test, y_pred)
r_2

0.38320882861995753

<h2><center>Linear Regression with Standard Scaler</center></h2>

In [17]:
scaler = StandardScaler()

In [18]:
X_train_S = scaler.fit_transform(X_train)

In [19]:
X_test_S = scaler.fit_transform(X_test)

In [20]:
lm_S = LinearRegression()

In [21]:
model_S= lm_S.fit(X_train_S, y_train)

In [22]:
y_pred_S = model_S.predict(X_test_S)

In [23]:
r_2_S = r2_score(y_test, y_pred_S)

In [24]:
r_2_S


0.3721143908976967

<h2><center>Linear Regression with Min/Max Scaling</center></h2>

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=15)

In [26]:
X_train.shape

(64, 13)

In [27]:
X_test.shape

(17, 13)

In [28]:
min_max= MinMaxScaler()

In [29]:
X_train_m = min_max.fit_transform(X_train)

In [30]:
X_test_m = min_max.fit_transform(X_test)

In [31]:
lm_m = LinearRegression()

In [32]:
model_m = lm_m.fit(X_train_m, y_train)

In [33]:
y_pred_m = model_m.predict(X_test_m)

In [34]:
r_2_m = r2_score(y_test, y_pred_m)

In [35]:
r_2_m

0.06413427460784782

<h2><center>K-Nearest-Neighbor with Min/Max Scaling</center></h2>

In [36]:
X = pd.get_dummies(X)

In [37]:
X.head()

Unnamed: 0,temp,skies_Clear,skies_Cloudy,day_night_Day,day_night_Night,cap_NO,cap_YES,shirt_NO,shirt_YES,fireworks_NO,fireworks_YES,bobblehead_NO,bobblehead_YES
0,67,1,0,1,0,1,0,1,0,1,0,1,0
1,58,0,1,0,1,1,0,1,0,1,0,1,0
2,57,0,1,0,1,1,0,1,0,1,0,1,0
3,54,0,1,0,1,1,0,1,0,0,1,1,0
4,57,0,1,0,1,1,0,1,0,1,0,1,0


In [38]:
X.replace({True:1, False:0},inplace=True)

In [39]:
X.head()

Unnamed: 0,temp,skies_Clear,skies_Cloudy,day_night_Day,day_night_Night,cap_NO,cap_YES,shirt_NO,shirt_YES,fireworks_NO,fireworks_YES,bobblehead_NO,bobblehead_YES
0,67,1,0,1,0,1,0,1,0,1,0,1,0
1,58,0,1,0,1,1,0,1,0,1,0,1,0
2,57,0,1,0,1,1,0,1,0,1,0,1,0
3,54,0,1,0,1,1,0,1,0,0,1,1,0
4,57,0,1,0,1,1,0,1,0,1,0,1,0


In [40]:
min_max_k = MinMaxScaler()

In [41]:
X_train_k = min_max_k.fit_transform(X_train)

In [42]:
X_test_k = min_max_k.fit_transform(X_test)