# Logistic Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix, accuracy_score, cohen_kappa_score, plot_confusion_matrix, f1_score, recall_score, precision_score, classification_report 

**Importing Dataset**

In [2]:
cookies = pd.read_csv('../Data/cookies_clean.csv')

In [3]:
cookies.head(3)

Unnamed: 0.1,Unnamed: 0,sugar to flour ratio,sugar index,bake temp,chill time,calories,pH,grams baking soda,bake time,quality,butter type,weight,diameter,chocolate,raisins,oats,nuts,peanut butter
0,0,0.25,9.5,300,15.0,136.0,8.1,0.44,12.1,8,1,15.2,7,0,1,0,0,0
1,1,0.23,3.3,520,34.0,113.0,8.16,0.48,8.4,7,1,12.4,7,0,1,0,0,0
2,2,0.18,1.9,360,33.0,106.0,8.21,0.83,14.0,9,1,9.4,7,1,0,0,1,0


In [4]:
cookies.drop('Unnamed: 0', axis=1, inplace=True)

**Splitting data frame**

In [5]:
X = cookies.drop(['quality', 'pH', 'chill time', 'diameter'], axis=1)
y = cookies['quality']

In [6]:
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.33, random_state=0)

In [7]:
#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

In [8]:
log_model = LogisticRegression(max_iter=700)

In [9]:
log_model.fit(X_train_scale, y_train)

LogisticRegression(max_iter=700)

In [10]:
y_pred = log_model.predict(X_test_scale)

In [11]:
def model_scores(y_test, y_pred):
    print('R2:', round(r2_score(y_test, y_pred), 3))
    print('Accuracy Score:', round(accuracy_score(y_test, y_pred), 3))
    print('Cohen Kappa Score:', round(cohen_kappa_score(y_test, y_pred), 3))
    print('MSE:', round(mean_squared_error(y_test, y_pred), 3))

In [12]:
model_scores(y_test, y_pred)

R2: 0.625
Accuracy Score: 0.546
Cohen Kappa Score: 0.39
MSE: 0.654


# KN Regressor

In [13]:
#with Standard Scaler
from sklearn.neighbors import KNeighborsRegressor

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

for k in range(1, 20):
    Knn = KNeighborsRegressor(n_neighbors = k)
    Knn.fit(X_train_scale, y_train)
    y_pred = Knn.predict(X_test_scale)
    print('For k=', k)
    print('R2:', round(r2_score(y_test, y_pred), 3))
    print('MSE:', round(mean_squared_error(y_test, y_pred), 3))
    print('')

For k= 1
R2: 0.609
MSE: 0.699

For k= 2
R2: 0.668
MSE: 0.595

For k= 3
R2: 0.692
MSE: 0.552

For k= 4
R2: 0.696
MSE: 0.545

For k= 5
R2: 0.702
MSE: 0.533

For k= 6
R2: 0.704
MSE: 0.53

For k= 7
R2: 0.708
MSE: 0.524

For k= 8
R2: 0.713
MSE: 0.514

For k= 9
R2: 0.71
MSE: 0.52

For k= 10
R2: 0.707
MSE: 0.525

For k= 11
R2: 0.704
MSE: 0.531

For k= 12
R2: 0.7
MSE: 0.538

For k= 13
R2: 0.7
MSE: 0.537

For k= 14
R2: 0.701
MSE: 0.535

For k= 15
R2: 0.701
MSE: 0.536

For k= 16
R2: 0.699
MSE: 0.538

For k= 17
R2: 0.699
MSE: 0.539

For k= 18
R2: 0.7
MSE: 0.538

For k= 19
R2: 0.699
MSE: 0.539



In [14]:
# with MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

for k in range(1, 20):
    Knn = KNeighborsRegressor(n_neighbors = k)
    Knn.fit(X_train_scale, y_train)
    y_pred = Knn.predict(X_test_scale)
    print('For k=', k)
    print('R2:', round(r2_score(y_test, y_pred), 3))
    print('MSE:', round(mean_squared_error(y_test, y_pred), 3))
    print('')

For k= 1
R2: 0.606
MSE: 0.706

For k= 2
R2: 0.663
MSE: 0.603

For k= 3
R2: 0.669
MSE: 0.592

For k= 4
R2: 0.68
MSE: 0.573

For k= 5
R2: 0.69
MSE: 0.555

For k= 6
R2: 0.696
MSE: 0.544

For k= 7
R2: 0.697
MSE: 0.542

For k= 8
R2: 0.701
MSE: 0.535

For k= 9
R2: 0.704
MSE: 0.53

For k= 10
R2: 0.707
MSE: 0.524

For k= 11
R2: 0.708
MSE: 0.522

For k= 12
R2: 0.708
MSE: 0.522

For k= 13
R2: 0.708
MSE: 0.522

For k= 14
R2: 0.709
MSE: 0.521

For k= 15
R2: 0.708
MSE: 0.522

For k= 16
R2: 0.708
MSE: 0.522

For k= 17
R2: 0.708
MSE: 0.523

For k= 18
R2: 0.707
MSE: 0.524

For k= 19
R2: 0.709
MSE: 0.521



In [15]:
#with Standard Scaler and dropping more columns
from sklearn.neighbors import KNeighborsRegressor

X = cookies.drop(['quality', 'sugar to flour ratio', 'pH', ], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

for k in range(1, 20):
    Knn = KNeighborsRegressor(n_neighbors = k)
    Knn.fit(X_train_scale, y_train)
    y_pred = Knn.predict(X_test_scale)
    print('For k=', k)
    print('R2:', round(r2_score(y_test, y_pred), 3))
    print('MSE:', round(mean_squared_error(y_test, y_pred), 3))
    print('')

For k= 1
R2: 0.622
MSE: 0.677

For k= 2
R2: 0.668
MSE: 0.595

For k= 3
R2: 0.681
MSE: 0.57

For k= 4
R2: 0.688
MSE: 0.559

For k= 5
R2: 0.694
MSE: 0.549

For k= 6
R2: 0.697
MSE: 0.543

For k= 7
R2: 0.701
MSE: 0.535

For k= 8
R2: 0.705
MSE: 0.528

For k= 9
R2: 0.709
MSE: 0.521

For k= 10
R2: 0.709
MSE: 0.521

For k= 11
R2: 0.708
MSE: 0.523

For k= 12
R2: 0.709
MSE: 0.521

For k= 13
R2: 0.71
MSE: 0.519

For k= 14
R2: 0.708
MSE: 0.524

For k= 15
R2: 0.705
MSE: 0.528

For k= 16
R2: 0.704
MSE: 0.529

For k= 17
R2: 0.705
MSE: 0.529

For k= 18
R2: 0.703
MSE: 0.531

For k= 19
R2: 0.702
MSE: 0.533



# Linear Regression

In [16]:
from sklearn.linear_model import LinearRegression

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

reg = LinearRegression()
reg.fit(X_train_scale, y_train)

y_pred = reg.predict(X_test_scale)

print('R2:', round(r2_score(y_test, y_pred), 3))
print('MSE:', round(mean_squared_error(y_test, y_pred), 3))

R2: 0.7
MSE: 0.538


In [17]:
from sklearn.linear_model import LinearRegression

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

reg = LinearRegression()
reg.fit(X_train_scale, y_train)

y_pred = reg.predict(X_test_scale)

print('R2:', round(r2_score(y_test, y_pred), 3))
print('MSE:', round(mean_squared_error(y_test, y_pred), 3))

R2: 0.7
MSE: 0.538


# Lasso Regression

In [18]:
from sklearn import linear_model

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

model_lasso = linear_model.Lasso(alpha=0.1)
model_lasso.fit(X_train_scale, y_train) 
pred_train_lasso= model_lasso.predict(X_train_scale)
print('MSE:', mean_squared_error(y_train,pred_train_lasso))
print('R2 Score:', r2_score(y_train, pred_train_lasso))
print('')
pred_test_lasso= model_lasso.predict(X_test_scale)
print('MSE:', mean_squared_error(y_test,pred_test_lasso)) 
print('R2 Score:', r2_score(y_test, pred_test_lasso))

MSE: 0.6149511559085641
R2 Score: 0.632171304346357

MSE: 0.6123923232531381
R2 Score: 0.6579349597527941


In [19]:
from sklearn import linear_model

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

model_lasso = linear_model.Lasso(alpha=0.1)
model_lasso.fit(X_train_scale, y_train) 
pred_train_lasso= model_lasso.predict(X_train_scale)
print('MSE:', mean_squared_error(y_train,pred_train_lasso))
print('R2 Score:', r2_score(y_train, pred_train_lasso))
print('')
pred_test_lasso= model_lasso.predict(X_test_scale)
print('MSE:', mean_squared_error(y_test,pred_test_lasso)) 
print('R2 Score:', r2_score(y_test, pred_test_lasso))

MSE: 0.8009116980553284
R2 Score: 0.5209403179441474

MSE: 0.8267194370293728
R2 Score: 0.5382178927417659


# Ridge Regression

In [20]:
from sklearn.linear_model import Ridge

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

rr = Ridge(alpha=0.01)
rr.fit(X_train_scale, y_train) 
pred_train_rr= rr.predict(X_train_scale)
print('MSE:', mean_squared_error(y_train,pred_train_rr))
print('R2 Score:', r2_score(y_train, pred_train_rr))

print('')

pred_test_rr= rr.predict(X_test_scale)
print('MSE:', mean_squared_error(y_test,pred_test_rr)) 
print('R2 Score:', r2_score(y_test, pred_test_rr))

MSE: 0.557106264439786
R2 Score: 0.666770818104082

MSE: 0.5376962038157337
R2 Score: 0.6996581005099066


In [21]:
from sklearn.linear_model import Ridge

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

rr = Ridge(alpha=0.01)
rr.fit(X_train_scale, y_train) 
pred_train_rr= rr.predict(X_train_scale)
print('MSE:', mean_squared_error(y_train,pred_train_rr))
print('R2 Score:', r2_score(y_train, pred_train_rr))

print('')

pred_test_rr= rr.predict(X_test_scale)
print('MSE:', mean_squared_error(y_test,pred_test_rr)) 
print('R2 Score:', r2_score(y_test, pred_test_rr))

MSE: 0.5571062891675566
R2 Score: 0.6667708033133405

MSE: 0.5376939787775796
R2 Score: 0.6996593433533207


# ElasticNet Regression

In [22]:
from sklearn.linear_model import ElasticNet

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

model_enet = ElasticNet(alpha = 0.01)
model_enet.fit(X_train_scale, y_train) 
pred_train_enet= model_enet.predict(X_train_scale)

print('MSE:', mean_squared_error(y_train,pred_train_enet))
print('R2 Score:', r2_score(y_train,pred_train_enet))

print('')

pred_test_rr= rr.predict(X_test_scale)
print('MSE:', mean_squared_error(y_train,pred_train_enet)) 
print('R2 Score:', r2_score(y_train,pred_train_enet))

MSE: 0.5581871730202816
R2 Score: 0.6661242802620689

MSE: 0.5581871730202816
R2 Score: 0.6661242802620689


In [23]:
from sklearn.linear_model import ElasticNet

X = cookies.drop(['quality'], axis=1)
y = cookies['quality']

X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

#scaling the x values
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)

model_enet = ElasticNet(alpha = 0.01)
model_enet.fit(X_train_scale, y_train) 
pred_train_enet= model_enet.predict(X_train_scale)

print('MSE:', mean_squared_error(y_train,pred_train_enet))
print('R2 Score:', r2_score(y_train,pred_train_enet))

print('')

pred_test_rr= rr.predict(X_test_scale)
print('MSE:', mean_squared_error(y_train,pred_train_enet)) 
print('R2 Score:', r2_score(y_train,pred_train_enet))

MSE: 0.5863640935808411
R2 Score: 0.6492704540065282

MSE: 0.5863640935808411
R2 Score: 0.6492704540065282
