In [61]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn import metrics


In [62]:
# read in data set
df = pd.read_csv("../../data/Scrabble_Player_Rating_Training_Data.csv")
df.head()


Unnamed: 0,score,rating,total_turns,first_five_turns_points,max_points_turn,min_points_turn,max_min_difference,first,winner,initial_time_seconds,...,game_end_reason_CONSECUTIVE_ZEROES,game_end_reason_RESIGNED,game_end_reason_STANDARD,game_end_reason_TIME,lexicon_CSW21,lexicon_ECWL,lexicon_NSWL20,lexicon_NWL20,rating_mode_CASUAL,rating_mode_RATED
0,335,1637,13,122,68,8,60,1,-1,1200,...,0,0,1,0,0,0,0,1,1,0
1,429,1500,14,95,98,2,96,0,1,1200,...,0,0,1,0,0,0,0,1,1,0
2,440,1811,14,136,103,13,90,0,1,900,...,0,0,1,0,1,0,0,0,0,1
3,318,2071,13,151,76,0,76,1,-1,900,...,0,0,1,0,1,0,0,0,0,1
4,119,1473,14,20,26,0,26,0,-1,3600,...,0,1,0,0,1,0,0,0,1,0


## Check if there are null data sets
- results show nothing is null

In [63]:
df.isnull().sum()

score                                 0
rating                                0
total_turns                           0
first_five_turns_points               0
max_points_turn                       0
min_points_turn                       0
max_min_difference                    0
first                                 0
winner                                0
initial_time_seconds                  0
increment_seconds                     0
max_overtime_minutes                  0
game_duration_seconds                 0
time_used                             0
points_per_turn                       0
points_per_second                     0
time_control_name_blitz               0
time_control_name_rapid               0
time_control_name_regular             0
time_control_name_ultrablitz          0
game_end_reason_CONSECUTIVE_ZEROES    0
game_end_reason_RESIGNED              0
game_end_reason_STANDARD              0
game_end_reason_TIME                  0
lexicon_CSW21                         0


## Graph a Correlation Matrix 

In [64]:
# Can graph a correlation matrix as a heat map! 
fig = px.imshow(df.corr())
fig.show()

In [65]:
X = df.drop('rating', axis = 1)
y = df['rating']

## Determin Correlation Between Rating and Rest of Columns

In [66]:
correlation = X.corrwith(y, axis = 0)
correlation

score                                 0.425136
total_turns                          -0.237326
first_five_turns_points               0.297990
max_points_turn                       0.216420
min_points_turn                       0.172253
max_min_difference                    0.066872
first                                -0.004075
winner                                0.196503
initial_time_seconds                  0.149568
increment_seconds                     0.005766
max_overtime_minutes                 -0.434311
game_duration_seconds                 0.150011
time_used                             0.096389
points_per_turn                       0.465008
points_per_second                    -0.081960
time_control_name_blitz              -0.056894
time_control_name_rapid               0.025969
time_control_name_regular             0.026359
time_control_name_ultrablitz         -0.023999
game_end_reason_CONSECUTIVE_ZEROES   -0.030732
game_end_reason_RESIGNED              0.014112
game_end_reas

In [67]:
fig = px.bar(correlation)
fig.show()

### Columns to Drop Based on Low Correlation
- first
- increment_seconds
- time_control_name_blitz
- time_control_name_rapid
- time_control_name_regular
- time_control_name_ultrablitz
- game_end_reason_CONSECUTIVE_ZEROES
- game_end_reason_RESIGNED
- game_end_reason_STANDARD
- game_end_reason_TIME

In [68]:
X_dropped = X.drop(labels = ['first', 'increment_seconds', 'time_control_name_blitz', 'time_control_name_rapid', 
                   'time_control_name_regular', 'time_control_name_ultrablitz', 'game_end_reason_CONSECUTIVE_ZEROES',
                    'game_end_reason_RESIGNED', 'game_end_reason_STANDARD', 'game_end_reason_TIME'], 
                   axis = 1, inplace = False)
X_dropped.head()

Unnamed: 0,score,total_turns,first_five_turns_points,max_points_turn,min_points_turn,max_min_difference,winner,initial_time_seconds,max_overtime_minutes,game_duration_seconds,time_used,points_per_turn,points_per_second,lexicon_CSW21,lexicon_ECWL,lexicon_NSWL20,lexicon_NWL20,rating_mode_CASUAL,rating_mode_RATED
0,335,13,122,68,8,60,-1,1200,1,674.844274,0.56237,25.769231,0.496411,0,0,0,1,1,0
1,429,14,95,98,2,96,1,1200,1,674.844274,0.56237,30.642857,0.635702,0,0,0,1,1,0
2,440,14,136,103,13,90,1,900,5,492.268262,0.546965,31.428571,0.893822,1,0,0,0,0,1
3,318,13,151,76,0,76,-1,900,5,492.268262,0.546965,24.461538,0.645989,1,0,0,0,0,1
4,119,14,20,26,0,26,-1,3600,1,350.861141,0.097461,8.5,0.339166,1,0,0,0,1,0


In [69]:
X.head()

Unnamed: 0,score,total_turns,first_five_turns_points,max_points_turn,min_points_turn,max_min_difference,first,winner,initial_time_seconds,increment_seconds,...,game_end_reason_CONSECUTIVE_ZEROES,game_end_reason_RESIGNED,game_end_reason_STANDARD,game_end_reason_TIME,lexicon_CSW21,lexicon_ECWL,lexicon_NSWL20,lexicon_NWL20,rating_mode_CASUAL,rating_mode_RATED
0,335,13,122,68,8,60,1,-1,1200,0,...,0,0,1,0,0,0,0,1,1,0
1,429,14,95,98,2,96,0,1,1200,0,...,0,0,1,0,0,0,0,1,1,0
2,440,14,136,103,13,90,0,1,900,0,...,0,0,1,0,1,0,0,0,0,1
3,318,13,151,76,0,76,1,-1,900,0,...,0,0,1,0,1,0,0,0,0,1
4,119,14,20,26,0,26,0,-1,3600,0,...,0,1,0,0,1,0,0,0,1,0


In [70]:
y.head()

0    1637
1    1500
2    1811
3    2071
4    1473
Name: rating, dtype: int64

### Checking for Score Prediction vs Rating Prediction

In [93]:
scoreX = df.drop('score', axis = 1)
scoreY = df['score']

In [94]:
correlation = scoreX.corrwith(scoreY, axis = 0)
correlation

rating                                0.425136
total_turns                          -0.139057
first_five_turns_points               0.460992
max_points_turn                       0.610654
min_points_turn                       0.242930
max_min_difference                    0.337810
first                                 0.085279
winner                                0.628026
initial_time_seconds                  0.061631
increment_seconds                    -0.001568
max_overtime_minutes                 -0.061679
game_duration_seconds                 0.001401
time_used                            -0.037402
points_per_turn                       0.827224
points_per_second                     0.247201
time_control_name_blitz              -0.055747
time_control_name_rapid               0.002961
time_control_name_regular             0.039114
time_control_name_ultrablitz         -0.004782
game_end_reason_CONSECUTIVE_ZEROES   -0.029680
game_end_reason_RESIGNED             -0.116285
game_end_reas

In [95]:
fig = px.bar(correlation)
fig.show()

#### Features Not Heavily Coorelated with Score
- increment_seconds
- game_duration_seconds
- time_control_name_blitz
- time_control_name_rapid 
- time_control_name_regular
- time_control_name_ultrablitz

In [120]:
# DROP NON COORELATED DATA FROM scoreX
scoreX_dropped = scoreX.drop(labels = ['increment_seconds', 'game_duration_seconds', 'time_control_name_blitz',
                                      'time_control_name_rapid', 'time_control_name_regular', 'time_control_name_ultrablitz'],
                                       axis = 1, inplace = False)

In [72]:
def split_data(X, y, frac = 0.2):
    idx_cutoff = int(X.shape[0]*(1 - frac))
    
    X_train, X_test, y_train, y_test = X.iloc[:idx_cutoff], X.iloc[idx_cutoff:], y.iloc[:idx_cutoff], y.iloc[idx_cutoff:]
    
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = split_data(X, y, frac = 0.2)

# SCORE
------------------------
------------------------

### Linear Regression (OLS) - Score: Dropped Columns

In [121]:
X_train_scored, X_test_scored, y_train_scored, y_test_scored = split_data(scoreX_dropped, scoreY, frac = 0.2)

In [125]:
scoredMod = LinearRegression()
scoredMod.fit(X_train_scored, y_train_scored)

In [126]:
scoredMod.score(X_test_scored, y_test_scored)

0.9411920977320398

### Linear Regression (OLS) - Score

In [96]:
X_train_score, X_test_score, y_train_score, y_test_score = split_data(scoreX, scoreY, frac = 0.2)

In [97]:
scoreMod = LinearRegression()
scoreMod.fit(X_train_score, y_train_score)

In [98]:
scoreMod.score(X_test_score, y_test_score)

0.9417606873781404

In [100]:
metrics.mean_squared_error(y_test_score, scoreMod.predict(X_test_score), squared=False)

18.255262253451093

In [114]:
scoreMod.coef_

array([ 1.58975545e-02,  2.16061306e+01, -6.76516980e-02,  1.58735712e-01,
        1.16580557e-01,  4.21551542e-02,  2.49397563e+00,  5.53831920e+00,
       -1.93263382e-04, -3.47667506e-02,  2.12946711e-02,  5.14394683e-03,
        3.82485897e+00,  1.16680032e+01,  4.17116471e+00,  9.06757093e-01,
        4.94948841e+00,  4.40611926e+00, -1.02623648e+01, -3.78474901e+01,
        1.11340022e+01,  1.93521738e+01,  7.36131408e+00, -4.28968541e+00,
        4.46751053e+00, -1.69677848e+00,  1.51895336e+00, -7.29246022e-01,
        7.29246022e-01])

In [115]:
scoreMod.intercept_

-312.9383424383677

### Pipelines

In [103]:
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [105]:
scorePipe1 = make_pipeline(StandardScaler(), LinearRegression())

In [106]:
scorePipe1.fit(X_train_score, y_train_score)

scoreScore1 = scorePipe1.score(X_test_score, y_test_score)
scoreScore1

0.9417606873781403

In [113]:
scorePipe2 = make_pipeline(StandardScaler(), PCA(n_components = 0.95), LinearRegression())

In [112]:
scorePipe2.fit(X_train_score, y_train_score)

scoreScore2 = scorePipe2.score(X_test_score, y_test_score)
scoreScore2

0.7617558141193086

## Lassso - Score

In [133]:
RidgePipe = make_pipeline(StandardScaler(), Ridge(alpha = 39.29999999999974))

In [134]:
RidgePipe.fit(X_train_score, y_train_score)

RidgeScore = RidgePipe.score(X_test_score, y_test_score)
RidgeScore

0.9418001485212336

In [116]:
lasso_r_squareds2 = []
a = .0001
while a < .01:
    mod = Lasso(alpha=a)
    mod.fit(X_train_score, y_train_score)
    r2 = mod.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    lasso_r_squareds2.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .0001
    
lasso_r_squareds2 = sorted(lasso_r_squareds2, key=lambda x: (x[1]), reverse=True)
print(lasso_r_squareds2[:5])


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.184e+05, tolerance: 4.545e+04


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.186e+05, tolerance: 4.545e+04



[(0.0039999999999999975, 0.9417628734128893), (0.004099999999999998, 0.941762873043473), (0.0038999999999999972, 0.9417628698068989), (0.004199999999999998, 0.9417628686842785), (0.0037999999999999974, 0.9417628620801708)]


In [140]:
lasso_r_squareds2 = []
a = .0001
while a < .01:
    lassoPipe = make_pipeline(StandardScaler(), Lasso(alpha = a))
    lassoPipe.fit(X_train_score, y_train_score)
    r2 = lassoPipe.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    lasso_r_squareds2.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .0001
    
lasso_r_squareds2 = sorted(lasso_r_squareds2, key=lambda x: (x[1]), reverse=True)
print(lasso_r_squareds2[:5])


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.503e+06, tolerance: 4.545e+04


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.892e+05, tolerance: 4.545e+04


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.895e+05, tolerance: 4.545e+04


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.898e+05, tolerance: 4.545e+04


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 5.900e+05, tolerance: 4.545e+04


Obje

[(0.0001, 0.94176066066913), (0.0002, 0.9417606236587102), (0.00030000000000000003, 0.9417605916047452), (0.0004, 0.9417605324078943), (0.0005, 0.9417604933194954)]


### Lasso - Score (Checking Around .0039)

In [118]:
lasso_r_squareds2 = []
a = .00380
while a < .00400:
    mod = Lasso(alpha=a)
    mod.fit(X_train_score, y_train_score)
    r2 = mod.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    lasso_r_squareds2.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .00001
    
lasso_r_squareds2 = sorted(lasso_r_squareds2, key=lambda x: (x[1]), reverse=True)
print(lasso_r_squareds2[:5])

[(0.003999999999999997, 0.9417628734128893), (0.003989999999999997, 0.9417628732455539), (0.003979999999999997, 0.9417628730406146), (0.003969999999999998, 0.94176287279573), (0.003959999999999998, 0.9417628723799946)]


## Ridgge - Score

In [137]:
ridge_r_squareds = []
a = 1
while a < 1000:
    ridgePipe = make_pipeline(StandardScaler(), Ridge(alpha = a))
    ridgePipe.fit(X_train_score, y_train_score)
    r2 = ridgePipe.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += 1
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)
print(ridge_r_squareds[:5])

[(340, 0.9419367527747865), (339, 0.9419367517151404), (341, 0.9419367509868758), (338, 0.9419367478062571), (342, 0.9419367463530873)]


### Ridge Pipeline - Score (Checking Around alpha 340)

In [139]:
ridge_r_squareds = []
a = 339
while a < 341:
    ridgePipe = make_pipeline(StandardScaler(), Ridge(alpha = a))
    ridgePipe.fit(X_train_score, y_train_score)
    r2 = ridgePipe.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .01
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)
print(ridge_r_squareds[:5])

[(339.8699999999992, 0.9419367527980976), (339.8799999999992, 0.9419367527980131), (339.8599999999992, 0.9419367527978973), (339.8899999999992, 0.9419367527976438), (339.8499999999992, 0.9419367527974124)]


In [117]:
ridge_r_squareds = []
a = 1
while a < 100:
    mod = Ridge(alpha=a)
    mod.fit(X_train_score, y_train_score)
    r2 = mod.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += 1
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)
print(ridge_r_squareds[:5])

[(39, 0.9417767417303307), (40, 0.9417767383920175), (38, 0.9417767278818656), (41, 0.9417767181294997), (37, 0.9417766965803569)]


### Ridge - Score (Checking Around 39)

In [119]:
ridge_r_squareds = []
a = 38
while a < 41:
    mod = Ridge(alpha=a)
    mod.fit(X_train_score, y_train_score)
    r2 = mod.score(X_test_score, y_test_score)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .01
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)
print(ridge_r_squareds[:5])

[(39.29999999999974, 0.9417767425214301), (39.30999999999974, 0.9417767425212442), (39.28999999999974, 0.9417767425199052), (39.31999999999974, 0.9417767425193478), (39.279999999999745, 0.9417767425166693)]


# RATING
----------------------------
----------------------------

### Linear Regression (OLS) - Rating

In [73]:
mod = LinearRegression()
mod.fit(X, y)

In [74]:
mod.score(X, y)

0.6398605759894012

In [75]:
# Root Mean squared error for Mod: 
# RMSE
metrics.mean_squared_error(y, mod.predict(X), squared=False)

139.10084826852085

In [76]:
# mean absolute percent error
metrics.mean_absolute_percentage_error(y, mod.predict(X))

0.058701847875227184

In [77]:
# r-squared
metrics.r2_score(y, mod.predict(X))

0.6398605759894012

In [78]:
row1 = X.iloc[2:3]
mod.predict(row1)

array([2034.41197972])

In [79]:
y.iloc[2:3]

2    1811
Name: rating, dtype: int64

### Model Pipeline
- linear regression
- implement PCA

In [81]:
pipe2 = make_pipeline(LinearRegression())

In [82]:
pipe2.fit(X_train, y_train)

test_score2 = pipe2.score(X_test, y_test)
test_score2

0.6414208804442846

In [86]:
pipe1 = make_pipeline(StandardScaler(), LinearRegression())

In [87]:
pipe1.fit(X_train, y_train)

test_score1 = pipe1.score(X_test, y_test)
test_score1

0.6414208804442847

In [53]:
pipe = make_pipeline(StandardScaler(), PCA(n_components=0.95), LinearRegression())

In [54]:
pipe.fit(X_train, y_train)

test_score = pipe.score(X_test, y_test)
test_score

0.6147615791226693

### Pipeline with PCA metrics:
------------------------------------

In [55]:
metrics.mean_squared_error(y_train, pipe.predict(X_train), squared=False)

144.06695795409172

In [29]:
dropped_mod = LinearRegression()

In [30]:
X_train_d, X_test_d, y_train_d, y_test_d = split_data(X_dropped, y, frac = 0.2)

In [32]:
dropped_mod.fit(X_train_d, y_train_d)

### Dropped Model Metrics:
------------------------------------------------

In [34]:
mod_score_d = dropped_mod.score(X_test_d, y_test_d)
mod_score_d

0.6347539449176247

In [36]:
dropped_mod.coef_

array([ 1.06715175e+00, -1.20102752e+01, -1.45304964e-01, -3.22877308e-01,
        1.11853317e-01, -4.34730625e-01, -3.78983523e+00,  2.33218281e-02,
       -3.46907330e-01, -2.54522290e-02, -2.13169880e+01,  2.99192742e+00,
       -2.49340784e+01,  2.30952177e+02, -1.10652898e+02, -1.24266726e+02,
        3.96744659e+00, -2.38814584e+01,  2.38814584e+01])

In [40]:
dropped_mod.intercept_

1488.5980185756048

In [44]:
metrics.mean_squared_error(y_train_d, dropped_mod.predict(X_train_d), squared=False)

140.6134678580388

In [45]:
metrics.mean_absolute_percentage_error(y_train_d, dropped_mod.predict(X_train_d))

0.059465050850960814

#### Model without Dropping Features Produced Best Results
- The less percentage of components for PCA produced worse results
- dropping uncorrelated features produced worse results

### Lasso Regression

In [128]:
# lasso_r_squareds = []
# a = .0001
# while a < .01:
#     mod = Lasso(alpha=a)
#     mod.fit(X_train, y_train)
#     r2 = mod.score(X_test, y_test)
#     # append a tuple of the current alpha and r2 of that alpha
#     lasso_r_squareds.append((a, r2))
#     # increment alpha by 0.1 each iteration
#     a += .0001
    
# lasso_r_squareds = sorted(lasso_r_squareds, key=lambda x: (x[1]), reverse=True)
# print(lasso_r_squareds[:5])

In [127]:
# lasso_r_squareds2 = []
# a = .00001
# while a < .0001:
#     mod = Lasso(alpha=a)
#     mod.fit(X_train, y_train)
#     r2 = mod.score(X_test, y_test)
#     # append a tuple of the current alpha and r2 of that alpha
#     lasso_r_squareds2.append((a, r2))
#     # increment alpha by 0.1 each iteration
#     a += .00001
    
# lasso_r_squareds2 = sorted(lasso_r_squareds2, key=lambda x: (x[1]), reverse=True)
# print(lasso_r_squareds2[:5])

In [142]:
lasso_r_squareds = []
a = .0001
while a < .01:
    lassoPipe = make_pipeline(StandardScaler(), Lasso(alpha = a))
    lassoPipe.fit(X_train, y_train)
    r2 = lassoPipe.score(X_test, y_test)
    # append a tuple of the current alpha and r2 of that alpha
    lasso_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .0001
    
lasso_r_squareds = sorted(lasso_r_squareds, key=lambda x: (x[1]), reverse=True)
print(lasso_r_squareds[:5])


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.128e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.129e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.129e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.129e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.129e+07, tolerance: 4.338e+05


Obje


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.133e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 4.133e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.547e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.551e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.551e+07, tolerance: 4.338e+05


Obje


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.554e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.554e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.554e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.554e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.555e+07, tolerance: 4.338e+05


Obje


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.558e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.558e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.558e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.558e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.558e+07, tolerance: 4.338e+05


Obje


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.561e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.561e+07, tolerance: 4.338e+05


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.561e+07, tolerance: 4.338e+05



[(0.009999999999999995, 0.6414225085827354), (0.009899999999999996, 0.6414224934415629), (0.009799999999999996, 0.6414224782773558), (0.009699999999999997, 0.641422463090114), (0.009599999999999997, 0.6414224478798378)]



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.561e+07, tolerance: 4.338e+05



### Ridge Regression

In [141]:
ridge_r_squareds = []
a = 1
while a < 1000:
    ridgePipe = make_pipeline(StandardScaler(), Ridge(alpha = a))
    ridgePipe.fit(X_train, y_train)
    r2 = ridgePipe.score(X_test, y_test)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += 1
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)

In [144]:
print(ridge_r_squareds[:5])

[(157, 0.6414347162853822), (158, 0.6414347162763063), (156, 0.6414347153484845), (159, 0.641434715324046), (155, 0.6414347134628133)]


#### Ridge Checking Around 157

In [145]:
ridge_r_squareds = []
a = 156
while a < 158:
    ridgePipe = make_pipeline(StandardScaler(), Ridge(alpha = a))
    ridgePipe.fit(X_train, y_train)
    r2 = ridgePipe.score(X_test, y_test)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += .01
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)
print(ridge_r_squareds[:5])

[(157.48999999999864, 0.6414347163989611), (157.49999999999864, 0.6414347163989164), (157.47999999999865, 0.6414347163989114), (157.50999999999863, 0.6414347163987772), (157.46999999999866, 0.641434716398767)]


In [60]:
ridge_r_squareds = []
a = 1
while a < 1000:
    mod = Ridge(alpha=a)
    mod.fit(X_train, y_train)
    r2 = mod.score(X_test, y_test)
    # append a tuple of the current alpha and r2 of that alpha
    ridge_r_squareds.append((a, r2))
    # increment alpha by 0.1 each iteration
    a += 1
    
ridge_r_squareds = sorted(ridge_r_squareds, key=lambda x: (x[1]), reverse=True)

In [84]:
# print(ridge_r_squareds[:5])

[(1, 0.6414113611881521), (2, 0.6414031600319194), (3, 0.6413961023650754), (4, 0.641390013291472), (5, 0.6413847385673208)]


In [None]:
# OLS score:
# 0.6414208804442847