In [28]:
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn import linear_model
from sklearn.linear_model import Ridge, RidgeCV, Lasso
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Data Retrieval/Cleaning

In [2]:
# Load the complete 2019 NYRA table directly from the S3 bucket.
url_completetable = "https://horse-races.s3.ca-central-1.amazonaws.com/nyra_2019_complete.csv"

# The CSV is read with header=None, so the column labels are supplied explicitly.
colnames = [
    'track_id',
    'race_date',
    'race_number',
    'program_number',
    'trakus_index',
    'latitude',
    'longitude',
    'distance_id',
    'course_type',
    'track_condition',
    'run_up_distance',
    'race_type',
    'purse',
    'post_time',
    'weight_carried',
    'jockey',
    'odds',
    'position_at_finish',
]

full = pd.read_csv(url_completetable, names=colnames, header=None)
full.head()

  full = pd.read_csv(url_completetable, header=None, names=colnames)


Unnamed: 0,track_id,race_date,race_number,program_number,trakus_index,latitude,longitude,distance_id,course_type,track_condition,run_up_distance,race_type,purse,post_time,weight_carried,jockey,odds,position_at_finish
0,AQU,2019-01-01,9,6,72,40.672902,-73.827607,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
1,AQU,2019-01-01,9,6,73,40.672946,-73.827587,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
2,AQU,2019-01-01,9,6,74,40.67299,-73.827568,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
3,AQU,2019-01-01,9,6,63,40.67251,-73.827781,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
4,AQU,2019-01-01,9,6,64,40.672553,-73.827762,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8


In [3]:
# `data` is the working frame for every filtering step that follows.
data = pd.DataFrame(data=full)
data.head(5)

Unnamed: 0,track_id,race_date,race_number,program_number,trakus_index,latitude,longitude,distance_id,course_type,track_condition,run_up_distance,race_type,purse,post_time,weight_carried,jockey,odds,position_at_finish
0,AQU,2019-01-01,9,6,72,40.672902,-73.827607,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
1,AQU,2019-01-01,9,6,73,40.672946,-73.827587,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
2,AQU,2019-01-01,9,6,74,40.67299,-73.827568,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
3,AQU,2019-01-01,9,6,63,40.67251,-73.827781,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8
4,AQU,2019-01-01,9,6,64,40.672553,-73.827762,600,D,GD,48,CLM,25000.0,420,120,Andre Shivnarine Worrie,2090,8


In [44]:
# Peek at five random rows; used at various points to pick out jockey names.
# No random_state is passed, so the rows shown differ from run to run.
data.sample(5)

Unnamed: 0,track_id,race_date,race_number,program_number,trakus_index,latitude,longitude,distance_id,course_type,track_condition,run_up_distance,race_type,purse,post_time,weight_carried,jockey,odds,position_at_finish
1536402,SAR,2019-07-11,7,2,205,43.07045,-73.769032,900,I,YL,90,AOC,83000.0,439,122,Jose L. Ortiz,2600,9
1268637,AQU,2019-01-25,2,1,17,40.670693,-73.828626,600,D,MY,45,AOC,67000.0,1256,121,Junior Alvarado,480,7
1037284,AQU,2019-01-25,9,9,76,40.674026,-73.827119,550,D,GD,51,CLM,28000.0,429,117,Reylu Gutierrez,270,3
3954601,BEL,2019-05-03,4,9,169,40.715422,-73.727266,600,I,YL,96,MCL,41000.0,309,118,Kendrick Carmouche,1210,8
119565,SAR,2019-08-30,2,7,29,43.071989,-73.771572,900,D,FT,78,CLM,34000.0,134,123,Kendrick Carmouche,1780,5


In [4]:
# Data filtered/cleaned before modeling.
#
# Keep one row per ride for each jockey of interest. In the sample rows displayed
# above, the race-level fields repeat on every row (only trakus_index, latitude and
# longitude change), so de-duplicating on the ride key collapses each ride to one row.

# Jockeys included in the model. The order matters: it fixes the row order of
# data_reduced and therefore the outcome of any later seeded train/test split.
JOCKEYS = [
    'Joel Rosario',
    'Eric Cancel',
    'Manuel Franco',
    'Andre Shivnarine Worrie',
    'Reylu Gutierrez',
]

# Columns that together identify a single ride (one horse/jockey in one race).
RIDE_KEY = ['jockey', 'race_number', 'program_number', 'race_date', 'track_id']

# Columns that are not used as model features.
UNUSED_COLUMNS = [
    'race_date', 'latitude', 'longitude', 'trakus_index', 'program_number',
    'distance_id', 'run_up_distance', 'purse', 'post_time', 'odds',
]

# Filter and de-duplicate one jockey at a time, in the order listed above.
rides_per_jockey = [
    data[data['jockey'] == jockey].drop_duplicates(subset=RIDE_KEY)
    for jockey in JOCKEYS
]

# NOTE(review): the one-hot column names produced downstream carry a trailing space
# (e.g. 'track_condition_FM '), which suggests the raw track_condition values are
# space-padded. Consider stripping them here; left untouched so results do not change.
data_reduced = (
    pd.concat(rides_per_jockey, ignore_index=True, sort=False)
    .drop(columns=UNUSED_COLUMNS)
)
data_reduced


Unnamed: 0,track_id,race_number,course_type,track_condition,race_type,weight_carried,jockey,position_at_finish
0,BEL,6,T,GD,CLM,124,Joel Rosario,8
1,BEL,7,D,FT,CLM,122,Joel Rosario,5
2,BEL,3,D,FT,CLM,124,Joel Rosario,5
3,BEL,7,T,FM,CLM,123,Joel Rosario,5
4,SAR,6,D,FT,CLM,122,Joel Rosario,2
...,...,...,...,...,...,...,...,...
3196,AQU,8,D,SY,MCL,119,Reylu Gutierrez,3
3197,AQU,6,D,FT,MCL,119,Reylu Gutierrez,11
3198,AQU,5,D,FT,MSW,119,Reylu Gutierrez,10
3199,AQU,8,D,MY,MCL,119,Reylu Gutierrez,7


In [5]:
# One-hot encoding of variables with string values (data cleaned before evaluating).
# Numeric columns (race_number, weight_carried, position_at_finish) pass through as-is.
#
# dtype=int pins the indicator columns to 0/1 integers. Without it, pandas >= 2.0
# emits bool columns, and `.values` on the mixed int/bool frame becomes an
# object-dtype array instead of the integer matrix the modeling cells expect.
data_dummies = pd.get_dummies(data_reduced, dtype=int)
data_dummies

Unnamed: 0,race_number,weight_carried,position_at_finish,track_id_AQU,track_id_BEL,track_id_SAR,course_type_D,course_type_I,course_type_O,course_type_T,...,race_type_SST,race_type_STK,race_type_STR,race_type_WCL,race_type_WMC,jockey_Andre Shivnarine Worrie,jockey_Eric Cancel,jockey_Joel Rosario,jockey_Manuel Franco,jockey_Reylu Gutierrez
0,6,124,8,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
1,7,122,5,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,124,5,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,7,123,5,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
4,6,122,2,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3196,8,119,3,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3197,6,119,11,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3198,5,119,10,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3199,8,119,7,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [6]:
# Lowest race_number among the selected rides (range sanity check).
data_dummies.loc[:, 'race_number'].min()

1

In [7]:
# Highest race_number among the selected rides (range sanity check).
data_dummies.loc[:, 'race_number'].max()

13

In [8]:
# Feature frame: every encoded column except the target, position_at_finish.
dummies_dropped = data_dummies.drop('position_at_finish', axis=1)
dummies_dropped

Unnamed: 0,race_number,weight_carried,track_id_AQU,track_id_BEL,track_id_SAR,course_type_D,course_type_I,course_type_O,course_type_T,track_condition_FM,...,race_type_SST,race_type_STK,race_type_STR,race_type_WCL,race_type_WMC,jockey_Andre Shivnarine Worrie,jockey_Eric Cancel,jockey_Joel Rosario,jockey_Manuel Franco,jockey_Reylu Gutierrez
0,6,124,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
1,7,122,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,124,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,7,123,0,1,0,0,0,0,1,1,...,0,0,0,0,0,0,0,1,0,0
4,6,122,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3196,8,119,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3197,6,119,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3198,5,119,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3199,8,119,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [9]:
# List every feature column that remains once the target has been dropped.
# The order shown here is the column order of the feature matrix taken from
# dummies_dropped.values, so it maps "Feature: N" indices back to names.
dummies_dropped.columns

Index(['race_number', 'weight_carried', 'track_id_AQU', 'track_id_BEL',
       'track_id_SAR', 'course_type_D', 'course_type_I', 'course_type_O',
       'course_type_T', 'track_condition_FM ', 'track_condition_FT ',
       'track_condition_GD ', 'track_condition_MY ', 'track_condition_SF ',
       'track_condition_SY ', 'track_condition_YL ', 'race_type_ALW',
       'race_type_AOC', 'race_type_CLM', 'race_type_MCL', 'race_type_MSW',
       'race_type_SHP', 'race_type_SOC', 'race_type_SST', 'race_type_STK',
       'race_type_STR', 'race_type_WCL', 'race_type_WMC',
       'jockey_Andre Shivnarine Worrie', 'jockey_Eric Cancel',
       'jockey_Joel Rosario', 'jockey_Manuel Franco',
       'jockey_Reylu Gutierrez'],
      dtype='object')

### Training/Testing Model

In [16]:
# Setting X and y values (independent and dependent variables for model).
#
# Both are selected by meaning rather than by position: X is every column of the
# feature frame (no hard-coded column count that could silently truncate features
# if the set of dummy columns changes), and y is looked up by column name instead
# of relying on position_at_finish being the third column of data_dummies.
X = dummies_dropped.values
y = data_dummies['position_at_finish'].values

In [17]:
# Quick look at the target vector (finishing positions); NumPy abbreviates long arrays.
print(y)

[ 8  5  5 ... 10  7 12]


In [18]:
# Quick look at the feature matrix; NumPy abbreviates both rows and columns.
print(X)

[[  6 124   0 ...   1   0   0]
 [  7 122   0 ...   1   0   0]
 [  3 124   0 ...   1   0   0]
 ...
 [  5 119   1 ...   0   0   1]
 [  8 119   1 ...   0   0   1]
 [  8 120   1 ...   0   0   1]]


In [19]:
# Show the first five (features, target) pairs side by side.
first_rows = range(5)
for row_idx in first_rows:
    features, target = X[row_idx], y[row_idx]
    print('X =', features, ', y =', target)

X = [  6 124   0   1   0   0   0   0   1   0   0   1   0   0   0   0   0   0
   1   0   0   0   0   0   0   0   0   0   0   0   1   0   0] , y = 8
X = [  7 122   0   1   0   1   0   0   0   0   1   0   0   0   0   0   0   0
   1   0   0   0   0   0   0   0   0   0   0   0   1   0   0] , y = 5
X = [  3 124   0   1   0   1   0   0   0   0   1   0   0   0   0   0   0   0
   1   0   0   0   0   0   0   0   0   0   0   0   1   0   0] , y = 5
X = [  7 123   0   1   0   0   0   0   1   1   0   0   0   0   0   0   0   0
   1   0   0   0   0   0   0   0   0   0   0   0   1   0   0] , y = 5
X = [  6 122   0   0   1   1   0   0   0   0   1   0   0   0   0   0   0   0
   1   0   0   0   0   0   0   0   0   0   0   0   1   0   0] , y = 2


In [20]:
# Split into training and held-out test sets (no model is trained in this cell).
# test_size is left at train_test_split's default; random_state=42 makes the
# shuffle, and therefore every score below, reproducible.
# train_test_split is imported with the other dependencies in the first cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [21]:
# Scale features. The scaler is fitted on the training split only, then the same
# fitted transform is applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [22]:
# Scaled Ridge Regression Model: Ridge with alpha=10 fitted on the standardized features.
ridgeReg = Ridge(alpha=10).fit(X_train_scaled, y_train)

# Scores on the training split and on the held-out split.
train_score_ridge, test_score_ridge = (
    ridgeReg.score(X_train_scaled, y_train),
    ridgeReg.score(X_test_scaled, y_test),
)

print("\nRidge Model............................................\n")
print(
    "Scaled Ridge Regression train score: {}".format(train_score_ridge),
    "Scaled Ridge Regression test score: {}".format(test_score_ridge),
    sep="\n",
)


Ridge Model............................................

Scaled Ridge Regression train score: 0.09070233207294798
Scaled Ridge Regression test score: 0.06549506596547983


In [23]:
# Scaled Ridge Regression coefficients
importance, intercept = ridgeReg.coef_, ridgeReg.intercept_

# One line per feature; the index is the feature's column position in X.
for feature_idx, coefficient in enumerate(importance):
    print('Feature: %0d, Coefficient: %.5f' % (feature_idx, coefficient))

print('Intercept: ', intercept)

Feature: 0, Coefficient: 0.36980
Feature: 1, Coefficient: -0.05089
Feature: 2, Coefficient: -0.09235
Feature: 3, Coefficient: -0.06291
Feature: 4, Coefficient: 0.20623
Feature: 5, Coefficient: -0.11967
Feature: 6, Coefficient: 0.02793
Feature: 7, Coefficient: 0.10643
Feature: 8, Coefficient: 0.07029
Feature: 9, Coefficient: 0.05573
Feature: 10, Coefficient: 0.00876
Feature: 11, Coefficient: 0.03194
Feature: 12, Coefficient: -0.04159
Feature: 13, Coefficient: -0.05085
Feature: 14, Coefficient: -0.06035
Feature: 15, Coefficient: -0.06694
Feature: 16, Coefficient: 0.06935
Feature: 17, Coefficient: -0.12612
Feature: 18, Coefficient: 0.02062
Feature: 19, Coefficient: 0.07731
Feature: 20, Coefficient: 0.05768
Feature: 21, Coefficient: -0.00489
Feature: 22, Coefficient: -0.01762
Feature: 23, Coefficient: 0.00006
Feature: 24, Coefficient: -0.10906
Feature: 25, Coefficient: -0.03139
Feature: 26, Coefficient: -0.05187
Feature: 27, Coefficient: 0.08554
Feature: 28, Coefficient: 0.19755
Feature: 2

In [24]:
# Unscaled Ridge Regression Model: same alpha as the scaled run, raw features.
# Note: this rebinds ridgeReg and the two score variables, so after this cell
# they refer to the unscaled model.

ridgeReg = Ridge(alpha=10)
ridgeReg.fit(X_train, y_train)

# Train and test scores for the unscaled ridge regression.
train_score_ridge = ridgeReg.score(X_train, y_train)
test_score_ridge = ridgeReg.score(X_test, y_test)

print("\nRidge Model............................................\n")
# Label spelling fixed ("Uncaled" -> "Unscaled") so both lines read consistently.
print("Unscaled Ridge Regression train score: {}".format(train_score_ridge))
print("Unscaled Ridge Regression test score: {}".format(test_score_ridge))


Ridge Model............................................

Uncaled Ridge Regression train score: 0.08927558895012666
Unscaled Ridge Regression test score: 0.06660011630019746


In [25]:
# Unscaled Ridge Regression coefficients
importance = ridgeReg.coef_
intercept = ridgeReg.intercept_

# Print each coefficient next to its column position in X.
for column_pos, weight in enumerate(importance):
    print('Feature: %0d, Coefficient: %.5f' % (column_pos, weight))

print('Intercept: ', intercept)


Feature: 0, Coefficient: 0.13623
Feature: 1, Coefficient: -0.01970
Feature: 2, Coefficient: -0.24837
Feature: 3, Coefficient: -0.21154
Feature: 4, Coefficient: 0.45991
Feature: 5, Coefficient: -0.36336
Feature: 6, Coefficient: -0.05341
Feature: 7, Coefficient: 0.36109
Feature: 8, Coefficient: 0.05568
Feature: 9, Coefficient: 0.28461
Feature: 10, Coefficient: 0.14393
Feature: 11, Coefficient: 0.25532
Feature: 12, Coefficient: -0.08540
Feature: 13, Coefficient: -0.26310
Feature: 14, Coefficient: -0.06298
Feature: 15, Coefficient: -0.27239
Feature: 16, Coefficient: 0.25938
Feature: 17, Coefficient: -0.35826
Feature: 18, Coefficient: 0.09260
Feature: 19, Coefficient: 0.24370
Feature: 20, Coefficient: 0.17812
Feature: 21, Coefficient: -0.02253
Feature: 22, Coefficient: -0.10847
Feature: 23, Coefficient: 0.02515
Feature: 24, Coefficient: -0.32564
Feature: 25, Coefficient: -0.11195
Feature: 26, Coefficient: -0.37155
Feature: 27, Coefficient: 0.49946
Feature: 28, Coefficient: 0.83501
Feature: 

In [26]:
# Scaled Lasso Regression model
print("\nLasso Model............................................\n")
lasso = Lasso(alpha=10)

# Fit and score on the standardized splits so the model matches its "Scaled"
# labels; the raw X_train / X_test must not be used in this cell.
lasso.fit(X_train_scaled, y_train)
train_score_ls = lasso.score(X_train_scaled, y_train)
test_score_ls = lasso.score(X_test_scaled, y_test)

# NOTE(review): the run recorded on raw features scored 0.0 on the training split,
# which is what an all-zero coefficient vector would produce. alpha=10 may be too
# strong a penalty for this target -- confirm and consider tuning alpha.
print("Scaled Lasso Regression train score: {}".format(train_score_ls))
print("Scaled Lasso Regression test score: {}".format(test_score_ls))


Lasso Model............................................

Scaled Lasso Regression train score: 0.0
Scaled Lasso Regression test score: -0.0011198358771984385


In [29]:
# Scaled Linear Regression model: ordinary least squares on the standardized features.
lr = LinearRegression().fit(X_train_scaled, y_train)

# Targets of the held-out split, kept under a separate name.
actual = y_test

train_score_lr, test_score_lr = (
    lr.score(X_train_scaled, y_train),
    lr.score(X_test_scaled, y_test),
)

print(
    "Scaled Linear Regression train score: {}".format(train_score_lr),
    "Scaled Linear Regression test score: {}".format(test_score_lr),
    sep="\n",
)

Scaled Linear Regression train score: 0.09069539980358532
Scaled Linear Regression test score: 0.06506541031175017


In [30]:
# Scaled Linear Regression coefficients
# NOTE(review): the recorded output shows coefficients around 1e12 for the one-hot
# columns. That pattern is typical when complete dummy groups are collinear with
# the intercept -- confirm before interpreting individual values.
importance, intercept = lr.coef_, lr.intercept_

# One line per feature, indexed by column position in X.
for feature_idx, coefficient in enumerate(importance):
    print('Feature: %0d, Coefficient: %.5f' % (feature_idx, coefficient))

print('Intercept: ', intercept)


Feature: 0, Coefficient: 0.37202
Feature: 1, Coefficient: -0.05068
Feature: 2, Coefficient: 3646488633812.39795
Feature: 3, Coefficient: 3457889852816.43604
Feature: 4, Coefficient: 2687720509300.11719
Feature: 5, Coefficient: 3160656086231.43311
Feature: 6, Coefficient: 2143929979714.11865
Feature: 7, Coefficient: 1322927916313.43311
Feature: 8, Coefficient: 2526090500414.50000
Feature: 9, Coefficient: 889928179138.48975
Feature: 10, Coefficient: 1073397904387.57812
Feature: 11, Coefficient: 707429125659.77002
Feature: 12, Coefficient: 408142969483.50140
Feature: 13, Coefficient: 61990431939.13567
Feature: 14, Coefficient: 691787335711.97681
Feature: 15, Coefficient: 294555556940.66913
Feature: 16, Coefficient: 2360246784771.24268
Feature: 17, Coefficient: 2467734489695.65527
Feature: 18, Coefficient: 3303439483606.28418
Feature: 19, Coefficient: 3011827021102.88721
Feature: 20, Coefficient: 3303439483606.31689
Feature: 21, Coefficient: 280070540396.06293
Feature: 22, Coefficient: 683

In [31]:
# Unscaled Linear Regression model: ordinary least squares on the raw features.
# This rebinds lr, so later cells see the unscaled fit.
lr = LinearRegression().fit(X_train, y_train)

# Targets of the held-out split, kept under a separate name.
actual = y_test

train_score_lr, test_score_lr = (
    lr.score(X_train, y_train),
    lr.score(X_test, y_test),
)

print(
    "Unscaled Linear Regression train score: {}".format(train_score_lr),
    "Unscaled Linear Regression test score: {}".format(test_score_lr),
    sep="\n",
)

Unscaled Linear Regression train score: 0.0907041500864082
Unscaled Linear Regression test score: 0.06530812198870983


In [32]:
# Unscaled Linear Regression coefficients.
# These are the values used for the written-out prediction equation.
importance = lr.coef_
intercept = lr.intercept_

# Print each coefficient next to its column position in X.
for column_pos, weight in enumerate(importance):
    print('Feature: %0d, Coefficient: %.5f' % (column_pos, weight))

print('Intercept: ', intercept)


Feature: 0, Coefficient: 0.13409
Feature: 1, Coefficient: -0.02071
Feature: 2, Coefficient: -0.26758
Feature: 3, Coefficient: -0.21372
Feature: 4, Coefficient: 0.48130
Feature: 5, Coefficient: -0.40266
Feature: 6, Coefficient: -0.05327
Feature: 7, Coefficient: 0.40775
Feature: 8, Coefficient: 0.04817
Feature: 9, Coefficient: 0.47786
Feature: 10, Coefficient: 0.36562
Feature: 11, Coefficient: 0.44212
Feature: 12, Coefficient: 0.12857
Feature: 13, Coefficient: -1.42806
Feature: 14, Coefficient: 0.15997
Feature: 15, Coefficient: -0.14609
Feature: 16, Coefficient: 0.09517
Feature: 17, Coefficient: -0.54448
Feature: 18, Coefficient: -0.08785
Feature: 19, Coefficient: 0.06594
Feature: 20, Coefficient: 0.00164
Feature: 21, Coefficient: -0.27434
Feature: 22, Coefficient: -0.34218
Feature: 23, Coefficient: -0.13993
Feature: 24, Coefficient: -0.49030
Feature: 25, Coefficient: -0.29511
Feature: 26, Coefficient: -0.82142
Feature: 27, Coefficient: 2.83285
Feature: 28, Coefficient: 0.94910
Feature: 

In [None]:
# predicted position_at_finish = 0.13409 * race_number - 0.02071 * avg(weight_carried) + track_id_coefficient + course_type_coefficient + track_condition_coefficient + race_type_coefficient + jockey_coefficient + 6.353424818425532

In [None]:
# Extra Unscaled Linear Regression Check

In [33]:
# Initialize a fresh, unfitted linear regression model for the extra check.
model = LinearRegression()

In [34]:
# Fit on the raw (unscaled) training split.
model.fit(X_train, y_train)

In [35]:
# Evaluate the model on the held-out test split.
# Model optimization and evaluation process shown in Model_Iterations.csv and Old_Models folder.
# Predictive power is below 75%; however, a lower predictive power is to be expected given the nature of horse racing.
model.score(X_test, y_test) 

0.06530812198870983

In [114]:
# Coefficients and intercept of the extra-check model.
importance, intercept = model.coef_, model.intercept_

# One line per feature, indexed by column position in X.
for feature_idx, coefficient in enumerate(importance):
    print('Feature: %0d, Coefficient: %.5f' % (feature_idx, coefficient))

print('Intercept: ', intercept)

Feature: 0, Coefficient: 0.13409
Feature: 1, Coefficient: -0.02071
Feature: 2, Coefficient: -0.26758
Feature: 3, Coefficient: -0.21372
Feature: 4, Coefficient: 0.48130
Feature: 5, Coefficient: -0.40266
Feature: 6, Coefficient: -0.05327
Feature: 7, Coefficient: 0.40775
Feature: 8, Coefficient: 0.04817
Feature: 9, Coefficient: 0.47786
Feature: 10, Coefficient: 0.36562
Feature: 11, Coefficient: 0.44212
Feature: 12, Coefficient: 0.12857
Feature: 13, Coefficient: -1.42806
Feature: 14, Coefficient: 0.15997
Feature: 15, Coefficient: -0.14609
Feature: 16, Coefficient: 0.09517
Feature: 17, Coefficient: -0.54448
Feature: 18, Coefficient: -0.08785
Feature: 19, Coefficient: 0.06594
Feature: 20, Coefficient: 0.00164
Feature: 21, Coefficient: -0.27434
Feature: 22, Coefficient: -0.34218
Feature: 23, Coefficient: -0.13993
Feature: 24, Coefficient: -0.49030
Feature: 25, Coefficient: -0.29511
Feature: 26, Coefficient: -0.82142
Feature: 27, Coefficient: 2.83285
Feature: 28, Coefficient: 0.94910
Feature: 

In [37]:
# Hand-built single observation with 33 features in the same column order as X.
# Group labels follow the recorded dummies_dropped.columns listing.
Xnew = [[
    2, 120,                              # race_number, weight_carried
    1, 0, 0,                             # track_id: AQU, BEL, SAR
    0, 0, 0, 0,                          # course_type: D, I, O, T
    0, 0, 1, 0, 0, 0, 1,                 # track_condition: FM, FT, GD, MY, SF, SY, YL
    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,  # race_type: ALW, AOC, CLM, MCL, MSW, SHP, SOC, SST, STK, STR, WCL, WMC
    0, 0, 0, 0, 0,                       # jockey: Worrie, Cancel, Rosario, Franco, Gutierrez
]]
# NOTE(review): if the column order above is right, this row is not a valid one-hot
# encoding: two track_condition flags (GD, YL) and two race_type flags (SHP, WMC)
# are set, while no course_type and no jockey is selected. Confirm the intended
# sample; values are left exactly as they were.

# Making a prediction
ynew = model.predict(Xnew)
print("X=%s, Predicted=%s" % (Xnew, ynew))

X=[[2, 120, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]], Predicted=[6.72326615]
