In [1]:
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot
import sklearn
import sklearn.model_selection, sklearn.svm
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error
import itertools
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pickle.load( open( "chess_df.p", "rb" ) )
opening_1h = pd.get_dummies(df['opening'])
df = df.drop(['opening'],axis=1)
df = df.join(opening_1h)

In [3]:
y = df['elo'].to_frame('elo')
x = df.drop(['elo'],axis=1)

In [6]:
for i in (df.columns):
    print(i)

advantage_at_5
advantage_at_10
nb_moves
nb_checks
move_quality_1
move_quality_2
move_quality_3
move_quality_4
move_quality_5
move_quality_6
move_quality_7
move_quality_8
elo
resignation
win_on_time
stalemate
abandon
checkmate
drawn_agreement
drawn_material
drawn_rep_or_50
analysis1
analysis2
analysis3
analysis4
A00
A01
A02
A03
A04
A05
A06
A07
A08
A09
A10
A11
A13
A15
A16
A17
A20
A21
A22
A25
A30
A31
A34
A35
A40
A41
A43
A44
A45
A46
A48
A49
A50
A51
A52
A53
A56
A57
A80
A84
A85
B00
B01
B02
B03
B06
B07
B08
B10
B12
B13
B15
B18
B20
B21
B22
B23
B24
B27
B28
B30
B31
B32
B33
B34
B40
B41
B43
B44
B50
B51
B52
B53
B54
B56
B70
B80
B90
C00
C01
C02
C05
C10
C11
C20
C21
C22
C23
C24
C25
C26
C28
C30
C31
C33
C34
C35
C37
C40
C41
C42
C43
C44
C45
C46
C47
C48
C50
C51
C53
C54
C55
C57
C58
C60
C61
C62
C64
C65
C66
C67
C68
C70
C77
C78
D00
D02
D03
D04
D06
D07
D08
D10
D11
D13
D20
D30
D31
D32
D35
D37
D43
D50
D53
D80
D85
E00
E10
E20
E60
E61
E70
E90
Other


In [6]:
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x,y,test_size=0.2,)
x_train.reset_index(inplace=True)
x_train.drop(['index'], axis=1, inplace=True)
y_train.reset_index(inplace=True)
y_train.drop(['index'], axis=1, inplace=True)
x_test.reset_index(inplace=True)
x_test.drop(['index'], axis=1, inplace=True)
y_test.reset_index(inplace=True)
y_test.drop(['index'], axis=1, inplace=True)

In [9]:
#Get best RF

depth = [9,10,11,]
min_samples_split=[9]
min_samples_leaf=[1]

maxi=0
best=''
best_model = 0
for d in depth:
    for mss in min_samples_split:
        for msl in min_samples_leaf:
            regr = RandomForestRegressor(n_estimators=500,max_depth=d, min_samples_split=mss, min_samples_leaf=msl, random_state=0)
            regr.fit(x_train,y_train)
            res = regr.score(x_test,y_test)
            if res>maxi:
                maxi=res
                best=d,mss,msl
                best_model=regr

print(best, maxi, mean_absolute_error(best_model.predict(x_test),y_test))

(11, 9, 1) 0.40269014138713555 412.4165557950952


In [10]:
#Linear Models

models = [LinearRegression(),Lasso(0.001),Lasso(0.05),Lasso(0.01),Lasso(0.05),Lasso(0.1),Lasso(0.5),Lasso(1),Ridge(0.01),Ridge(0.05),Ridge(0.1),Ridge(0.5),Ridge(1)]

maxi = 0
best = 0
for m in models:
    m.fit(x_train,y_train)
    res = m.score(x_test,y_test)
    if res>maxi:
        maxi=res
        best=m
        
print(best, res, mean_absolute_error(best.predict(x_test),y_test))

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False) 0.4396045874824525 396.48207514962445


In [11]:
#MLP

models = [MLPRegressor()]

maxi = 0
best = 0
for m in models:
    m.fit(x_train,y_train)
    print(m.predict(x_test))
    res = m.score(x_test,y_test)
    if res>maxi:
        maxi=res
        best=m
        
print(best, res, mean_absolute_error(best.predict(x_test),y_test))

[1857.82860596  853.88979939 1699.03633762 ... 1113.58760148 1429.08043378
 1250.15607141]
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100,), learning_rate='constant',
             learning_rate_init=0.001, max_iter=200, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=None, shuffle=True, solver='adam', tol=0.0001,
             validation_fraction=0.1, verbose=False, warm_start=False) 0.4479915223607429 392.19381895339285


In [19]:
check_parameters = {
    'hidden_layer_sizes': [(100,75,50,),(100,100,75,50,)],
    'activation': ['relu'],
    'solver': ['adam'],
    'alpha': [0.001],
    'learning_rate': ['constant'],
    'max_iter': [300,400,500,600,700]
    
}

def train_evaluate(model,x_train,y_train,x_test,y_test):
    model.fit(x_train, y_train)
    pred = (model.predict(x_test))
    return(mean_absolute_error(pred,y_test))

mini=100000
best_model =0
values = [v for k,v in check_parameters.items()]
for i in itertools.product(*values):
    print(i)
    model = MLPRegressor(i[0],i[1],i[2],i[3],learning_rate=i[4],max_iter=i[5])
    score = train_evaluate(model,x_train,y_train,x_test,y_test)
    print(score)
    if score<mini:
        mini=score
        best_model=model
        
best_model_mlp = best_model
print(mini)

((100, 75, 50), 'relu', 'adam', 0.001, 'constant', 300)
381.6391152602505
((100, 75, 50), 'relu', 'adam', 0.001, 'constant', 400)
383.90598835857384
((100, 75, 50), 'relu', 'adam', 0.001, 'constant', 500)
374.8499219794599
((100, 75, 50), 'relu', 'adam', 0.001, 'constant', 600)
377.95594680354924
((100, 75, 50), 'relu', 'adam', 0.001, 'constant', 700)
381.66373491000525
((100, 100, 75, 50), 'relu', 'adam', 0.001, 'constant', 300)
384.19989763492816
((100, 100, 75, 50), 'relu', 'adam', 0.001, 'constant', 400)
388.23713700958734
((100, 100, 75, 50), 'relu', 'adam', 0.001, 'constant', 500)
381.8876299291615
((100, 100, 75, 50), 'relu', 'adam', 0.001, 'constant', 600)
388.3533751223379
((100, 100, 75, 50), 'relu', 'adam', 0.001, 'constant', 700)
383.81325412407557
374.8499219794599


In [15]:
besti = best_model_mlp
besti

MLPRegressor(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100, 75, 50), learning_rate='constant',
             learning_rate_init=0.001, max_iter=300, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=None, shuffle=True, solver='adam', tol=0.0001,
             validation_fraction=0.1, verbose=False, warm_start=False)

In [21]:
pickle.dump(best_model_mlp, open('trainedmlp.p', 'wb'))

In [20]:
mean_absolute_error((best_model_mlp.predict(x_test)+besti.predict(x_test))/2,y_test)

375.2143485749936

In [35]:
y_test.isnull().sum().sum()

0

In [110]:
regr.fit(x_train,y_train)

  """Entry point for launching an IPython kernel.


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=10,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=10,
                      n_jobs=None, oob_score=False, random_state=0, verbose=0,
                      warm_start=False)

In [111]:
regr.score(x_train,y_train)

0.5711256156470043

In [112]:
regr.score(x_test,y_test)

0.36586541160044206