In [196]:
import importlib

In [434]:
import state
import predictor
importlib.reload(predictor)
importlib.reload(state)

<module 'state' from '/home/justiny/Documents/Projects/PyDominion/src/state.py'>

In [440]:
from aiutils import load
from mlp import PredictorMLP
from mlprunner import train_mlp
from config import GameConfig
from player import load_players
from enums import StartingSplit, FeatureType
from predictor import sample_training_batch, test_mlp
import numpy as np
import torch.nn as nn
import torch
import os
import pandas as pd
from sklearn.metrics import mean_squared_error
from state import ReducedStateFeature
from supply import Supply
from sklearn.preprocessing import StandardScaler

In [453]:
config = GameConfig(split=StartingSplit.StartingRandomSplit, prosperity=False, num_players=2, feature_type=FeatureType.ReducedFeature, sandbox=True)
players = load_players(['BM', 'BM'], models=None, train=True)

In [454]:
X, y = sample_training_batch(1000, -1, config, players)

  2%|▏         | 15/1000 [00:00<00:06, 144.25it/s]Generating training data from self-play...
100%|██████████| 1000/1000 [00:06<00:00, 144.88it/s]


In [360]:
X_rand_10k = X.copy()
y_rand_10k = y.copy()

In [432]:
model = PredictorMLP(config.feature_size, (config.feature_size + 1) // 2, 1)
criterion = nn.BCELoss()

In [None]:
project_root = '/home/justiny/Documents/Projects/PyDominion'
model_dir = os.path.join(project_root, 'models')

In [438]:
model_name = 'r-r-mlp-pred-bce-100-100-1'
model_path = os.path.join(model_dir, model_name)

In [439]:
y1 = np.array(y, dtype=np.float32)
train_mlp(X, y1, model, criterion, epochs=100, save_epochs=10, model_name=model_name, path=os.path.join(model_dir, model_name))

 15%|█▌        | 2644/17476 [00:00<00:00, 26438.89it/s]Generating dataset for dataloader...
100%|██████████| 17476/17476 [00:01<00:00, 13083.33it/s]
  0%|          | 0/100 [00:00<?, ?it/s]Training MLP...
100%|██████████| 100/100 [21:20<00:00, 12.81s/it]


In [329]:
y_pred = model(torch.tensor(X).cuda()).detach().cpu().numpy()
y_labels = np.array(y).reshape(y_pred.shape)

In [331]:
output = (y_pred > 0.5)
correct = (output == y_labels).sum() 
acc  = correct / len(y_labels)

In [332]:
acc

0.7853855651588282

In [87]:
mean_squared_error(y_pred, y_labels)

0.11117749924175713

Let's try a simpler logistic regression model.

In [163]:
from sklearn.linear_model import LogisticRegression

In [491]:
C = 1
max_iter = 10e5
model = LogisticRegression(max_iter=max_iter, C=C)

In [514]:
project_root = '/home/justiny/Documents/Projects/PyDominion'
model_dir = os.path.join(project_root, 'models')
model_name = 'r-r-mlp-pred-bce-10k-50--1'
model_path = os.path.join(model_dir, model_name)
model_path

'/home/justiny/Documents/Projects/PyDominion/models/r-r-mlp-pred-bce-10k-50--1'

In [516]:
model = load(model_path)
model

PredictorMLP(
  (fc1): Linear(in_features=21, out_features=11, bias=True)
  (fc2): Linear(in_features=11, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): LeakyReLU(negative_slope=0.01)
  (tanh): Tanh()
)

In [492]:
reg = model.fit(X_rand_10k, y_rand_10k)

In [493]:
reg.score(X, y)

0.84

Copper, Curse, Estate, Duchy, Province, Silver, Gold

In [494]:
reg.coef_[0][7:14], reg.coef_[0][14:], reg.intercept_

(array([-6.25388341e-03, -1.99098762e+00,  1.95044124e+00,  5.81198855e+00,
         1.12843042e+01,  9.18387473e-03, -6.30930290e-03]),
 array([-4.52886488e-04,  1.98728527e+00, -1.95562452e+00, -5.96425122e+00,
        -1.12368846e+01, -2.42450308e-02,  6.80101895e-03]),
 array([-0.00034741]))

Save the logistic regression model.

In [369]:
import pickle

In [423]:
pickle.dump(reg, open(model_path, 'wb'))

Let's test the logistic regression model.

In [495]:
test_config = GameConfig(split=StartingSplit.StartingRandomSplit, prosperity=False, num_players=2, feature_type=FeatureType.ReducedFeature, sandbox=True)
test_players = load_players(['BM', 'BM'], models=None, train=False)

In [496]:
test_X, test_y = sample_training_batch(1000, -1, test_config, test_players)

  2%|▏         | 15/1000 [00:00<00:07, 137.80it/s]Generating training data from self-play...
100%|██████████| 1000/1000 [00:07<00:00, 142.48it/s]


In [371]:
reg

LogisticRegression(C=1, max_iter=1000000.0)

In [497]:
reg.score(test_X, test_y)

0.837

In [509]:
reg.coef_[0][:7], reg.coef_[0][7:14], reg.coef_[0][14:], reg.intercept_

(array([-0.0155073 ,  0.        ,  0.        ,  0.1493008 , -0.05038149,
         0.00025177, -0.01159875]),
 array([-6.25388341e-03, -1.99098762e+00,  1.95044124e+00,  5.81198855e+00,
         1.12843042e+01,  9.18387473e-03, -6.30930290e-03]),
 array([-4.52886488e-04,  1.98728527e+00, -1.95562452e+00, -5.96425122e+00,
        -1.12368846e+01, -2.42450308e-02,  6.80101895e-03]),
 array([-0.00034741]))

In [506]:
test_X_classes = reg.predict(test_X)
err = test_X[test_X_classes != test_y]
i=10

In [507]:
err[i][7:14], err[i][14:]

(array([7., 0., 3., 0., 4., 6., 5.], dtype=float32),
 array([ 7.,  0.,  3.,  0.,  4., 10.,  1.], dtype=float32))

In [508]:
reg.predict_proba([err[i]]), y[i]

(array([[0.46916815, 0.53083185]]), 1)

MLP Testing


In [None]:
project_root = '/home/justiny/Documents/Projects/PyDominion'
model_dir = os.path.join(project_root, 'models')
model_name = 'r-r-mlp-pred-bce-10k-50--1'
model_path = os.path.join(model_dir, model_name)
model_path

In [591]:
model = torch.load(model_path, map_location='cpu')

In [592]:
y_test_pred = model(torch.tensor(test_X)).detach().cpu().numpy()
y_test_labels = np.array(test_y).reshape(y_test_pred.shape)

In [533]:
output = (y_test_pred > 0.5)
correct = (output == y_test_labels).sum() 
incorrect_flags = (output != y_test_labels).flatten()
incorrect = test_X[incorrect_flags]
incorrect_prob = y_test_pred[incorrect_flags]
acc  = correct / len(y_test_labels)

In [519]:
acc

0.983

In [580]:
i=14

In [581]:
incorrect[i][7:14], incorrect[i][14:], incorrect_prob[i]b

(array([7., 0., 4., 0., 4., 6., 4.], dtype=float32),
 array([7., 0., 3., 0., 4., 7., 4.], dtype=float32),
 array([0.01174975], dtype=float32))