In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from tqdm import tqdm



In [4]:
def read_data(file):
    return pd.read_csv(file, delimiter=',')

In [5]:
def split_data(data):
    x = data[:, :5]
    y = data[:, 5]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=37)
    return x_train, x_test, y_train, y_test

def transform(data):
    data = torch.tensor(data, dtype=torch.float32)
    return data

In [6]:
def preprocess(filename):
    df_raw = read_data(filename)
    print(df_raw.head())
    d_raw = df_raw.to_numpy() # data to binary?
    x, x_test, y, y_test = split_data(d_raw)

    x, x_test = transform(x), transform(x_test)
    
    return x, x_test, y, y_test

In [7]:
datafile = '../data/basic_strategy_agent.csv'
x_train, x_test, y_train, y_test = preprocess(datafile)

   Player sum  Dealer card  Usable ace  Double down allowed  Split allowed  \
0           7            5           0                    1              0   
1          10            5           0                    0              0   
2          20            5           0                    0              0   
3          13            2           0                    1              0   
4           9            7           0                    1              0   

   Action  
0       1  
1       1  
2       0  
3       0  
4       1  


In [8]:
def one_hot_encode(y, n):
    return torch.eye(n)[y]

def combine_input_and_label(x, y, n):
    y_one_hot = one_hot_encode(y, n)
    return torch.concat((x, y_one_hot), 1)

def generate_neg_labels(y, n):
    y_ = y.copy()
    for i in range(len(y)):
        negative_labels = [j for j in range(n) if j != y[i]]
        y_[i] = np.random.choice(negative_labels)

    return y_

In [9]:
x_pos = combine_input_and_label(x_train, y_train, 4)

y_neg = generate_neg_labels(y_train, 4)
x_neg = combine_input_and_label(x_train, y_neg, 4)

y_train_OH = one_hot_encode(y_train, 4)
y_test_OH = one_hot_encode(y_test, 4)


print(f"x_pos[0]: {x_pos[0]}")
print(f"x_neg[0]: {x_neg[0]}")
print(f"Train size: {len(x_train)}")
print(f"Test size: {len(x_test)}")

x_pos[0]: tensor([14., 10.,  0.,  0.,  0.,  0.,  1.,  0.,  0.])
x_neg[0]: tensor([14., 10.,  0.,  0.,  0.,  1.,  0.,  0.,  0.])
Train size: 12118
Test size: 3030


## Forward-Forward

In [10]:
from FFNN import FFNN
import torch
import csv

if torch.cuda.is_available():
    device = torch.device('cuda')
    print("using gpu: ", torch.cuda.get_device_name())
else:
    device = torch.device('cpu')
    print("using cpu")

using cpu


In [None]:
# # Hyperparameters
# layers = [9, 2000, 2000, 2000, 2000]
# bias = True
# threshold = 10
# epochs = 60
# learning_rate = 0.01

# model = FFNN(
#     device=device,
#     layers=layers,
#     bias=bias,
#     threshold=threshold,
#     learning_rate=learning_rate,
#     epochs=epochs,
#     batch_size=100
#     ).to(device)

# model.train(x_pos, x_neg)
# model.save('model.pth')

In [10]:
def cross_validation(x_pos, x_neg,architectures, thresholds, learning_rates, n_epochs, n_folds, batch_size, lr_decay):
    
    size_subset = len(x_pos) // n_folds
    validation_risks = []
    
    for architecture in tqdm(architectures):
        for threshold in thresholds:
            for lr in learning_rates:
                    print(f"Current Model:{architecture}, thr={threshold}, lr={lr}")
                    print("-"*40)
                    val_risks_model = []
                    
                    for fold in range(n_folds):
                        print(f"Fold {fold}")
                        print("-"*40)
                        
                        # Split data into training and validation
                        start_idx = fold * size_subset
                        end_idx = (fold + 1) * size_subset
                        x_pos_train = torch.cat((x_pos[:start_idx], x_pos[end_idx:]))
                        x_neg_train = torch.cat((x_neg[:start_idx], x_neg[end_idx:]))
                        x_val = x_train[start_idx:end_idx]
                        y_val = y_train[start_idx:end_idx]
                        
                        # Initialize current model
                        model = FFNN(
                            device=device,
                            layers=architecture,
                            bias=True,
                            threshold=threshold,
                            learning_rate=lr,
                            epochs=n_epochs,
                            batch_size=batch_size,
                            lr_decay=lr_decay
                        ).to(device)
                        
                        # Train model
                        model.train(x_pos_train, x_neg_train)
                        
                        # Compute validation risk
                        predictions = model.predict_accumulated_goodness(x_val) # juiste risk?
                        val_risk = 1 - predictions.eq(torch.from_numpy(y_val).to(device)).float().mean().item()
                        val_risks_model.append(val_risk)
                        
                        print(f"\n validation risk: {val_risk}")
                        
                    avg_val_risk = np.mean(val_risks_model)
                    validation_risks.append((architecture, threshold, lr, avg_val_risk))
                    #model.save_model(f'../models/model_{architecture}_{threshold}_{lr}_{avg_val_risk}.pth')
                    
                    with open('../data/cxv_results.csv', 'a', newline='') as file:
                        writer = csv.writer(file)
                        writer.writerow([architecture, threshold, lr, avg_val_risk])
                        
                    print("-"*40)
                    print(f"validation risks for model {architecture}, thr={threshold}, lr={lr}: {val_risks_model}")
                    print(f"average validation risk: {avg_val_risk}\n")
    
    print("total validation risks:" , validation_risks)  
    min_risk_model = min(validation_risks, key=lambda x: x[-1])
    print(f"Best model: {min_risk_model}")   
    return min_risk_model               

In [None]:
# Hyperparameters
architectures = [
    [9,30,30],
    [9,100,100],
#     [9,500,500],
#     [9,30,30,30],
#     [9,50,50,50],
#     [9,100,100,100],
#     [9,200,200,200],
#     [9,500,500,500],
#     [9,1000,1000,1000],
#     [9,30,30,30,30],
#     [9,50,50,50,50],
#     [9,100,100,100,100],
#     [9,500,500,500,500],
#     [9,1000,1000,1000,1000],
#     [9,2000,2000,2000,2000],
#     [9,50,50,50,50,50],
#     [9,100,100,100,100,100],
#     [9,500,500,500,500,500],
#     [9,2000,2000,2000,2000,2000]
]
thresholds = [0.005, 
            #  0.1, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1, 3, 4, 5, 6, 7, 8, 10
            ]
learning_rates = [0.00001,
                 # 0.0001,0.001,0.01,0.03
                  ]
n_epochs = 10
n_folds = 10
batch_size = 64
lr_decay = False

best_model = cross_validation(
    x_pos, 
    x_neg, 
    architectures,
    thresholds,
    learning_rates,
    n_epochs,
    n_folds,
    batch_size,
    lr_decay=lr_decay
)

In [11]:
# Best model hyperparameters
architecture_best = [9,100,100]
threshold_best = 0.5
learning_rate_best = 0.01
epochs = 100
batch_size = 1
lr_decay = False

print(f"Best model: {architecture_best}, thr={threshold_best}, lr={learning_rate_best}")

model = FFNN(
    device=device,
    layers=architecture_best,
    threshold=threshold_best,
    learning_rate=learning_rate_best,
    epochs=epochs,
    batch_size=batch_size,
    lr_decay=lr_decay
).to(device)

Best model: [9, 100, 100], thr=0.5, lr=0.01


In [None]:
model.train(x_pos, x_neg)
model.save_model('../models/best_model.pth')

In [15]:
model.load_model('../models/DQFFNN_model.pth')

torch.set_printoptions(profile="full")
print(x_test)
print(y_test)
print(model.predict_accumulated_goodness(x_test.to(device)))


tensor([[17., 10.,  0.,  0.,  0.],
        [13.,  3.,  0.,  1.,  0.],
        [13.,  9.,  0.,  1.,  0.],
        [14., 10.,  0.,  0.,  0.],
        [20., 10.,  0.,  1.,  1.],
        [ 9., 10.,  0.,  0.,  0.],
        [12., 10.,  0.,  1.,  1.],
        [21.,  4.,  1.,  0.,  0.],
        [19.,  8.,  0.,  0.,  0.],
        [17.,  7.,  0.,  0.,  0.],
        [20.,  3.,  1.,  1.,  0.],
        [17.,  4.,  0.,  1.,  0.],
        [10.,  3.,  0.,  1.,  0.],
        [ 9., 10.,  0.,  0.,  0.],
        [16.,  7.,  0.,  0.,  0.],
        [20., 10.,  0.,  1.,  1.],
        [16.,  6.,  0.,  0.,  0.],
        [18.,  6.,  0.,  0.,  0.],
        [ 6.,  7.,  0.,  1.,  0.],
        [15., 10.,  0.,  0.,  0.],
        [14.,  4.,  0.,  1.,  0.],
        [21., 10.,  0.,  0.,  0.],
        [14.,  8.,  0.,  1.,  0.],
        [ 9., 10.,  0.,  0.,  0.],
        [16., 10.,  0.,  0.,  0.],
        [12., 10.,  0.,  1.,  0.],
        [17.,  3.,  0.,  1.,  0.],
        [11.,  8.,  0.,  1.,  0.],
        [19.,  1.,  

In [16]:
test_error = 1 - model.predict_accumulated_goodness(x_test).eq(torch.from_numpy(y_test).to(device)).float().mean().item() # heet dit accuracy?
print(test_error)

0.45247524976730347


## Backpropegation

In [26]:
def train(model, x, y, epochs, loss_function, optimizer, batch_size):
    
    model.train()
    num_batches = len(x) // batch_size
    
    for epoch in range(epochs):
        losses = []
        for batch in tqdm(range(num_batches)):
            start_idx = batch * batch_size
            end_idx = start_idx + batch_size
            
            x_batch = x[start_idx:end_idx]
            y_batch = y[start_idx:end_idx]
            
            predictions = model(x_batch)
            loss = loss_function(predictions, y_batch)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
        print(f"epoch: {epoch + 1}/{epochs} avg. loss: {sum(losses)/len(losses)}")

In [27]:
def test(model, x_test, y_test, loss_function):
    
    model.eval()
    
    with torch.no_grad():
        predictions = model(x_test)
        test_loss = loss_function(predictions, y_test).item()
        
    print(f"avg. test loss: {test_loss}")   
    
def predict_actions(model, x):
    model.eval()
    
    with torch.no_grad():
        predictions = model(x)
        return predictions.argmax(1)

In [28]:
from backpropNN import BackpropNN

input_size = 5
output_size = 4

# hyperparameters
hidden_size = 30
activation_function = nn.ReLU()

# fully connected neural network with 4 hidden layers
backprop_model = BackpropNN(input_size, output_size, hidden_size, activation_function)

In [32]:
# hyperparameters
epochs = 100
batch_size = 64
learning_rate = 0.001

optimizer = torch.optim.Adam(backprop_model.parameters(), lr=learning_rate);
loss_fn = nn.CrossEntropyLoss()

train(backprop_model, x_train, y_train_OH, epochs, loss_fn, optimizer, batch_size)
torch.save(backprop_model.state_dict(), '../models/backprop_model.pth')

100%|██████████| 26/26 [00:00<00:00, 519.77it/s]


epoch: 1/10 avg. loss: 1.3648410806289086


100%|██████████| 26/26 [00:00<00:00, 806.64it/s]


epoch: 2/10 avg. loss: 1.268617909688216


100%|██████████| 26/26 [00:00<00:00, 812.50it/s]


epoch: 3/10 avg. loss: 1.1571954580453725


100%|██████████| 26/26 [00:00<00:00, 649.74it/s]


epoch: 4/10 avg. loss: 1.072003308397073


100%|██████████| 26/26 [00:00<00:00, 788.24it/s]


epoch: 5/10 avg. loss: 1.028952459876354


100%|██████████| 26/26 [00:00<00:00, 896.65it/s]


epoch: 6/10 avg. loss: 1.009978154530892


100%|██████████| 26/26 [00:00<00:00, 758.64it/s]


epoch: 7/10 avg. loss: 0.9906617953227117


100%|██████████| 26/26 [00:00<00:00, 684.24it/s]


epoch: 8/10 avg. loss: 0.970029585636579


100%|██████████| 26/26 [00:00<00:00, 837.89it/s]


epoch: 9/10 avg. loss: 0.9461542574258951


100%|██████████| 26/26 [00:00<00:00, 896.49it/s]


epoch: 10/10 avg. loss: 0.9243824871686789


In [152]:
test(backprop_model, x_test, y_test_OH, loss_fn)

avg. test loss: 0.9507748484611511


In [153]:
# print(torch.from_numpy(y_test))
# print(predict_action(backprop_model, x_test))
accuracy = predict_actions(backprop_model, x_test).eq(torch.from_numpy(y_test)).float().mean().item() # heet dit accuracy?
print(accuracy)

tensor([0, 1, 1,  ..., 1, 0, 0])
tensor([0, 0, 0,  ..., 0, 0, 0])
0.5006591975688934
