In [None]:
# ML_bot training

import sys
sys.path.append("./models")

from ML_bot import train_model

train_model("NN")

In [1]:
# Round robin tournament, 3 bots

import sys
sys.path.append("./models")
from ML_bot import MLPlayingBot
from schnapsen.bots import RdeepBot
from schnapsen.bots import RandBot
from Deepbot import DeepLearningBot
from schnapsen.game import SchnapsenGamePlayEngine
import pathlib
import random


engine = SchnapsenGamePlayEngine()
model_dir = "ML_models"
model_name = "simple_model"
model_location = pathlib.Path(model_dir) / model_name

model_path = "./models/model_20250112_044306_epochs25_batch128_lr0.004.pt"

rng = random.Random(42)

bot1 = MLPlayingBot(model_location, name="MLBot")
bot2 = RandBot(rng, "RandBot")
bot3 = RdeepBot(num_samples=5, depth=2, rand=rng, name="RdeepBot")
bot4 = DeepLearningBot(model_path=model_path, input_size=173, hidden_size=64, name="DLbot")

def round_robin_tournament():
    wins_MLBot = 0
    wins_RDeep = 0
    wins_Randy = 0
    for i in range (500):

        if i + 1 % 500 == 0:
            print(f"Current progress: game {i}")

        winner_id, game_points, score = engine.play_game(bot1, bot4, random.Random(i))

        if winner_id._Bot__name == "DLbot":
            wins_MLBot +=1
        elif winner_id._Bot__name == "RdeepBot":
            wins_RDeep +=1
        else:
            wins_Randy +=1

        winner_id2, game_points2, score2 = engine.play_game(bot1, bot3, random.Random(i))

        if winner_id2._Bot__name == "DLbot":
            wins_MLBot +=1
        elif winner_id2._Bot__name == "RdeepBot":
            wins_RDeep +=1
        else:
            wins_Randy +=1

        winner_id3, game_points3, score3 = engine.play_game(bot4, bot3, random.Random(i))

        if winner_id3._Bot__name == "DLbot":
            wins_MLBot +=1
        elif winner_id3._Bot__name == "RdeepBot":
            wins_RDeep +=1
        else:
            wins_Randy +=1



    print (f"DLBot: {wins_MLBot}")
    print (f"Randy: {wins_Randy}")
    print (f"Rdeep: {wins_RDeep}")

round_robin_tournament()

  state_dict = torch.load(model_path, map_location=self.device)


Current progress: game 0
DLBot: 373
Randy: 518
Rdeep: 609


In [12]:
# 1v1 arena

import sys
sys.path.append("./models")
from ML_bot import MLPlayingBot
from schnapsen.bots import RdeepBot
from schnapsen.bots import RandBot
from Deepbot import DeepLearningBot
from schnapsen.game import SchnapsenGamePlayEngine
import pathlib
import random


engine = SchnapsenGamePlayEngine()
model_dir = "ML_models"
model_name = "simple_model"
model_location = pathlib.Path(model_dir) / model_name

model_path = "./models/model_20250112_044306_epochs25_batch128_lr0.004.pt"

rng = random.Random(42)

bot1 = MLPlayingBot(model_location, name="MLBot")
bot2 = RandBot(rng, "RandBot")
bot3 = RdeepBot(num_samples=10, depth=3, rand=rng, name="RdeepBot")
bot4 = DeepLearningBot(model_path=model_path, input_size=173, hidden_size=64, name="DLbot")
bot5 = RdeepBot(num_samples=20, depth=5, rand=rng, name="RdeepBot2")

def matches_1v1(bot1, bot2):
    wins_bot1 = 0
    wins_bot2 = 0
    
    for i in range (100):

        if i + 1 % 500 == 0:
            print(f"Current progress: game {i}")

        winner_id, game_points, score = engine.play_game(bot1, bot2, random.Random(i))

        if winner_id._Bot__name == str(bot1):
            wins_bot1 +=1
        elif winner_id._Bot__name == str(bot2):
            wins_bot2 +=1


    print (f"{str(bot1)} wins: {wins_bot1}")
    print (f"{str(bot2)} wins: {wins_bot2}")

matches_1v1(bot3, bot5)


RdeepBot wins: 41
RdeepBot2 wins: 59


In [None]:
# Deep learning training

import sys
sys.path.append("./models")
from Deepbot import train_DL_model, gpu_check

# gpu_check()

data_file = "./ML_replay_memories/replay_memory.txt"  # Replace with your data file path
output_model_path = "./models"
input_dim = 173
hidden_dim = 64
train_DL_model(data_file, output_model_path, input_dim, hidden_dim, batch_size = 128, epochs = 25, lr = 0.004)

In [13]:
#Data generation

import sys
sys.path.append("./data_gen")

from data_generation import create_replay_memory_dataset
import random
from schnapsen.bots import RandBot
from schnapsen.bots import RdeepBot
from schnapsen.bots import BullyBot

rng = random.Random(42)

bot1 = RandBot(rng)
bot2 = RdeepBot(num_samples=5, depth=3, rand=rng)
bot4 = BullyBot(rng)

create_replay_memory_dataset(bot1=bot2, bot2=bot2, num_of_games = 10000, parallel = True, overwrite = True)

Existing dataset found at ML_replay_memories/replay_memory.txt. Overwriting...
Game 500 completed.
Game 1000 completed.
Game 1500 completed.
Game 2000 completed.
Game 2500 completed.
Game 3000 completed.
Game 3500 completed.
Game 4000 completed.
Game 4500 completed.
Game 5000 completed.
Game 5500 completed.
Game 6000 completed.
Game 6500 completed.
Game 7000 completed.
Game 7500 completed.
Game 8500 completed.
Game 9000 completed.
Game 9500 completed.
Game 8000 completed.
Game 10000 completed.


In [None]:
# DeepCFR training
import torch
import sys
sys.path.append("./models")
sys.path.append("./CFRmodels")


from DeepCFR import load_txt_dataset, create_data_loader, DeepCFR
import numpy as np

# Load dataset from text file
file_path = "./ML_replay_memories/replay_memory.txt"  # Replace with your dataset file
features, regrets = load_txt_dataset(file_path, label_type="regret")  # Load regret data
_, strategies = load_txt_dataset(file_path, label_type="strategy")   # Load strategy data

# Verify dataset dimensions
print("Features shape:", features.shape)  # Should match number of rows and feature length
print("Regrets shape:", np.array(regrets).shape)  # Should match rows and action size
print("Strategies shape:", np.array(strategies).shape)  # Should match rows and action size

# Create DataLoaders for training
regret_loader = create_data_loader(features, regrets, batch_size=64, shuffle=True)
strategy_loader = create_data_loader(features, strategies, batch_size=64, shuffle=True)

# Initialize the DeepCFR model
deep_cfr = DeepCFR(input_size=features.shape[1], action_size=len(regrets[0]))

for batch_idx, (states, labels) in enumerate(regret_loader):
    print(f"Batch {batch_idx + 1}: States sample indices:")
    print(states[:5])  # Assuming states contain identifiable indices
    if batch_idx == 2:  # Inspect only the first three batches
        break

# Train regret network
print("Training regret network...")
deep_cfr.train_regret_network(regret_loader, epochs=10)

# Train strategy network
print("Training strategy network...")
deep_cfr.train_strategy_network(strategy_loader, epochs=10)

torch.save(deep_cfr.regret_net.state_dict(), "./CFRmodels/regret_net.pth")
torch.save(deep_cfr.strategy_net.state_dict(), "./CFRmodels/strategy_net.pth")

print("Trained networks saved successfully!")
