## Create Dataset

---

> Internship neural networks
>
> Group 4: Reinforcement learning
>
> Deadline 28.02.23 23:59

---

In [1]:
from itertools import count
import numpy as np
from tqdm import tqdm

In [2]:
%run "../../Environment/Connect4.ipynb"
%run "../../utils/utils.ipynb"
%run "../../OtherAgents/Agents.ipynb"
%run "../utils.ipynb"

In [3]:
# Shared Connect 4 environment: createDataset reads this module-level `env`
# directly, and it is also handed to the NegaMaxAgent instances created below.
env = Connect4()

In [4]:
def _finishEpisode(trajectory):
    '''
    _finishEpisode closes the game that was just played: every entry of 
    trajectory.trajDict["rtgs"] is replaced by trajectory.compute_rtgs(index), a copy of 
    the trajectory dict is appended to the module-level list `dataset` and the trajectory 
    is reset for the next game

    :trajectory: trajectory object holding the finished game
    '''
    for step in range(len(trajectory.trajDict["rtgs"])):
        trajectory.trajDict["rtgs"][step] = trajectory.compute_rtgs(step)
    # NOTE(review): dict.copy() is shallow - this assumes trajectory.reset() rebinds fresh
    # containers instead of clearing the stored ones in place; confirm against Trajectory
    dataset.append(trajectory.trajDict.copy())
    trajectory.reset()


def createDataset(trajectory, agent, opponent, num_episodes = 10):
    '''
    createDataset fills a list with trajectories (our dataset) where each trajectory represents 
    one game of connect 4 of two players. Only the moves of `agent` are recorded.

    The function works on two module-level names: the environment `env` and the list 
    `dataset` that the finished trajectories are appended to. Nothing is returned.

    :trajectory: trajectory object that is filled via push(state, action, reward) / pop() 
                 during a game; its trajDict (which holds an "rtgs" list) is copied into the 
                 dataset when the game is over
    :agent: our point of view (the agent is the player our decision transformer learns from)
    :opponent: opponent
    :num_episodes: number of games to play
    '''
    for _ in tqdm(range(num_episodes)): 
        env.reset()
        state_p1 = env.board_state.copy()
        
        # randomly select who is the first and who is the second player
        agent_is_p1 = np.random.choice([0,1]) == 0
        if agent_is_p1:
            player1, player2 = agent, opponent
        else:
            player1, player2 = opponent, agent

        while True:
            
            # select action and make move player 1
            available_actions = env.get_available_actions()
            action_p1 = player1.select_action(state_p1, available_actions)
            # keep the result in a temporary: if the game ends here and our agent is 
            # player 2, state_p2 must still be the board the agent chose action_p2 on
            state_after_p1, reward = env.make_move(action_p1, 'p1')
            
            if env.isDone:
                if agent_is_p1:
                    # add the state, action and final reward to our traj
                    trajectory.push(state_p1, action_p1, reward)
                else:
                    # the game ended on the opponent's move: replace the agent's last 
                    # step by the same state/action with the negated final reward
                    trajectory.pop()
                    trajectory.push(state_p2, action_p2, -reward)
                
                # compute rtgs, add traj to dataset and reset traj for new game
                _finishEpisode(trajectory)
                break
            
            if agent_is_p1:
                # add the state, action and current reward to our traj
                trajectory.push(state_p1, action_p1, reward)

            # select action and make move player 2
            state_p2 = state_after_p1
            available_actions = env.get_available_actions()
            action_p2 = player2.select_action(state_p2, available_actions)
            next_state_p2, reward = env.make_move(action_p2, 'p2')
            
            if env.isDone:
                if agent_is_p1:
                    # the game ended on the opponent's move: replace the agent's last 
                    # step by the same state/action with the negated final reward
                    trajectory.pop()
                    trajectory.push(state_p1, action_p1, -reward)
                else:
                    trajectory.push(state_p2, action_p2, reward)
                _finishEpisode(trajectory)
                break
            
            if not agent_is_p1:
                trajectory.push(state_p2, action_p2, reward)

            state_p1 = next_state_p2

In [5]:
# Random agent vs. random agent: seed NumPy's global RNG before anything is created
np.random.seed(0)
dataset = []
num_episodes = 100_000
trajectory = Trajectory()
agent = RandomAgent()
opponent = RandomAgent()
# createDataset appends one trajectory dict per game to the module-level `dataset`
createDataset(trajectory, agent, opponent, num_episodes)
agentName, oppName = agent.__class__.__name__, opponent.__class__.__name__
# write dataset into a file named after the two agent classes
write_list(dataset, f"{agentName}Vs{oppName}Dataset")

100%|██████████████████████████████████| 50000/50000 [4:03:55<00:00,  3.42it/s]


24950
Done writing list into a binary file


In [5]:
# NegaMax agent (our point of view) vs. random opponent: seed NumPy's global RNG first
np.random.seed(0)
dataset = []
num_episodes = 100_000
depth = 4
trajectory = Trajectory()
agent = NegaMaxAgent(env, depth)
opponent = RandomAgent()
# createDataset appends one trajectory dict per game to the module-level `dataset`
createDataset(trajectory, agent, opponent, num_episodes)
agentName, oppName = agent.__class__.__name__, opponent.__class__.__name__
# write dataset into a file; the depth value is part of the file name
write_list(dataset, f"{agentName}{depth}Vs{oppName}Dataset")

100%|██████████████████████████████████| 100000/100000 [39:36<00:00, 42.08it/s]


Done writing list into a binary file


In [5]:
# NegaMax agent vs. NegaMax opponent (both built with the same depth): seed NumPy's global RNG first
np.random.seed(0)
dataset = []
num_episodes = 50_000
depth = 4
trajectory = Trajectory()
agent = NegaMaxAgent(env, depth)
opponent = NegaMaxAgent(env, depth)
# createDataset appends one trajectory dict per game to the module-level `dataset`
createDataset(trajectory, agent, opponent, num_episodes)
agentName, oppName = agent.__class__.__name__, opponent.__class__.__name__
# write dataset into a file; the depth value is part of the file name
write_list(dataset, f"{agentName}{depth}Vs{oppName}Dataset")

100%|█████████████████████████████████| 50000/50000 [11:53:28<00:00,  1.17it/s]


Done writing list into a binary file
