<a href="https://colab.research.google.com/github/Aimkeys-Sir/agent-card/blob/main/cardagent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Welcome to Poker EA agent**

Switch the runtime type to GPU or TPU for faster runs.

In [None]:
# Mount Google Drive at /content/drive; later cells save weights and plots
# under the relative path drive/MyDrive/pokerEa/.
from google.colab import drive
drive.mount('/content/drive')

The **CardAgent** class has an `__init__` method that initializes various parameters and creates the layers of the neural network using the nn.Linear module.

The `network` method sets up the layers of the neural network and loads weights from a specified file path if `load_weights` is set to `True`.

The `forward` method defines the forward pass of the neural network. It applies `ReLU` activation functions to the first three layers and a `softmax` activation function to the last layer. If a mask is provided, it applies element-wise multiplication between the `output` and the `mask`.

The `remember` method is used to store experiences (`observation`, `move`, `reward`, `next_state`, `complete`) into the agent's memory deque.

The `train_memory` method trains the neural network using a mini-batch of experiences from the memory. It calculates the `target` value based on the reward and the `maximum Q-value` of the next state (if the episode is not complete). Then, it performs a forward pass, calculates the loss using mean squared error (MSE) loss, and performs backpropagation to update the weights.

The `replay_exp` method replays experiences from the memory and calls the `train_memory` method for each experience.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import collections
import random
import math
import numpy as np
import torch.optim as optim
from typing import Type
import seaborn as sns
import matplotlib.pyplot as plt
import statistics
import pandas as pd


class CardAgent(nn.Module):
    """Feed-forward Q-network with a replay memory for one card-game player.

    The network maps a 57-value observation to 60 action scores through three
    ReLU hidden layers and a softmax output layer.

    ``params`` must provide ``first_layer_size``, ``second_layer_size``,
    ``third_layer_size``, ``gamma``, ``learning_rate``, ``memory_size``,
    ``batch_size``, ``weights_path`` and ``load_weights``.

    ``optimizer`` starts as ``None``; the caller must assign one before
    calling ``train_memory`` or ``replay_exp``.
    """

    def __init__(self, params):
        super().__init__()
        self.first_layer = params["first_layer_size"]
        self.second_layer = params["second_layer_size"]
        self.third_layer = params["third_layer_size"]
        self.gamma = params["gamma"]
        self.learning_rate = params["learning_rate"]
        # Bounded replay buffer: the oldest experiences are dropped first.
        self.memory = collections.deque(maxlen=params["memory_size"])
        self.batch_size = params["batch_size"]
        self.weights_path = params["weights_path"]
        self.optimizer = None
        self.load_weights = params["load_weights"]
        # Optional per-action multiplier applied to the output in forward().
        self.mask = None
        self.cum_reward = 0
        self.network()

    def network(self):
        """Create the four linear layers and optionally load saved weights."""
        # Do not assign to ``self.requires_grad_`` here: that name is an
        # nn.Module method and an instance attribute would shadow it.
        self.fc1 = nn.Linear(57, self.first_layer)
        self.fc2 = nn.Linear(self.first_layer, self.second_layer)
        self.fc3 = nn.Linear(self.second_layer, self.third_layer)
        self.fc4 = nn.Linear(self.third_layer, 60)

        if self.load_weights:
            # ``self.model`` holds load_state_dict's key report, not a model;
            # the attribute is kept for backward compatibility.
            self.model = self.load_state_dict(torch.load(self.weights_path))
            print("weights loaded")

    @staticmethod
    def _as_batch(values):
        """Return ``values`` as a detached float32 tensor of shape (1, N)."""
        return torch.as_tensor(values, dtype=torch.float32).detach().reshape(1, -1)

    def forward(self, observation):
        """Return softmax action scores, multiplied by ``self.mask`` if set."""
        x = F.relu(self.fc1(observation))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.softmax(self.fc4(x), dim=-1)

        if self.mask is not None:
            print(f"before mask: \n{x}")
            return x * self.mask
        return x

    def remember(self, observation, move, reward, next_state, complete):
        """Append one experience tuple to the replay memory."""
        self.memory.append((observation, move, reward, next_state, complete))

    def train_memory(self, observation, move, reward, next_state, complete):
        """Run one Q-learning update on a single experience.

        Args:
            observation: state before the move (any shape that flattens to
                the network's input size).
            move: one-hot action vector; ``np.argmax(move)`` selects the
                output entry whose target is replaced.
            reward: scalar reward received for the move.
            next_state: state after the move.
            complete: when true, the target is the reward alone.

        Raises:
            ValueError: if the action index does not fit the network output.
        """
        self.train()
        self.mask = None  # train on the raw, unmasked network output

        target = reward
        if not complete:
            # Bootstrap from the *next* state; the target is a constant, so
            # it is computed without tracking gradients.
            with torch.no_grad():
                next_scores = self.forward(self._as_batch(next_state))
            target = reward + self.gamma * torch.max(next_scores)

        with torch.enable_grad():
            output = self.forward(self._as_batch(observation))
            try:
                # Only the chosen action's entry differs from the prediction,
                # so only that entry contributes to the loss.
                target_f = output.detach().clone()
                target_f[0][np.argmax(move)] = target
            except IndexError as err:
                print(output.shape)
                print(move)
                raise ValueError("what is this?") from err

            self.optimizer.zero_grad()
            loss = F.mse_loss(output, target_f)
            loss.backward()
            self.optimizer.step()

    def replay_exp(self):
        """Train on a random mini-batch, or on all memory if it is small."""
        if len(self.memory) > self.batch_size:
            minibatch = random.sample(self.memory, self.batch_size)
        else:
            minibatch = self.memory

        for observation, move, reward, next_state, complete in minibatch:
            self.train_memory(observation, move, reward, next_state, complete)



The function `to_cs(n_l)` takes a list of numbers `n_l` and maps them to the corresponding card names in the `cs` list. It returns a list of card names based on the input numbers.

`params1` and `params2` are the parameter dictionaries for the two players' agents.

In [None]:
# Display names for the 60 move indices: 0-51 are the standard cards (four
# suits per rank, ordered clubs, spades, hearts, diamonds), 52-53 the jokers,
# 54-57 the suit choices, 58 "complete build" and 59 "pick card".
# The suit choices follow the same suit order as the cards, because choosing
# move 54 + k whitelists the cards whose index % 4 == k.
cs = [
  "ace_of_clubs",
  "ace_of_spades",
  "ace_of_hearts",
  "ace_of_diamonds",
  "2_of_clubs",
  "2_of_spades",
  "2_of_hearts",
  "2_of_diamonds",
  "3_of_clubs",
  "3_of_spades",
  "3_of_hearts",
  "3_of_diamonds",
  "4_of_clubs",
  "4_of_spades",
  "4_of_hearts",
  "4_of_diamonds",
  "5_of_clubs",
  "5_of_spades",
  "5_of_hearts",
  "5_of_diamonds",
  "6_of_clubs",
  "6_of_spades",
  "6_of_hearts",
  "6_of_diamonds",
  "7_of_clubs",
  "7_of_spades",
  "7_of_hearts",
  "7_of_diamonds",
  "8_of_clubs",
  "8_of_spades",
  "8_of_hearts",
  "8_of_diamonds",
  "9_of_clubs",
  "9_of_spades",
  "9_of_hearts",
  "9_of_diamonds",
  "10_of_clubs",
  "10_of_spades",
  "10_of_hearts",
  "10_of_diamonds",
  "jack_of_clubs",
  "jack_of_spades",
  "jack_of_hearts",
  "jack_of_diamonds",
  "queen_of_clubs",
  "queen_of_spades",
  "queen_of_hearts",
  "queen_of_diamonds",
  "king_of_clubs",
  "king_of_spades",
  "king_of_hearts",
  "king_of_diamonds",
  "black_joker",
  "red_joker",
  "choose clubs",
  "choose spades",
  "choose hearts",
  "choose diamonds",
  "complete build",
  "pick card"
]

# Hyper-parameters shared by both agents; copied per agent further down.
params = dict()

# Hidden-layer widths of the CardAgent network.
params["first_layer_size"] = 256
params["second_layer_size"] = 128
params["third_layer_size"] = 84
params["learning_rate"] = 0.01
# Maximum number of experiences kept in each agent's replay memory.
params["memory_size"] = 25000

# NOTE: with load_weights=True, CardAgent calls torch.load(weights_path) when
# it is constructed. Set this to False on the very first run, before any
# weights file has been saved.
params["load_weights"] = True
# When True, run() decays epsilon linearly per game; otherwise epsilon is 0.01.
params['train'] = True
params["epsilon_decay_linear"] = 0.01
# Number of games played by run().
params["episodes"] = 100
# Number of experiences sampled per replay_exp() call.
params["batch_size"] = 1000
# Discount factor applied to the next state's best score.
params["gamma"] = 0.99


params1 = params.copy()
params2 = params.copy()

# Create these folders in your Drive beforehand so the weights can be saved.
params1["weights_path"] = "drive/MyDrive/pokerEa/weights/agent1/weights.h5"
params2["weights_path"] = "drive/MyDrive/pokerEa/weights/agent2/weights.h5"

# Card groups by index into `cs`.
# questions: the 8s (28-31), queens (44-47) and kings (48-51).
questions = [28, 29, 30, 31, 51, 50, 49, 48, 47, 46, 45, 44]
aces = [0, 1, 2, 3]
# punishers: the 2s (4-7), the 3s (8-11) and both jokers (52, 53).
punishers = [4, 5, 6, 7, 8, 9, 10, 11, 52, 53]
all_cards_without_jokers = list(range(52))

def to_cs(n_l):
    """Translate a sequence of move indices into their names from ``cs``."""
    return [cs[index] for index in n_l]


**Player** class represents a player in the card game.

`__init__(self, game, index)`: Initializes the player object with attributes such as `hand`, `build`, `game`, `asking`, `index`, `reward`, and `agent`.

`can_complete(self)`: Checks if the player can complete their build. Returns True if the build is not empty and the last card in the build is not a question card (a member of `questions`). Otherwise, returns False.

`check_in_white(self)`: Returns a list of cards from the player's hand that are in the white list of cards (`game.white_list(build=self.build)`).

`waste_card(self, card)`: Handles the logic when the player wastes a card. If the card is not in the player's hand, the `reward` is set to -1. Otherwise, if the card is in the white list, it is added to the player's build, removed from their hand, and the `reward` is set to 1. If the card is not in the white list, the `reward` is set to -1.

`pick_cards(self)`: Handles the logic when the player picks cards. Calls the `pick` method of the game and starts a new turn. Clears the player's build. The `reward` is set based on the number of remaining cards in the player's hand.

`complete_build(self)`: Handles the logic when the player completes their build. If the build is empty, the `reward` is set to -1. Otherwise, the player's build is wasted using the `waste` method of the game. If the player has no cards left and the build's last card is not in the list of questions, aces, or punishers, and there are no card-less players in the game, the player wins: the `reward` is set to 20, the game is marked as complete, and the player's won count is incremented. Otherwise, if the player has no cards left and either the build's last card is in the list or there are card-less players in the game, the player is added to the list of card-less players. The player's build is cleared, and the `reward` is set to 1. If the player is not asking, a new turn is started.

`choose_flower(self, flower)`: Handles the logic when the player chooses a flower. If the player is asking, the chosen flower is set as the game's action, a new turn is started, and the asking flag is set to False. Otherwise, the reward is set to -1.

`do_move(self, move)`: Handles the logic for the player's move. The move parameter determines the action taken by the player. If the move is less than 54, the player wastes a card. If the move is between 54 and 57, the player chooses a flower. If the move is 58, the player completes their build. If the move is 59, the player picks cards. The reward is set based on the outcome of the move.

`one_hot_encoded_hand(self)`: Returns a one-hot encoded representation of the player's hand using a tensor of size 54, where the indices corresponding to the player's cards are set to 1.0.

`observation(self)`: Returns the observation of the player, which includes the one-hot encoded hand, the normalized value of the last card in the player's build (or the top card of the game if the build is empty), the normalized value of the game's action, and a flag that is 1 when any player is card-less.

In [None]:
class Player():
    """One participant in the card game.

    Holds the player's hand, the build (cards laid down during the current
    turn), the reward of the most recent move and a win counter. ``game`` is
    the shared game object; ``agent`` is attached by the caller.
    """

    def __init__(self, game, index) -> None:
        self.hand = []
        self.build = []
        self.game = game
        # True after wasting a build that lets the player choose a suit.
        self.asking = False
        self.index = index
        self.reward = 0
        self.won = 0
        self.agent = None

    def can_complete(self):
        """Return True if the build is non-empty and does not end on a question card."""
        return len(self.build) > 0 and self.build[-1] not in questions

    def check_in_white(self):
        """Return the hand cards that are currently legal to add to the build."""
        # Build the white list once instead of once per hand card.
        white = self.game.white_list(build=self.build)
        return [card for card in self.hand if card in white]

    def waste_card(self, card):
        """Move ``card`` from the hand to the build if it is a legal play.

        Sets ``reward`` to 1 on success and to -1 when the card is not in the
        hand or not on the white list.
        """
        if card not in self.hand:
            self.reward = -1
            return

        if card in self.game.white_list(build=self.build):
            self.build += [card]
            self.hand = [held for held in self.hand if held != card]
            self.reward = 1
        else:
            # wrong move
            self.reward = -1

    def pick_cards(self):
        """Draw from the deck, end the turn and clear the build.

        The reward is 0 for hands of up to six cards and ``-(len(hand) - 4) / 2``
        for larger hands.
        """
        self.game.pick(player=self)
        self.game.new_turn()
        # NOTE(review): cards already moved into the build are dropped here
        # without being added to the waste pile - confirm this is intended.
        self.build.clear()
        hand_size = len(self.hand)
        self.reward = -((hand_size - 4) / 2) if hand_size > 6 else 0

    def complete_build(self):
        """Lay the build on the waste pile and settle the outcome.

        An empty build is an illegal move (reward -1). Emptying the hand on a
        card that is not a question, ace or punisher, while nobody is
        card-less, wins the game (reward 20). Otherwise the reward is 1 and
        the turn passes unless the player must now choose a suit.
        """
        if len(self.build) == 0:
            self.reward = -1
            return

        self.game.waste(self)

        hand_empty = len(self.hand) == 0
        ends_on_special = self.build[-1] in questions + aces + punishers
        someone_cardless = len(self.game.card_less) > 0

        if hand_empty and not ends_on_special and not someone_cardless:
            self.reward = 20
            self.game.complete = True
            self.game.winner = self.index
            self.won += 1
            self.build = []
            return
        elif hand_empty and (ends_on_special or someone_cardless):
            # Out of cards but not allowed to win yet.
            self.game.card_less += [self.index]

        self.build.clear()
        self.reward = 1
        if not self.asking:
            self.game.new_turn()

    def choose_flower(self, flower):
        """Set the requested suit (0-3) as the game's action if the player is asking.

        Choosing a suit while not asking is an illegal move (reward -1).
        """
        if self.asking:
            self.game.action = flower
            self.game.new_turn()
            self.asking = False
        else:
            self.reward = -1

    def do_move(self, move):
        """Dispatch a move index.

        0-53 plays that card, 54-57 chooses a suit, 58 completes the build
        and 59 picks cards. ``reward`` is reset to 0 before dispatching.
        """
        self.reward = 0
        if move < 54:
            self.waste_card(move)
        elif move < 58:
            self.choose_flower(move - 54)
        elif move == 58:
            self.complete_build()
        elif move == 59:
            self.pick_cards()

    def one_hot_encoded_hand(self):
        """Return a 54-element tensor with 1.0 at every index held in the hand.

        Raises:
            ValueError: if the hand contains an index outside the tensor.
        """
        try:
            tensor = torch.zeros(54)
            tensor[self.hand] = 1.0
            return tensor
        except IndexError as err:
            print(self.hand, self.game.deck)
            raise ValueError("happened again") from err

    def observation(self):
        """Return the 57-element state tensor for this player.

        Layout: the one-hot hand (54 values), the reference card divided by
        53, the game action shifted by one and divided by 8 (0 when there is
        no action), and a flag that is 1 when any player is card-less.
        """
        top = self.build[-1] if len(self.build) > 0 else self.game.top_card
        top_normal = top / 53

        actions = 0 if self.game.action == -1 else self.game.action + 1
        actions_normal = actions / 8
        cardless = 1 if len(self.game.card_less) > 0 else 0

        extras = torch.tensor([top_normal, actions_normal, cardless], requires_grad=False)
        return torch.cat([self.one_hot_encoded_hand(), extras], dim=0)

    def mask(self):
        """Return a 60-element tensor with 1 at every currently legal move.

        While the player is asking, only the four suit choices (54-57) are
        legal. Otherwise the playable hand cards are legal, plus "complete
        build" (58) and "pick card" (59) when allowed.
        """
        if self.asking:
            return torch.cat([torch.zeros(54), torch.tensor([1, 1, 1, 1], requires_grad=False), torch.zeros(2)])

        match_t = torch.zeros(58)
        match_t[self.check_in_white()] = 1.0

        complete = 1 if self.can_complete() else 0
        # Picking is allowed only with an empty build or one ending on a question.
        can_pick = 0 if (len(self.build) > 0 and self.build[-1] not in questions) else 1

        return torch.cat([match_t, torch.tensor([complete, can_pick], requires_grad=False)], dim=0)




`__init__(self)`: Initializes the game object with attributes: `complete`, `wastes`, `deck`, `top_card`, `action`, `turn`, `card_less`, and `winner`.

`new_turn(self)`: Advances the `turn` to the next player by incrementing the turn attribute modulo 2.

`waste(self, player)`: Handles the logic when a player wastes their build. If the action is between 4 and 6 (punishers) and the top card of the player's build is less than 4, the player is required to have at least 2 aces in their build to proceed. If they don't, the top card is changed to the previous top card from the wastes, the turn is advanced to the next player, and the player's build is inserted back into the wastes before the previous top card. Otherwise, the player is set to be asking. In any other case, the player's build is added to the wastes. The top card is updated to the last card in the wastes.

`pick(self, player)`: Handles the logic when a player picks cards. The pick_num is determined based on the current action. If the action is 4, the player picks 2 cards. If the action is 5, the player picks 3 cards. If the action is 6, the player picks 5 cards. If the deck has fewer cards than the required number, the deck is replenished with the cards from the wastes (except the last card), and the top card is updated. The player picks random cards from the deck and adds them to their hand. If the player was previously card-less, they are removed from the card_less list.

`white_list(self, build=[])`: Returns the white list of cards based on the current action and the build. If there is an action, the white list is determined accordingly. If the action is between 0 and 3 (pattern), the white list includes cards that match the pattern and aces. If the action is 4, the white list includes cards 4, 5, 6, 7, and aces. If the action is 5, the white list includes cards 8, 9, 10, 11, and aces. If the action is 6, the white list includes cards 52, 53, and aces. If the action is 7 (jump), the white list includes cards with a value of 10. If there is no action and the build is empty, the white list is determined based on the top card. If the top card is 52, the white list includes cards with pattern 0 or 1, aces, and cards 52 and 53. If the top card is 53, the white list includes cards with pattern 2 or 3, aces, and cards 52 and 53. Otherwise, the white list includes cards with the same pattern or value as the top card, aces, and either card 52 or 53 based on the top card's pattern. If the build is not empty, the white list is determined based on the last card in the build. If the last card is a question, the white list includes cards with the same pattern or value as the last card, aces, and either card 52 or 53 based on the top card's pattern. If the last card is 52 or 53, the white list includes only cards 52 and 53. Otherwise, the white list includes cards with the same value as the last card.

In [None]:
class Game():
    """Shared table state for the two-player card game.

    Attributes:
        complete: True when no game is in progress.
        wastes: the waste pile; its last element is the top card.
        deck: cards still available to pick.
        top_card: the card currently on top of the waste pile.
        action: -1 for none, 0-3 a requested suit (card index % 4),
            4-6 a punishment (pick 2, 3 or 5), 7 a jump.
        turn: index (0 or 1) of the player to move.
        card_less: indices of players who ran out of cards without winning.
        winner: index of the winning player, or None.
    """

    def __init__(self) -> None:
        self.complete = True
        self.wastes = []
        self.deck = []
        self.top_card = None
        self.action = -1
        self.turn = 0
        self.card_less = []
        self.winner = None

    def new_turn(self):
        """Pass the turn to the other player."""
        self.turn = (self.turn+1) % 2

    def waste(self, player):
        """Move the player's build onto the waste pile and update the action.

        An ace on top of the build makes the player "asking" (they choose a
        suit next). A 2, 3 or joker on top of the pile afterwards sets the
        matching punishment action (4, 5 or 6).
        """
        self.action = -1
        top = player.build[-1]
        # NOTE(review): self.action was reset to -1 just above, so this first
        # branch can never run. Checking the pre-reset action would enable
        # it, but the two-ace path below never adds the build to the waste
        # pile - confirm the intended rules before activating it.
        if self.action > 3 and top < 4:
            aces_in_build = list(filter(lambda x: x < 4, player.build))

            if len(aces_in_build) < 2:
                top = self.wastes[-1]
                self.turn = (self.turn+1) % 2
                self.wastes = self.wastes[0:-1] + \
                    player.build + self.wastes[-1:]
            else:
                player.asking = True
        elif top < 4:
            player.asking = True
            self.wastes += player.build
        else:
            self.wastes += player.build

        self.top_card = self.wastes[-1]

        if self.top_card in punishers:
            # Four cards per rank, aces first: the 2s have rank index 1 and
            # the 3s rank index 2.
            rank_index = self.top_card // 4
            if rank_index == 1:
                self.action = 4
            elif rank_index == 2:
                self.action = 5
            elif self.top_card in [52, 53]:
                self.action = 6

    def pick(self, player):
        """Deal cards from the deck to ``player``.

        A pending punishment (action 4, 5 or 6) costs 2, 3 or 5 cards and is
        cleared; otherwise one card is picked. When the deck is too small the
        waste pile (except its top card) is folded back in, and the player
        receives at most as many cards as are then available.
        """
        penalty_sizes = {4: 2, 5: 3, 6: 5}
        pick_num = penalty_sizes.get(self.action, 1)
        if self.action in penalty_sizes:
            self.action = -1

        # If there are not enough cards on the deck, recycle the waste pile.
        if len(self.deck) < pick_num and self.wastes:
            self.deck += self.wastes[0:-1]
            self.wastes = self.wastes[-1:]
            self.top_card = self.wastes[-1]

        # random.sample raises when asked for more cards than exist.
        pick_num = min(pick_num, len(self.deck))
        picked_cards = random.sample(self.deck, pick_num)
        self.deck = [card for card in self.deck if card not in picked_cards]

        player.hand += picked_cards

        # A player holding cards again is no longer card-less.
        if player.index in self.card_less:
            self.card_less = [idx for idx in self.card_less if idx != player.index]

    def white_list(self, build=None):
        """Return the card indices that may legally be played next.

        Args:
            build: the cards the player has already laid down this turn;
                ``None`` is treated as an empty build.

        A pending action takes priority over the build and the top card.
        """
        build = [] if build is None else build

        if self.action != -1:
            if self.action < 4:
                # Requested suit, the same-coloured joker and any ace.
                white = [c for c in all_cards_without_jokers if c % 4 == self.action]
                white += [52] if self.action < 2 else [53]
                return white + aces
            elif self.action == 4:
                return [4, 5, 6, 7] + aces
            elif self.action == 5:
                return [8, 9, 10, 11] + aces
            elif self.action == 6:
                return [52, 53] + aces
            elif self.action == 7:
                return [c for c in all_cards_without_jokers if c // 4 == 10]

        if len(build) == 0:
            if self.top_card == 52:
                white = [c for c in all_cards_without_jokers if c % 4 < 2]
                white += aces + [52, 53]
            elif self.top_card == 53:
                white = [c for c in all_cards_without_jokers if c % 4 > 1]
                white += aces + [52, 53]
            else:
                # Same suit or same rank as the top card, plus one joker.
                white = [
                    c for c in all_cards_without_jokers
                    if c % 4 == self.top_card % 4 or c // 4 == self.top_card // 4
                ]
                white += [52] if self.top_card % 4 < 2 else [53]
            return white

        last = build[-1]
        if last in questions:
            white = [
                c for c in all_cards_without_jokers
                if c % 4 == last % 4 or c // 4 == last // 4
            ]
            # The joker is selected from the top card's suit, not from `last`.
            white += [52] if self.top_card % 4 < 2 else [53]
            return white + aces
        elif last == 52 or last == 53:
            return [52, 53]
        else:
            # Only cards of the same rank can extend the build.
            return [c for c in all_cards_without_jokers if c // 4 == last // 4]



**Game Initialization:**

* The `initialize_game` function initializes the
game by setting up the deck, distributing cards to players, selecting a top card, and initializing other game-related variables.

**Play Function:**

* The `play` function defines the main logic for a player's turn.
* It first obtains the current state of the player using `player.observation()`.
* Then, it uses the agent's epsilon-greedy strategy to choose an action. If a random number is less than the agent's epsilon value, a random action is chosen. Otherwise, the agent's neural network is used to predict the action.
* The chosen action is executed by the player using `player.do_move(move)`.
* The resulting reward is printed and stored in the player's reward attribute.
* The next state is obtained, and the move is converted into a one-hot encoding representation.
* The agent's remember and train_memory methods are called to store the experience in the agent's memory and train the agent's neural network.

**Run Function:**

* The `run` function is the main driver of the game and training process.
* It initializes two instances of the CardAgent class, one for each player.
* It sets up the optimizers for both agents.
* It sets the player's agent attribute to the corresponding agent.
* The main loop iterates until the desired number of games (`params['episodes']`) is reached.
* Within the loop, if the game is complete, the game is initialized using `initialize_game`.
* Then, the players take turns playing their moves using the play function.
* The game progress is printed, and the step count is incremented. If the step count exceeds a threshold of 1000, the game is forced to complete.
* After the game is complete, the rewards and experiences are updated, and the agents' memories are replayed (`replay`) to train the neural networks.
* The loop continues until the desired number of games is completed.

In [None]:
def initialize_game(game, players):
    """Reset ``game`` and ``players`` and deal a fresh game.

    Every player gets four random cards. The starting top card is a random
    remaining card that is not a question, ace or punisher. State left over
    from a previous game (waste pile, pending action, card-less list, player
    builds and asking flags) is cleared first, so that old waste cards are
    not later reshuffled into the new 54-card deck.
    """
    game.deck = list(range(54))
    game.wastes = []
    game.card_less = []
    game.action = -1

    for player in players:
        player.build = []
        player.asking = False
        player.hand = random.sample(game.deck, 4)
        game.deck = [card for card in game.deck if card not in player.hand]

    excluded = set(questions + aces + punishers)
    poss_top = [card for card in game.deck if card not in excluded]
    game.top_card = random.choice(poss_top)
    game.wastes.append(game.top_card)

    game.deck = [card for card in game.deck if card != game.top_card]
    game.winner = None
    game.complete = False


# Module-level game and its two players, used by run() below.
game = Game()
player1 = Player(game=game, index=0)
player2 = Player(game=game, index=1)

def play(player, agent):
    """Play one move for ``player`` and train ``agent`` on the result.

    With probability ``agent.epsilon`` the move is drawn from random scores;
    otherwise it is the agent's best-scored action. Both are multiplied by the
    player's legality mask. The experience is stored in the agent's memory
    and used for one immediate training step.
    """
    state = player.observation()
    print(f"\nplayer {player.index+1}")
    if random.uniform(0,1) < agent.epsilon:
        prediction = torch.rand(60) * player.mask()
    else:
        with torch.no_grad():
            # Keep `state` unbatched so every stored experience has the same
            # shape; only the network input gets the batch dimension.
            state_batch = torch.tensor(np.expand_dims(state, 0), dtype=torch.float32, requires_grad=False)
            agent.mask = player.mask()
            prediction = agent(state_batch)
            print(f"agentPred: {prediction}")

    # Index of the best score over the flattened prediction.
    move = torch.argmax(prediction).item()

    print(f"move: {move}:{to_cs([move])}")

    player.do_move(move)
    print(f"reward: {player.reward}")

    next_state = player.observation()
    m = np.eye(60)[move]  # one-hot encoding of the chosen move

    agent.cum_reward += player.reward

    agent.remember(observation=state, move=m, reward=player.reward, next_state=next_state, complete=player.game.complete)
    agent.train_memory(observation=state, move=m, reward=player.reward, next_state=next_state, complete=player.game.complete)


def std_mean_dev(array):
    """Return ``(mean, sample standard deviation)`` of ``array``.

    A single data point has no sample standard deviation, so 0.0 is reported
    instead of letting ``statistics.stdev`` raise. An empty ``array`` still
    raises ``statistics.StatisticsError``.
    """
    mean = statistics.mean(array)
    deviation = statistics.stdev(array) if len(array) > 1 else 0.0
    return mean, deviation


def plot_seaborn(array_counter, array_score, y_name, train=True):
    """Scatter ``array_score`` against ``array_counter`` and save the figure.

    Args:
        array_counter: x values (game numbers).
        array_score: y values, same length as ``array_counter``.
        y_name: y-axis label, also used as the saved file name.
        train: when true, a regression line is drawn through the points.

    The figure is written to drive/MyDrive/pokerEa/plots/<y_name>.png and
    then displayed.
    """
    sns.set(color_codes=True, font_scale=1.5)
    sns.set_style("white")
    plt.figure(figsize=(13,8))
    fit_reg = bool(train)
    ax = sns.regplot(
        x=np.array(array_counter),
        y=np.array(array_score),
        x_jitter=.1,
        scatter_kws={"color": "#36688D"},
        label='Data',
        fit_reg=fit_reg,
        line_kws={"color": "#F49F05"}
    )
    # Plot the average line
    y_mean = [np.mean(array_score)]*len(array_counter)
    ax.plot(array_counter, y_mean, label='Mean', linestyle='--')
    ax.legend(loc='upper right')
    ax.set(xlabel='# games', ylabel=y_name)
    # Save before showing: once show() returns the figure may already be
    # cleared, and savefig would then write an empty image.
    plt.savefig(f"drive/MyDrive/pokerEa/plots/{y_name}.png")
    plt.show()

def run():
    """Play ``params['episodes']`` games between two agents, then plot results.

    Each agent gets an Adam optimizer. During a game the players alternate
    through ``play``; a game is abandoned after more than 1000 steps. When a
    game ends the loser gets a -20 experience, per-game statistics are
    recorded, and each agent replays its memory and saves its weights.
    Finally the mean/stdev of the scores are printed and four plots are drawn.
    """
    agent1 = CardAgent(params=params1)
    agent1.optimizer = optim.Adam(
        agent1.parameters(), weight_decay=0, lr=params1['learning_rate'])
    agent2 = CardAgent(params=params2)
    agent2.optimizer = optim.Adam(
        agent2.parameters(), weight_decay=0, lr=params2['learning_rate'])
    games_count = 0
    steps = 0
    counter_plot = []
    score1_plot = []
    wins1_plot = []
    score2_plot = []
    wins2_plot = []

    player1.agent = agent1
    player2.agent = agent2

    def replay(agent):
        """Train on a batch of stored experiences and save the weights."""
        agent.replay_exp()
        model_weights = agent.state_dict()
        torch.save(model_weights, agent.weights_path)

    while games_count < params['episodes']:
        if game.complete:
            steps = 0
            initialize_game(game=game, players=[player1, player2])
            print("\nhands")

            print(to_cs(player1.hand))
            print(to_cs(player2.hand))

            print("\n top card")
            print(cs[game.top_card])

        while not game.complete:
            # Epsilon decays linearly with the number of finished games while
            # training and is fixed at 0.01 otherwise.
            if game.turn == 0:
                if not params1['train']:
                    agent1.epsilon = 0.01
                else:
                    agent1.epsilon = 1 - (games_count * params1["epsilon_decay_linear"])

                play(player=player1, agent=agent1)
            elif game.turn == 1:
                if not params2['train']:
                    agent2.epsilon = 0.01
                else:
                    agent2.epsilon = 1 - (games_count * params2["epsilon_decay_linear"])

                play(player=player2, agent=agent2)

            print(f"game: {games_count}.  step: {steps} turn: {game.turn} score: {player1.won} - {player2.won}")
            steps += 1
            if steps > 1000:
                # Give up on games that do not terminate; winner stays None.
                game.complete = True
            if game.complete:
                # Compare with None: winner index 0 is a valid, falsy value.
                if game.winner is not None:
                    for p in [player1, player2]:
                        if p.index != game.winner:
                            p.reward = -20
                            # NOTE(review): this overwrites the running total
                            # instead of adding to it - confirm intent.
                            p.agent.cum_reward = -20
                            state = p.observation()
                            # NOTE(review): this is the full 60x60 identity,
                            # not a one-hot vector, so train_memory's argmax
                            # selects action 0 - confirm which action the
                            # penalty should be attributed to.
                            m = np.eye(60)
                            p.agent.remember(observation=state, move=m, reward=p.reward, next_state=state, complete=p.game.complete)
                            p.agent.train_memory(observation=state, move=m, reward=p.reward, next_state=state, complete=p.game.complete)

                games_count += 1

                score1_plot.append(agent1.cum_reward/steps)
                score2_plot.append(agent2.cum_reward/steps)

                wins1_plot.append(player1.won)
                wins2_plot.append(player2.won)

                counter_plot.append(games_count)

                replay(agent=agent1)
                replay(agent=agent2)

    mean1, stdev1 = std_mean_dev(score1_plot)
    print(f"\n\nplayer 1: \n mean reward: {mean1} \n stdev: {stdev1}")
    print("\nmean score vs games")
    plot_seaborn(counter_plot, score1_plot, "01_scores_plot")
    print("\nwins against games")
    plot_seaborn(counter_plot, wins1_plot, "01_wins_plot")

    mean2, stdev2 = std_mean_dev(score2_plot)
    print(f"\n\nplayer 2: \n mean reward: {mean2} \n stdev: {stdev2}")
    print("\nmean score vs games")
    plot_seaborn(counter_plot, score2_plot, "02_scores_plot")
    print("\nwins against games")
    plot_seaborn(counter_plot, wins2_plot, "02_wins_plot")

# Start the training loop when this cell is executed.
run()
