In [1]:
!pip install -q -U kaggle_environments

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.8/953.8 kB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.2/840.2 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.7/178.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for Chessnut (setup.py) ... [?25l[?25hdone


In [None]:
import numpy as np
import pandas as pd
import random

import matplotlib.pyplot as plt
import seaborn as sns

from kaggle_environments import make, evaluate

In [3]:
%%writefile rock_agent.py

# 1. Агент, который всегда выбирает "камень"
def rock_agent(observation, configuration):
    return 0

Writing rock_agent.py


In [10]:
%%writefile paper_agent.py
# 2. Агент, который всегда выбирает "бумагу"
def paper_agent(observation, configuration):
    return 1

Writing paper_agent.py


In [21]:
%%writefile scissors_agent.py
# 3. Агент, который всегда выбирает "ножницы"
def scissors_agent(observation, configuration):
    return 2

Writing scissors_agent.py


In [None]:
%%writefile copy_opponent_agent.py
import random
# 4. Aгент, который производит то же самое действие, что и оппонент на прошлом ходу
def copy_opponent_agent(observation, configuration):
    if observation.step > 0:
        return observation.lastOpponentAction
    else:
        return random.randrange(0, configuration.signs)

In [32]:
%%writefile random_agent.py
import random
# 5. Aгент, который выбирает действие случайным образом
def random_agent(observation, configuration):
  return random.choice([0, 1, 2])

Overwriting random_agent.py


In [16]:
%%writefile alternating_agent.py

# 6. Aгент, который чередует действие
def alternating_agent(observation, configuration):
    if observation.step % 3 == 0:
        return 0
    elif observation.step % 3 == 1:
        return 1
    else:
        return 2

Writing alternating_agent.py


In [41]:
%%writefile win_past_agent.py
import random
# 7. Aгент, который выбирает победный ход в предыдущем ходу
def win_past_agent(observation, configuration):
    if observation.step == 0:
        return random.choice([0, 1, 2])
    else:
        opponent_last_move = observation.lastOpponentAction
        if opponent_last_move == 0:
            return 1
        elif opponent_last_move == 1:
            return 2
        else:
            return 0

Overwriting win_past_agent.py


In [None]:
%%writefile without_rock_agent.py
import random
# 8. Aгент, который исключает использование "камня"
def without_rock_agent(observation, configuration):
    return random.choice([1,2])

In [None]:
%%writefile without_paper_agent.py
import random
# 9. Aгент, который исключает использование "бумага"
def without_paper_agent(observation, configuration):
    return random.choice([0,2])

In [49]:
%%writefile without_scissors_agent.py
import random
# 10. Aгент, который исключает использование "ножницы"
def without_scissors_agent(observation, configuration):
    return random.choice([0,1])

Overwriting without_scissors_agent.py


In [53]:
%%writefile loss_past_agent.py
import random
# 11. Aгента, который выбирает не выигрышное дейсвтие для предыдущего хода оппонента
def loss_past_agent(observation, configuration):
    if observation.step == 0:
        return random.choice([0, 1, 2])
    else:
        opponent_last_move = observation.lastOpponentAction
        # Предполагаем, что противник может выбрать то, что проигрывает
        if opponent_last_move == 0:
            return random.choice([0, 2])
        elif opponent_last_move == 1:
            return random.choice([0, 1])
        else:
            return random.choice([1, 2])

Writing loss_past_agent.py


In [87]:
%%writefile reactionary_agent.py
import random
import math
# 12. Aгент, который реагирует на предыдущее действие игрока, если ход был проигран, то выбирается другой
def get_score(left_move, right_move):
    delta = (
        right_move - left_move
        if (left_move + right_move) % 2 == 0
        else left_move - right_move
    )
    return 0 if delta == 0 else math.copysign(1, delta)

def reactionary_agent(observation, configuration):
    global last_react_action
    if observation.step == 0:
        last_react_action = random.randrange(0, configuration.signs)
    elif get_score(last_react_action, observation.lastOpponentAction) <= 1:
        last_react_action = (observation.lastOpponentAction + 1) % configuration.signs

    return last_react_action

Overwriting reactionary_agent.py


In [98]:
%%writefile statistical_agent.py
import random
# 13. Агент, который анализирует статистику ходов противника
def statistical_agent(observation, configuration):
    global action_histogram
    if observation.step == 0:
        action_histogram = {}
        return
    action = observation.lastOpponentAction
    if action not in action_histogram:
        action_histogram[action] = 0
    action_histogram[action] += 1
    mode_action = None
    mode_action_count = None
    for k, v in action_histogram.items():
        if mode_action_count is None or v > mode_action_count:
            mode_action = k
            mode_action_count = v
            continue

    return (mode_action + 1) % configuration.signs

Writing statistical_agent.py


In [99]:
env = make("rps", configuration={"episodeSteps": 100})
# Турнир между агентами
agents = ['rock_agent', 'copy_opponent_agent', 'paper_agent',
          'random_agent', 'alternating_agent', 'scissors_agent',
          'win_past_agent', 'without_rock_agent', 'without_scissors_agent',
          'without_paper_agent', 'loss_past_agent', 'reactionary_agent',
          'statistical_agent']

# Добавляем .py к каждому агенту
agents_py = [i + ".py" for i in agents]
n_agents = len(agents)

# Инициализируем матрицу результатов
scores = np.zeros((n_agents, n_agents))

# Играем каждый агент против каждого
for i in range(n_agents):
    for j in range(i + 1, n_agents):
        result = evaluate("rps", [agents_py[i], agents_py[j]], configuration={"episodeSteps": 100}, num_episodes=1)
        scores[i][j] = result[0][0]  # сохраняем результат для агента i
        scores[j][i] = result[0][1]  # сохраняем результат для агента j

# Построение матрицы корреляций
df_scores = pd.DataFrame(scores, index=agents, columns=agents)
df_scores

Unnamed: 0,rock_agent,copy_opponent_agent,paper_agent,random_agent,alternating_agent,scissors_agent,win_past_agent,without_rock_agent,without_scissors_agent,without_paper_agent,loss_past_agent,reactionary_agent,statistical_agent
rock_agent,0.0,0.0,-99.0,0.0,0.0,99.0,-98.0,0.0,-47.0,54.0,49.0,-97.0,-98.0
copy_opponent_agent,0.0,0.0,0.0,0.0,-98.0,0.0,99.0,0.0,0.0,0.0,0.0,99.0,-21.0
paper_agent,99.0,0.0,0.0,0.0,0.0,-99.0,-97.0,-48.0,51.0,0.0,51.0,-97.0,-97.0
random_agent,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
alternating_agent,0.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
scissors_agent,-99.0,0.0,99.0,0.0,0.0,0.0,-97.0,46.0,0.0,-46.0,43.0,-99.0,-99.0
win_past_agent,98.0,-99.0,97.0,0.0,0.0,97.0,0.0,22.0,35.0,0.0,0.0,0.0,49.0
without_rock_agent,0.0,0.0,48.0,0.0,0.0,-46.0,-22.0,0.0,21.0,-24.0,0.0,-29.0,0.0
without_scissors_agent,47.0,0.0,-51.0,0.0,0.0,0.0,-35.0,-21.0,0.0,21.0,0.0,-28.0,-42.0
without_paper_agent,-54.0,0.0,0.0,0.0,0.0,46.0,0.0,24.0,-21.0,0.0,21.0,0.0,-34.0
