In [3]:
!pip install -q -U kaggle_environments

[K     |████████████████████████████████| 1.9 MB 2.1 MB/s 
[K     |████████████████████████████████| 62 kB 1.4 MB/s 
[?25h

In [4]:
import numpy as np
import pandas as pd
import random

import matplotlib.pyplot as plt
import seaborn as sns

from kaggle_environments import make, evaluate


1) Бот - всегда "камень"

In [39]:
%%writefile rock_agent.py

def rock_agent(observation, configuration):
  return 0

Overwriting rock_agent.py


In [41]:
evaluate(
  "rps", #environment to use - no need to change
  ["rock_agent.py", "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[-98.0, 98.0]]

2) Бот - всегда "бумага"

In [42]:
%%writefile paper_agent.py

def paper_agent(observation, configuration):
  return 1

Overwriting paper_agent.py


In [43]:
evaluate(
  "rps", #environment to use - no need to change
  ["paper_agent.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[-97.0, 97.0]]

3) Бот - всегда "ножницы"

In [9]:
%%writefile scissors_agent.py

def scissors_agent(observation, configuration):
  return 2

Writing scissors_agent.py


In [44]:
evaluate(
  "rps", #environment to use - no need to change
  ["scissors_agent.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[-99.0, 99.0]]

4) Бот, копирующий оппонента

In [45]:
%%writefile copy_opponent.py
import random

def copy_opponent(observation, configuration):
  if observation.step > 0:
    return observation.lastOpponentAction
  #initial step
  else:
    return random.randrange(0, configuration.signs)

Overwriting copy_opponent.py


In [46]:
evaluate(
  "rps", #environment to use - no need to change
  ["copy_opponent.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[-20.0, 20.0]]

5) Случайный бот

In [48]:
%%writefile random_agent.py
import random

def random_agent(observation, configuration):
  return random.randrange(random.randrange(0, 3), configuration.signs)

Overwriting random_agent.py


In [49]:
evaluate(
  "rps", #environment to use - no need to change
  ["random_agent.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)


[[-20.0, 20.0]]

6) Бот, повторяющий свое последнее действие

In [50]:
%%writefile hit_the_last_own_action.py

my_last_action = 0

def hit_the_last_own_action(observation, configuration):
  global my_last_action
  my_last_action = (my_last_action + 1) % 3
    
  return my_last_action

Overwriting hit_the_last_own_action.py


In [51]:
evaluate(
  "rps", #environment to use - no need to change
  ["hit_the_last_own_action.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[0, 0]]

7) Бот с равновесием Нэша

In [52]:
%%writefile nash_equilibrium.py

import random

def nash_equilibrium(observation, configuration):
  return random.randint(0, 2)

Overwriting nash_equilibrium.py


In [53]:
evaluate(
  "rps", #environment to use - no need to change
  ["nash_equilibrium.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[0, 0]]

8) Статистический бот

In [55]:
%%writefile statistical.py

action_histogram = {}


def statistical(observation, configuration):
  global action_histogram
  if observation.step == 0:
    action_histogram = {}
    return
  action = observation.lastOpponentAction
  if action not in action_histogram:
    action_histogram[action] = 0
  action_histogram[action] += 1
  mode_action = None
  mode_action_count = None
  for k, v in action_histogram.items():
    if mode_action_count is None or v > mode_action_count:
      mode_action = k
      mode_action_count = v
      continue

  return (mode_action + 1) % configuration.signs

Overwriting statistical.py


In [56]:
evaluate(
  "rps", #environment to use - no need to change
  ["statistical.py", "rock_agent.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[98.0, -98.0]]

9) Агент Джонсона

In [57]:
%%writefile Johnson_agent.py

import numpy as np
import collections
def Johnson_agent(observation, configuration):
  k = 2
  global table, action_seq
  if observation.step % 25 == 0: # обновляем таблицу каждые 25 шагов. 
    action_seq, table = [], collections.defaultdict(lambda: [1, 1, 1])
  if len(action_seq) <= 2 * k + 1:
    action = int(np.random.randint(3))
    if observation.step > 0:
      action_seq.extend([observation.lastOpponentAction, action])
    else:
      action_seq.append(action)
      return action
  # вносим изменения
  key = ''.join([str(a) for a in action_seq[:-1]])
  table[key][observation.lastOpponentAction] += 1
  # вносим изменения последовательности действий
  action_seq[:-2] = action_seq[2:]
  action_seq[-2] = observation.lastOpponentAction
  # предсказание следующего хода опонента
  key = ''.join([str(a) for a in action_seq[:-1]])
  if observation.step < 50:
    next_opponent_action_pred = np.argmax(table[key])
  else:
    scores = np.array(table[key])
    next_opponent_action_pred = np.random.choice(3, p=scores/scores.sum())
  # совершаем действия
  action = (next_opponent_action_pred + 1) % 3
  # меняем стратегию, если высок шанс проигрыша
  if observation.step > 90:
    action = next_opponent_action_pred
  action_seq[-1] = action
  return int(action)

Overwriting Johnson_agent.py


In [58]:
evaluate(
  "rps", #environment to use - no need to change
  ["Johnson_agent.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes
)

[[0, 0]]

10) Бот, реализующий последовательность "бумага, камень, ножницы"

In [36]:
%%writefile prs_sequence.py 

def prs_sequence(observation, configuration):
  if observation.step % 3 == 1:
    return 0
  elif observation.step % 3 == 2:
    return 1
  else:
    return 2 

Overwriting prs_sequence.py


In [59]:
evaluate(
  "rps", #environment to use - no need to change
  ["prs_sequence.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes
)

[[0, 0]]

11) Бот на четном шаге выбирает бумагу, на остальных - случайное

In [60]:
%%writefile even_step_paper.py
import random

def even_step_paper(observation, configuration):
  # if even step
  if not observation.step % 2: 
    return 1
  else:
    return random.choice([0, 2])

Writing even_step_paper.py


In [61]:
evaluate(
  "rps", #environment to use - no need to change
  ["even_step_paper.py",  "statistical.py"], #agents to evaluate
  configuration={"episodeSteps": 100} #number of episodes 
)

[[-27.0, 27.0]]

Воспользуемся функцией evaluate из библиотеки kaggle_environments с помощью которой запустим наших агентов и проведем эксперимент на заданном количестве игр

In [116]:
agents = ['rock_agent',
    'paper_agent',
    'scissors_agent',
    'copy_opponent',
    'random_agent',
    'hit_the_last_own_action',
    'nash_equilibrium',
    'statistical',
    'Johnson_agent', 
    'prs_sequence', 
    'even_step_paper'
]

agents = [agent_name + ".py" for agent_name in agents]
agents


['rock_agent.py',
 'paper_agent.py',
 'scissors_agent.py',
 'copy_opponent.py',
 'random_agent.py',
 'hit_the_last_own_action.py',
 'nash_equilibrium.py',
 'statistical.py',
 'Johnson_agent.py',
 'prs_sequence.py',
 'even_step_paper.py']

In [119]:
scores = []
for i in range(len(agents)):
  scores.append([0,0,0,0,0,0,0,0,0,0,0])

scores

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

Турнир

In [121]:
#каждый с каждым

for i in range(len(agents)):
  for j in range(i+1, len(agents)):
      
    current_score = evaluate(
      "rps", 
      [agents[i], agents[j]], 
      configuration={"episodeSteps": 100, 'tieRewardThreshold': 2}
    )
        
    scores[i][j] = current_score[0][0]
    scores[j][i] = current_score[0][1]

In [122]:
from pprint import pprint
pprint(scores)

[[0, -99.0, 99.0, 0, 46.0, 0, -4.0, -98.0, -67.0, 0, -25.0],
 [99.0, 0, -99.0, 0, -45.0, 0, -3.0, -97.0, -38.0, 0, 0],
 [-99.0, 99.0, 0, 0, 23.0, 0, -7.0, -99.0, -33.0, 0, 26.0],
 [0, 0, 0, 0, -5.0, -99.0, -14.0, -22.0, -15.0, -99.0, 0],
 [-46.0, 45.0, -23.0, 5.0, 0, 14.0, 9.0, -37.0, 27.0, 6.0, 21.0],
 [0, 0, 0, 99.0, -14.0, 0, 0, 2.0, -30.0, -99.0, 8.0],
 [4.0, 3.0, 7.0, 14.0, -9.0, 0, 0, -11.0, 11.0, -5.0, 15.0],
 [98.0, 97.0, 99.0, 22.0, 37.0, -2.0, 11.0, 0, 4.0, 0, 23.0],
 [67.0, 38.0, 33.0, 15.0, -27.0, 30.0, -11.0, -4.0, 0, 26.0, 9.0],
 [0, 0, 0, 99.0, -6.0, 99.0, 5.0, 0, -26.0, 0, -7.0],
 [25.0, 0, -26.0, 0, -21.0, -8.0, -15.0, -23.0, -9.0, 7.0, 0]]


In [135]:
# определяем победителя
winner = []
for i in range(len(agents)):
  count_win = 0
  for j in range(len(agents)):
    if scores[i][j] > 0:
      count_win += 1
  winner.append(count_win)
   

In [136]:
winner1 = winner.index(max(winner))
agents[winner1]

'statistical.py'

Наиболее выигрышной является стратегия, основанная на статистике игр