In [17]:
import gym

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import json
import math
import random
import numpy as np
import scipy as sp
import scipy.stats as st
import scipy.integrate as integrate
from scipy.stats import multivariate_normal
from sklearn import linear_model
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
import statsmodels.api as sm
from matplotlib.colors import LogNorm
import pickle
import collections
from tqdm import tqdm
import copy
from collections import deque

from joblib import Parallel, delayed
import multiprocessing
from collections import namedtuple
from itertools import count

import cProfile
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.autograd import Variable

# Global plotting defaults: seaborn theme and palette plus shared figure settings.
sns.set_style("whitegrid")
sns.set_palette("colorblind")
palette = sns.color_palette()
figsize = (15,8)
legend_fontsize = 16

from matplotlib import rc
rc('font',**{'family':'sans-serif'})
rc('text', usetex=True)
# Every rc('text.latex', preamble=...) call REPLACES the previous preamble, so
# two separate calls would silently drop the first package. Both packages are
# therefore passed in a single preamble string.
rc('text.latex', preamble='\n'.join([
    r'\usepackage[utf8]{inputenc}',
    r'\usepackage[russian]{babel}',
]))
rc('figure', **{'dpi': 300})

## Реализуем крестики-нолики

In [2]:
# Default board height, board width and winning-line length; used as the
# default constructor arguments of TicTacToe.
N_ROWS, N_COLS, N_WIN = 3, 3, 3

In [86]:
class TicTacToe(gym.Env):
    """Tic-tac-toe on an ``n_rows`` x ``n_cols`` board; ``n_win`` marks in a line win.

    ``self.board`` holds 1 (crosses, printed as 'x'), -1 (noughts, printed as
    'o') or 0 (empty cell). ``self.curTurn`` is the mark of the player to move;
    ``reset`` sets it to 1, so crosses always start.
    """

    def __init__(self, n_rows=N_ROWS, n_cols=N_COLS, n_win=N_WIN, clone=None):
        """Create a fresh board, or copy the position of ``clone`` when it is given.

        When ``clone`` is not None the size arguments are ignored and the
        dimensions, board contents and side to move are taken from ``clone``.
        """
        if clone is not None:
            self.n_rows, self.n_cols, self.n_win = clone.n_rows, clone.n_cols, clone.n_win
            self.board = copy.deepcopy(clone.board)
            self.curTurn = clone.curTurn
            self.emptySpaces = None
            self.boardHash = None
            # Previously a clone had no `gameOver` attribute at all until
            # isTerminal() was called on it; carry the flag over instead.
            self.gameOver = getattr(clone, 'gameOver', False)
        else:
            self.n_rows = n_rows
            self.n_cols = n_cols
            self.n_win = n_win

            self.reset()

    def getEmptySpaces(self):
        """Return the (row, col) pairs of all empty cells as an array (cached until the next move)."""
        if self.emptySpaces is None:
            res = np.where(self.board == 0)
            self.emptySpaces = np.array([ (i, j) for i,j in zip(res[0], res[1]) ])
        return self.emptySpaces

    def makeMove(self, player, i, j):
        """Write ``player``'s mark into cell (i, j) and drop the cached hash / empty-cell list."""
        self.board[i, j] = player
        self.emptySpaces = None
        self.boardHash = None

    def getHash(self):
        """Return the board as a string with one character per cell, row by row.

        Each cell value is shifted by +1, so noughts/empty/crosses become
        '0'/'1'/'2'. The string is cached until the next move.
        """
        if self.boardHash is None:
            cells = self.board.reshape(self.n_rows * self.n_cols)
            self.boardHash = ''.join(['%s' % (x+1) for x in cells])
        return self.boardHash

    def _line_starts_at(self, i, j, player):
        """Tell whether a winning line of ``player`` starts at (i, j).

        Lines are only followed downwards, to the right, and along the two
        downward diagonals; since every mark of the player is tried as a
        starting point this covers all lines on the board.
        """
        n = self.n_win
        fits_down = i <= self.n_rows - n
        fits_right = j <= self.n_cols - n
        fits_left = j >= n - 1
        if fits_down and np.all(self.board[i:i+n, j] == player):
            return True
        if fits_right and np.all(self.board[i, j:j+n] == player):
            return True
        if fits_down and fits_right and all(self.board[i+k, j+k] == player for k in range(n)):
            return True
        if fits_down and fits_left and all(self.board[i+k, j-k] == player for k in range(n)):
            return True
        return False

    def isTerminal(self):
        """Check whether the game is over from the point of view of the side to move.

        Returns ``self.curTurn`` if that player has a winning line, 0 if there
        is no winning line and no empty cell is left, and None if the game goes
        on. ``self.gameOver`` is updated accordingly.
        """
        cur_p = self.curTurn
        rows, cols = np.where(self.board == cur_p)
        for i, j in zip(rows, cols):
            if self._line_starts_at(i, j, cur_p):
                self.gameOver = True
                return self.curTurn

        if len(self.getEmptySpaces()) == 0:
            self.gameOver = True
            return 0

        self.gameOver = False
        return None

    def printBoard(self):
        """Print the board as an ASCII grid ('x' for 1, 'o' for -1, blank for 0)."""
        tokens = {1: 'x', -1: 'o', 0: ' '}
        separator = '----'*(self.n_cols)+'-'
        for i in range(0, self.n_rows):
            print(separator)
            out = '| '
            for j in range(0, self.n_cols):
                # A lookup table instead of three independent `if`s: `token`
                # can no longer be unbound or stale for an unexpected value.
                out += tokens[int(self.board[i, j])] + ' | '
            print(out)
        print(separator)

    def getState(self):
        """Return the observation tuple ``(board hash, empty cells, side to move)``."""
        return (self.getHash(), self.getEmptySpaces(), self.curTurn)

    def action_from_int(self, action_int):
        """Convert a flat cell index into a ``(row, col)`` tuple of Python ints.

        Accepts Python/NumPy scalars as well as one-element arrays (such as
        a row of ``np.argwhere``). Integer ``divmod`` replaces the former float
        division, and ``.item()`` avoids the deprecated implicit
        array-to-scalar conversion.
        """
        flat = int(np.asarray(action_int).item())
        return divmod(flat, self.n_cols)

    def int_from_action(self, action):
        """Convert a ``(row, col)`` action into its flat, row-major cell index."""
        return action[0] * self.n_cols + action[1]
    
    def step(self, action):
        """Play ``action`` (row, col) for the side to move.

        Returns ``(state, reward, done, info)``. Moving into an occupied cell
        leaves the board and the turn untouched and returns reward -10 with
        ``done=True``. Otherwise the reward is the result of ``isTerminal``
        (1 / -1 for a win of crosses / noughts, 0 for a draw or an unfinished
        game) and the turn passes to the other player.
        """
        if self.board[action[0], action[1]] != 0:
            return self.getState(), -10, True, {}
        self.makeMove(self.curTurn, action[0], action[1])
        reward = self.isTerminal()
        self.curTurn = -self.curTurn
        return self.getState(), 0 if reward is None else reward, reward is not None, {}

    def reset(self):
        """Clear the board and caches and give the move to crosses."""
        self.board = np.zeros((self.n_rows, self.n_cols), dtype=int)
        self.boardHash = None
        self.gameOver = False
        self.emptySpaces = None
        self.curTurn = 1

In [87]:
def plot_test_game(env, pi1, pi2, random_crosses=False, random_naughts=True):
    """Play one game between two policies and print the board before every move.

    ``pi1`` moves when ``env.curTurn == 1`` (crosses), ``pi2`` otherwise; both
    are queried with exploration switched off. When a step returns reward 1 or
    -1 the winner is announced and the final board is printed.
    ``random_crosses`` and ``random_naughts`` are accepted but not used.
    """
    env.reset()
    finished = False
    while not finished:
        board_hash = env.getHash()
        # Result unused; the call is kept so the environment is queried
        # exactly as before.
        env.getEmptySpaces()
        env.printBoard()
        mover = pi1 if env.curTurn == 1 else pi2
        move = mover.get_action(env, board_hash, False)
        _, reward, finished, _ = env.step(move)
        for winning_reward, message in ((1, "Крестики выиграли!"), (-1, "Нолики выиграли!")):
            if reward == winning_reward:
                print(message)
                env.printBoard()

In [88]:
def test_games(env, pi1, pi2, games):
    results = {'x': 0, '0': 0, 'total': 0}
    for i in range(games):
        done = False
        env.reset()
        while not done:
            s, actions = env.getHash(), env.getEmptySpaces()
            if env.curTurn == 1:
                a = pi1.get_action(env, s, False)
            else:
                a = pi2.get_action(env, s, False)
            observation, reward, done, info = env.step(a)
            if reward == 1:
                results['x'] += 1
            if reward == -1:
                results['0'] += 1
        results['total'] += 1
    return results

In [253]:
def _read_user_move(env):
    """Read a (row, col) move from stdin, asking again until it names an empty cell."""
    while True:
        try:
            x = int(input())
            y = int(input())
        except ValueError:
            print("Нужно ввести два целых числа: строку и столбец.")
            continue
        # Only cells reported as empty are accepted; this also rejects
        # negative indices, which NumPy would otherwise wrap around.
        if any(x == i and y == j for i, j in env.getEmptySpaces()):
            return np.array([x, y])
        print("Эта клетка занята или находится вне поля, попробуйте ещё раз.")


def plot_user_game(env, pi1, pi2, random_crosses=False, random_naughts=True):
    """Play one interactive game: ``pi1`` plays crosses, the user plays noughts.

    The board is printed before every move. On the user's turn two integers
    (row, then column) are read from stdin; invalid input is rejected and asked
    for again instead of crashing or ending the game with the -10 penalty.
    The result is announced when the game ends.
    ``pi2``, ``random_crosses`` and ``random_naughts`` are accepted for
    signature compatibility but are not used.
    """
    done = False
    env.reset()
    while not done:
        s = env.getHash()
        env.printBoard()
        if env.curTurn == 1:
            a = pi1.get_action(env, s, False)
        else:
            a = _read_user_move(env)
        observation, reward, done, info = env.step(a)
        if reward == 1:
            print("Крестики выиграли!")
            env.printBoard()
        elif reward == -1:
            print("Нолики выиграли!")
            env.printBoard()
        elif reward == -10:
            # The agent chose an occupied cell; the environment ends the game.
            print("Недопустимый ход — игра окончена.")
            env.printBoard()
        elif done:
            print("Ничья!")
            env.printBoard()

# Часть первая: крестики-нолики при помощи Q-обучения

In [90]:
class QL():
    """Tabular Q-learning agent for a square ``desk_size`` x ``desk_size`` board.

    ``self.q`` maps a board hash to an array with one Q-value per cell
    (flat, row-major index). Because the opponent moves between two of the
    agent's own moves, the agent remembers its last (state, action, reward)
    and completes that update the next time it is asked to move.
    """

    def __init__(self, desk_size):
        self.q = dict()
        self.desk_size = desk_size
        self.gamma = 0.9
        
        self.old_hash = None
        self.old_action = None
        self.old_reward = None
        
    def forget_game(self):
        """Drop the remembered move (called when a game has ended)."""
        self.old_hash = None
        self.old_action = None
        self.old_reward = None

    def _row(self, s_hash):
        """Return the Q-value array of ``s_hash``, creating a zero row on first use."""
        if s_hash not in self.q:
            self.q[s_hash] = np.zeros(self.desk_size**2)
        return self.q[s_hash]

    def _action_index(self, action):
        """Flat row-major index of a ``(row, col)`` action on the agent's own board size."""
        return action[0] * self.desk_size + action[1]
        
    def get_action(self, env, state, rand):
        """Choose a move for the current position of ``env``.

        With ``rand`` true a uniformly random empty cell is returned. Otherwise
        the action with the largest Q-value for ``env.getHash()`` is returned,
        ties broken uniformly at random (occupied cells are not masked out).
        ``state`` is not used.
        """
        if rand:
            actions = env.getEmptySpaces()
            idx = np.random.randint(len(actions))
            action = actions[idx]
        else:
            row = self._row(env.getHash())
            best = np.flatnonzero(row == row.max())
            idx = np.random.randint(len(best))
            # Pass a plain int: the original handed over a 1-element array,
            # relying on a deprecated implicit array-to-scalar conversion.
            action = env.action_from_int(int(best[idx]))
        return action
    
    # Updates the policy after the opponent has moved
    def update(self, old_hash, action, reward):
        """Register the agent's move ``action`` made in state ``old_hash``.

        The first call of a game only remembers the move. Every later call
        first completes the update of the previously remembered move, using the
        current state as its successor:
        ``Q[prev_state][prev_action] = prev_reward + gamma * max(Q[state])``.
        """
        if self.old_hash is None:
            self.old_hash = old_hash
            self.old_action = action
            self.old_reward = reward
            return
        
        new_hash = old_hash
        prev_row = self._row(self.old_hash)
        next_row = self._row(new_hash)
        # Uses the agent's own board size instead of the notebook-level
        # `desk_size` / `env` globals the original depended on.
        prev_row[self._action_index(self.old_action)] = self.old_reward + self.gamma * next_row.max()
            
        self.old_hash = new_hash
        self.old_action = action
        self.old_reward = reward
    
    # Updates the policy immediately because the game has ended
    def change_q(self, reward, s_hash, action):
        """Set ``Q[s_hash][action]`` directly to ``reward`` (terminal transition).

        Does nothing when there is no remembered move (``s_hash`` or ``action``
        is None), e.g. when a game ends before this agent has moved.
        """
        if s_hash is None or action is None:
            return
        self._row(s_hash)[self._action_index(action)] = reward

In [91]:
def q_train(env, eps, n_steps, pi1, pi2):
    """Train two tabular agents by self-play for ``n_steps`` moves in total.

    ``pi1`` moves when ``env.curTurn == 1``, ``pi2`` otherwise; each move is
    random with probability ``eps``. A step with reward +-1 is credited as +1
    to the mover and -1 to the other agent's remembered move; reward -10 is
    written straight into the mover's table. When a game ends the environment
    is reset and both agents forget their pending move.
    """
    env.reset()
    for _ in tqdm(range(n_steps)):
        mover = pi1 if env.curTurn == 1 else pi2
        explore = random.random() < eps
        state_hash = env.getHash()
        move = mover.get_action(env, state_hash, explore)

        _, reward, done, _ = env.step(move)
        if abs(reward) == 1:
            reward = abs(reward)
            # The turn has already passed on, so curTurn now names the loser.
            loser = pi1 if env.curTurn == 1 else pi2
            loser.change_q(-reward, loser.old_hash, loser.old_action)
            mover.change_q(reward, state_hash, move)
        elif reward == -10:
            mover.change_q(reward, state_hash, move)
        mover.update(state_hash, move, reward)

        if done:
            env.reset()
            pi1.forget_game()
            pi2.forget_game()

In [92]:
# Self-play training of two tabular Q-learning agents on a 3x3 board
# (pi1 moves when env.curTurn == 1, i.e. crosses; pi2 plays noughts).
desk_size = 3
env = TicTacToe(n_rows=desk_size, n_cols=desk_size, n_win=3)
pi1 = QL(desk_size)
pi2 = QL(desk_size)

# eps = 0.1 (probability of a random move), 1,000,000 moves in total.
q_train(env, 0.1, 1000000, pi1, pi2)

100%|██████████| 1000000/1000000 [02:16<00:00, 7315.40it/s]


In [93]:
# Show that the agent is able to win: pi1 plays crosses, the noughts' moves
# are read from stdin as two integers (row, then column); pi2 is not used here.
env = TicTacToe(n_rows=3, n_cols=3, n_win=3)
plot_user_game(env, pi1, pi2)

-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
(2, 0)
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
| x |   |   | 
-------------
1
1
-------------
|   |   |   | 
-------------
|   | o |   | 
-------------
| x |   |   | 
-------------
(1, 0)
-------------
|   |   |   | 
-------------
| x | o |   | 
-------------
| x |   |   | 
-------------
0
0
-------------
| o |   |   | 
-------------
| x | o |   | 
-------------
| x |   |   | 
-------------
(2, 2)
-------------
| o |   |   | 
-------------
| x | o |   | 
-------------
| x |   | x | 
-------------
0
1
-------------
| o | o |   | 
-------------
| x | o |   | 
-------------
| x |   | x | 
-------------
(2, 1)
Крестики выиграли!
-------------
| o | o |   | 
-------------
| x | o |   | 
-------------
| x | x | x | 
-------------


In [94]:
# Let the two trained agents play 1000 games against each other without
# exploration and report the share of games won by each side.
env = TicTacToe(n_rows=3, n_cols=3, n_win=3)
results = test_games(env, pi1, pi2, 1000)
print('Побед крестиков: {}%'.format(results['x'] / results['total'] * 100))
print('Побед ноликов: {}%'.format(results['0'] / results['total'] * 100))

Побед крестиков: 0.7000000000000001%
Побед ноликов: 0.0%


In [294]:
# Same tabular self-play training on a 4x4 board with 4 in a row to win.
# NOTE: this rebinds desk_size, env, pi1 and pi2 from the 3x3 experiment.
desk_size = 4
env = TicTacToe(n_rows=desk_size, n_cols=desk_size, n_win=4)
pi1 = QL(desk_size)
pi2 = QL(desk_size)

# eps = 0.1, 20,000,000 moves in total (about 45 minutes in the recorded run).
q_train(env, 0.1, 20000000, pi1, pi2)

100%|██████████| 20000000/20000000 [45:31<00:00, 7321.30it/s] 


In [300]:
# Interactive 4x4 game: pi1 plays crosses, noughts are entered from stdin.
env = TicTacToe(n_rows=4, n_cols=4, n_win=4)
plot_user_game(env, pi1, pi2)

-----------------
|   |   |   |   | 
-----------------
|   |   |   |   | 
-----------------
|   |   |   |   | 
-----------------
|   |   |   |   | 
-----------------
(2, 1)
-----------------
|   |   |   |   | 
-----------------
|   |   |   |   | 
-----------------
|   | x |   |   | 
-----------------
|   |   |   |   | 
-----------------
1
1
-----------------
|   |   |   |   | 
-----------------
|   | o |   |   | 
-----------------
|   | x |   |   | 
-----------------
|   |   |   |   | 
-----------------
(0, 3)
-----------------
|   |   |   | x | 
-----------------
|   | o |   |   | 
-----------------
|   | x |   |   | 
-----------------
|   |   |   |   | 
-----------------
0
2
-----------------
|   |   | o | x | 
-----------------
|   | o |   |   | 
-----------------
|   | x |   |   | 
-----------------
|   |   |   |   | 
-----------------
(1, 2)
-----------------
|   |   | o | x | 
-----------------
|   | o | x |   | 
-----------------
|   | x |   |   | 
-----------------
|   |   |   

In [301]:
# 1000 exploration-free games between the two 4x4 agents; prints the share
# of games won by crosses and by noughts.
env = TicTacToe(n_rows=4, n_cols=4, n_win=4)
results = test_games(env, pi1, pi2, 1000)
print('Побед крестиков: {}%'.format(results['x'] / results['total'] * 100))
print('Побед ноликов: {}%'.format(results['0'] / results['total'] * 100))

Побед крестиков: 1.7000000000000002%
Побед ноликов: 1.2%


# Часть вторая: добавим нейронных сетей

In [243]:
# Hyper-parameters read by the DQN agent.
GAMMA = 0.9  # discount applied to the target network's max Q-value
STEPS_PER_UPDATE = 4  # a gradient step is attempted every this many agent steps
STEPS_PER_TARGET_UPDATE = STEPS_PER_UPDATE * 1000  # target network is re-copied at this interval
BATCH_SIZE = 128  # number of transitions sampled per gradient step
LEARNING_RATE = 0.002  # Adam learning rate
REPLAY_LEN = 4000  # maximum number of transitions kept in the replay buffer

In [244]:
class DQN_model(nn.Module):
    """Q-network: one 3x3 convolution (32 filters) followed by two linear layers.

    The input is a batch of single-channel ``state_dim`` x ``state_dim``
    boards; the output has one Q-value per action. This variant uses a hidden
    layer of 128 units.

    NOTE(review): a later cell of this notebook defines ``DQN_model`` again
    with 32 hidden units; when the notebook is run top to bottom that later
    definition replaces this one.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.hiddim = 128
        self.conv_size = 3
        self.conv_number = 32
        self.relu = nn.ReLU()
        # An unpadded convolution shrinks each side by (kernel size - 1).
        conv_side = state_dim - self.conv_size + 1
        self.lin_input = conv_side**2 * self.conv_number

        self.conv = nn.Conv2d(1, self.conv_number, self.conv_size)
        self.liner1 = nn.Linear(self.lin_input, self.hiddim)
        self.liner2 = nn.Linear(self.hiddim, action_dim)

    def forward(self, x):
        """Return Q-values of shape (batch, action_dim) for boards of shape (batch, 1, H, W)."""
        flat = self.conv(x).flatten(start_dim=1)
        hidden = self.relu(self.liner1(flat))
        return self.liner2(hidden)

In [245]:
class DQN_duo_model(nn.Module):
    """Dueling Q-network: a shared 3x3 convolution feeding a value and an advantage head.

    The two heads are combined as ``V + A - mean(A)``, where the mean is taken
    over the actions of each sample.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.hiddim = 32
        self.conv_size = 3
        self.conv_number = 32
        self.relu = nn.ReLU()
        # An unpadded convolution shrinks each side by (kernel size - 1).
        conv_side = state_dim - self.conv_size + 1
        self.lin_input = conv_side**2 * self.conv_number

        self.conv = nn.Conv2d(1, self.conv_number, self.conv_size)

        # State value V(s): a single number per sample.
        self.value = self._head(1)
        # Advantages A(s, a): one number per action.
        self.advantage = self._head(action_dim)

    def _head(self, out_features):
        """Build a two-layer head mapping the conv features to ``out_features`` outputs."""
        return nn.Sequential(
            nn.Linear(self.lin_input, self.hiddim),
            nn.ReLU(),
            nn.Linear(self.hiddim, out_features),
        )

    def forward(self, x):
        """Return Q-values of shape (batch, action_dim) for boards of shape (batch, 1, H, W)."""
        features = self.conv(x).flatten(start_dim=1)
        state_value = self.value(features)
        adv = self.advantage(features)
        q_values = state_value + adv
        return q_values - adv.mean(dim=1, keepdim=True)

In [246]:
class DQN_model(nn.Module):
    """Q-network: one 3x3 convolution (32 filters), a 32-unit hidden layer and an output layer.

    Maps a batch of single-channel ``state_dim`` x ``state_dim`` boards to one
    Q-value per action.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.hiddim = 32
        self.conv_size = 3
        self.conv_number = 32
        self.relu = nn.ReLU()
        # Without padding the convolution output has (state_dim - kernel + 1)
        # cells per side; all filters are flattened into one feature vector.
        cells_per_side = state_dim - self.conv_size + 1
        self.lin_input = cells_per_side**2 * self.conv_number

        self.conv = nn.Conv2d(
            in_channels=1,
            out_channels=self.conv_number,
            kernel_size=self.conv_size,
        )
        self.liner1 = nn.Linear(self.lin_input, self.hiddim)
        self.liner2 = nn.Linear(self.hiddim, action_dim)

    def forward(self, x):
        """Return Q-values of shape (batch, action_dim) for boards of shape (batch, 1, H, W)."""
        features = torch.flatten(self.conv(x), start_dim=1)
        return self.liner2(self.relu(self.liner1(features)))

In [247]:
class DQN:
    """Deep Q-learning agent with a replay buffer and a periodically copied target network.

    States are sequences of ``state_dim**2`` numbers (one per board cell) and
    actions are ``(row, col)`` pairs on a ``state_dim`` x ``state_dim`` board.
    As the opponent moves between two of the agent's own moves, the agent
    remembers its last move and stores the transition once it sees the next
    state it has to act in.
    """

    def __init__(self, state_dim, action_dim, is_duo):
        """Build the online network (dueling when ``is_duo``), its target copy, buffer and optimiser."""
        self.steps = 0
        self.state_dim = state_dim
        self.model = DQN_duo_model(state_dim, action_dim) if is_duo else DQN_model(state_dim, action_dim)
        self.model_t = copy.deepcopy(self.model)
        self.replay = deque(maxlen=REPLAY_LEN)
        self.batch_size = BATCH_SIZE
        self.criterion = torch.nn.MSELoss()
        self.optim = torch.optim.Adam(self.model.parameters(), lr=LEARNING_RATE)
        
        self.old_hash = None
        self.old_action = None
        self.old_reward = None
        self.old_done = None
        
    def forget_game(self):
        """Drop the remembered move (called when a game has ended)."""
        self.old_hash = None
        self.old_action = None
        self.old_reward = None
        self.old_done = None

    def sample_batch(self):
        """Sample ``batch_size`` transitions and return them as five parallel tuples."""
        batch = random.sample(self.replay, self.batch_size)
        return list(zip(*batch))

    def _action_index(self, action):
        """Flat row-major index of a ``(row, col)`` action on the agent's own board size."""
        return action[0] * self.state_dim + action[1]

    def _boards(self, states):
        """Turn a sequence of flat states into a float tensor of shape (N, 1, state_dim, -1)."""
        arr = np.array(states, dtype=np.float32)
        return torch.tensor(arr.reshape(len(states), 1, self.state_dim, -1))
        
    def train_step(self, batch):
        """Run one gradient step on ``batch`` = (states, actions, next_states, rewards, dones).

        The regression target is ``reward + GAMMA * max_a Q_target(next_state, a)``,
        with the bootstrap term zeroed for terminal transitions.
        """
        state, action, next_state, reward, done = batch
        # The batch dimension is taken from the data, not from the global
        # BATCH_SIZE, so it always agrees with what sample_batch() returned.
        state = self._boards(state)
        # np.int was removed in NumPy 1.24; int64 is also what gather() needs.
        action = torch.tensor(np.array(action, dtype=np.int64))
        next_state = self._boards(next_state)
        reward = torch.tensor(np.array(reward, dtype=np.float32))
        done = torch.tensor(np.array(done, dtype=np.float32))
        
        # The target is a constant for the loss: no graph through model_t.
        with torch.no_grad():
            next_q = self.model_t(next_state).max(dim=1)[0]
            next_q = next_q.masked_fill(done == 1, 0.0)
            target = reward + torch.mul(next_q, GAMMA)
        
        q_taken = self.model(state).gather(1, action.unsqueeze(1)).squeeze(1)
        
        self.optim.zero_grad() 
        loss = self.criterion(q_taken, target)
        loss.backward()
        self.optim.step()           

    def get_action(self, env, state, rand):
        """Choose a move for ``state``.

        With ``rand`` true a uniformly random empty cell of ``env`` is returned.
        Otherwise the action with the largest predicted Q-value is returned
        (occupied cells are not masked out). ``state`` may be the board hash
        string or a sequence of per-cell numbers.
        """
        if rand:
            actions = env.getEmptySpaces()
            idx = np.random.randint(len(actions))
            action = actions[idx]
        else:
            if type(state) == str:
                state = [int(x) for x in state]
            with torch.no_grad():
                q_values = self.model(self._boards([state])).numpy()
            action = env.action_from_int(int(np.argmax(q_values)))
        return action

    def _learn(self):
        """Shared bookkeeping after a stored transition: maybe train, maybe refresh the target."""
        if (self.steps % STEPS_PER_UPDATE == 0) and (len(self.replay) > self.batch_size):
            self.train_step(self.sample_batch())
        if self.steps % STEPS_PER_TARGET_UPDATE == 0:
            self.model_t = copy.deepcopy(self.model)
        self.steps += 1
    
    # Updates the policy after the opponent has moved
    def update(self, reward, old_hash, action, done):
        """Register the agent's move ``action`` made in state ``old_hash``.

        The first call of a game only remembers the move. Every later call
        stores the previously remembered move as a transition whose successor
        is the current state, then runs the training bookkeeping.
        """
        if self.old_hash is None:
            self.old_hash = old_hash
            self.old_action = action
            self.old_reward = reward
            self.old_done = done
            return
        
        new_hash = old_hash
        # Computed from the agent's own board size instead of the
        # notebook-level `env` global the original depended on.
        old_act_int = self._action_index(self.old_action)
        
        self.replay.append((self.old_hash, old_act_int, new_hash, self.old_reward, self.old_done))
        self.old_hash = new_hash
        self.old_action = action
        self.old_reward = reward
        self.old_done = done
        
        self._learn()
    
    # Updates the policy immediately because the game has ended
    def quick_update(self, reward, state, action, done, next_state):
        """Store a complete transition immediately and run the training bookkeeping.

        Does nothing when there is no move to credit (``state`` or ``action``
        is None), e.g. when a game ends before this agent has moved.
        """
        if state is None or action is None:
            return
        self.replay.append((state, self._action_index(action), next_state, reward, done))
        self._learn()

In [248]:
def dqn_train(env, eps, desk_size, n_steps, pi1, pi2):
    """Train two DQN agents by self-play for ``n_steps`` moves in total.

    ``pi1`` moves when ``env.curTurn == 1``, ``pi2`` otherwise; each move is
    random with probability ``eps``. States handed to the agents are the board
    hash converted to a list of ints. A step with reward +-1 is stored as +1
    for the mover and -1 for the other agent's remembered move; reward -10 is
    stored for the mover. These terminal transitions use an all-zero board of
    ``desk_size**2`` cells as successor state. When a game ends the
    environment is reset and both agents forget their pending move.
    """
    # One shared placeholder list serves as successor of every terminal move.
    terminal_state = [0] * desk_size**2
    env.reset()
    for _ in tqdm(range(n_steps)):
        mover = pi1 if env.curTurn == 1 else pi2
        explore = random.random() < eps
        state = [int(ch) for ch in env.getHash()]
        move = mover.get_action(env, state, explore)
        _, reward, done, _ = env.step(move)

        if abs(reward) == 1:
            reward = abs(reward)
            # The turn has already passed on, so curTurn now names the loser.
            loser = pi1 if env.curTurn == 1 else pi2
            loser.quick_update(-reward, loser.old_hash, loser.old_action, done, terminal_state)
            mover.quick_update(reward, state, move, done, terminal_state)
        elif reward == -10:
            mover.quick_update(reward, state, move, done, terminal_state)
        mover.update(reward, state, move, done)

        if done:
            env.reset()
            pi1.forget_game()
            pi2.forget_game()

In [257]:
# Self-play training of two plain (non-dueling) DQN agents on a 3x3 board.
# NOTE: this rebinds desk_size, env, pi1 and pi2 from the Q-learning part.
desk_size = 3
eps = 0.2  # probability of a random move during training
n_steps = 400000  # total number of moves played
env = TicTacToe(n_rows=desk_size, n_cols=desk_size, n_win=3)
pi1 = DQN(state_dim=desk_size, action_dim=desk_size**2, is_duo=False)
pi2 = DQN(state_dim=desk_size, action_dim=desk_size**2, is_duo=False)

dqn_train(env, eps, desk_size, n_steps, pi1, pi2)

100%|██████████| 400000/400000 [09:22<00:00, 710.98it/s]


In [262]:
# Interactive 3x3 game against the DQN agent: pi1 plays crosses, noughts are
# entered from stdin.
env = TicTacToe(n_rows=3, n_cols=3, n_win=3)
plot_user_game(env, pi1, pi2)

-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
-------------
|   |   |   | 
-------------
|   |   | x | 
-------------
|   |   |   | 
-------------
1
1
-------------
|   |   |   | 
-------------
|   | o | x | 
-------------
|   |   |   | 
-------------
-------------
|   |   |   | 
-------------
|   | o | x | 
-------------
|   |   | x | 
-------------
0
2
-------------
|   |   | o | 
-------------
|   | o | x | 
-------------
|   |   | x | 
-------------
-------------
|   |   | o | 
-------------
|   | o | x | 
-------------
| x |   | x | 
-------------
0
0
-------------
| o |   | o | 
-------------
|   | o | x | 
-------------
| x |   | x | 
-------------
Крестики выиграли!
-------------
| o |   | o | 
-------------
|   | o | x | 
-------------
| x | x | x | 
-------------


In [263]:
# Since the policy is deterministic, the outcome is always the same
env = TicTacToe(n_rows=3, n_cols=3, n_win=3)
results = test_games(env, pi1, pi2, 1000)
print('Побед крестиков: {}%'.format(results['x'] / results['total'] * 100))
print('Побед ноликов: {}%'.format(results['0'] / results['total'] * 100))

Побед крестиков: 0.0%
Побед ноликов: 100.0%


In [264]:
# Self-play training of two dueling DQN agents (is_duo=True) on a 3x3 board.
# NOTE: this rebinds env, pi1 and pi2 from the plain DQN experiment.
desk_size = 3
eps = 0.2  # probability of a random move during training
n_steps = 200000  # total number of moves played
env = TicTacToe(n_rows=desk_size, n_cols=desk_size, n_win=3)
pi1 = DQN(state_dim=desk_size, action_dim=desk_size**2, is_duo=True)
pi2 = DQN(state_dim=desk_size, action_dim=desk_size**2, is_duo=True)

dqn_train(env, eps, desk_size, n_steps, pi1, pi2)

100%|██████████| 200000/200000 [06:24<00:00, 519.77it/s]


In [265]:
# Interactive 3x3 game against the dueling DQN agent: pi1 plays crosses,
# noughts are entered from stdin.
env = TicTacToe(n_rows=3, n_cols=3, n_win=3)
plot_user_game(env, pi1, pi2)

-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
-------------
|   |   |   | 
-------------
|   | x |   | 
-------------
|   |   |   | 
-------------
0
0
-------------
| o |   |   | 
-------------
|   | x |   | 
-------------
|   |   |   | 
-------------
-------------
| o |   |   | 
-------------
|   | x |   | 
-------------
| x |   |   | 
-------------
0
2
-------------
| o |   | o | 
-------------
|   | x |   | 
-------------
| x |   |   | 
-------------
-------------
| o | x | o | 
-------------
|   | x |   | 
-------------
| x |   |   | 
-------------
2
2
-------------
| o | x | o | 
-------------
|   | x |   | 
-------------
| x |   | o | 
-------------
-------------
| o | x | o | 
-------------
|   | x | x | 
-------------
| x |   | o | 
-------------
1
0
-------------
| o | x | o | 
-------------
| o | x | x | 
-------------
| x |   | o | 
-------------
Крестики выиграли!
-------------
| o | x | o | 
-------------
| o | x | x |

In [266]:
# 1000 exploration-free games between the two dueling DQN agents; prints the
# share of games won by crosses and by noughts.
env = TicTacToe(n_rows=3, n_cols=3, n_win=3)
results = test_games(env, pi1, pi2, 1000)
print('Побед крестиков: {}%'.format(results['x'] / results['total'] * 100))
print('Побед ноликов: {}%'.format(results['0'] / results['total'] * 100))

Побед крестиков: 100.0%
Побед ноликов: 0.0%
