In [2]:
#General imports
import numpy as np
import pandas as pd
from scipy.stats import sem, t, ttest_ind, iqr

import matplotlib.pyplot as plt
%matplotlib inline

from snakeGame.screen import Screen

#Reinforcement learning
from learningModels.typesSnakeGame import SnakeAI, SnakeAIBorders
from learningModels.process import LearningProcess, compute_avg_return

#Auxiliary functions
from utils import read_data, sampler

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
def paths_to_data(snake_games, rewards, iterations=3):
    """Returns a list of lists with the paths to the losses and returns .csv files of each training run.

    One entry is produced for every (game, reward, iteration) combination, in that
    nesting order. Each run folder lives under './trained_agents/' and is named
    '<class name>_<length of state()>_<reward position>_<iteration>'.

    snake_games (list): List of game objects (e.g. SnakeAI or SnakeAIBorders). Only the
                        class name and the length of `state()` are used.
    rewards (list): List of dictionaries with the rewards used in the training. Only the
                    position of each reward in the list is used.
    iterations (int): Number of training runs per reward (defaults to 3).

    returns a list of [losses_path, returns_path] lists.
    """

    losses_and_returns = []

    for snake in snake_games:
        # Both values are constant for a given game, so compute them once per game
        # instead of twice per generated entry.
        game_name = snake.__class__.__name__
        state_size = len(snake.state())

        for num, _ in enumerate(rewards):
            for i in range(iterations):
                folder = './trained_agents/{}_{}_{}_{}'.format(game_name, state_size, num, i)
                losses_and_returns.append([folder + '/losses.csv', folder + '/returns.csv'])

    return losses_and_returns

In [4]:
def visual_return_losses(df_returns, df_losses, titles=True, reward=0):
    """Plot the returns and the losses of one training run side by side.

    The left panel shows every sample column in light gray and the 'average' column in
    red; the right panel shows the loss. The x axes are fixed to a 75000-iteration
    training: returns are placed every 5000 iterations (0 to 75000 inclusive) and
    losses every 3000 iterations (0 up to, but not including, 75000), so the inputs
    must have matching lengths.

    df_returns (DataFrame): Returns of the network. Must contain an 'average' column;
                            every column except the last one is drawn as a sample.
    df_losses (DataFrame): Losses of the network.
    titles (Bool): Whether to add the figure and panel titles.
    reward (int): Position of the reward in the global REWARDS list (only used in the title).
    """

    figure, (returns_ax, losses_ax) = plt.subplots(1, 2, figsize=(12,7))

    # Fixed x axes (see docstring).
    returns_x = range(0, 75000 + 1, 5000)
    losses_x = range(0, 75000, 3000)

    if titles:
        figure.suptitle('Training process performance \n {}'.format(REWARDS[reward]), fontsize=16)
        returns_ax.set_title('Evolution of the Average Return ')
        losses_ax.set_title('Evolution of the loss')

    # Left panel: individual samples in the background, their average on top.
    for column_name in df_returns.columns[0:-1]:
        returns_ax.plot(returns_x, df_returns[column_name], alpha = 0.15, color='gray')
    returns_ax.plot(returns_x, df_returns['average'], color='red', label='average')
    returns_ax.set_ylabel('Average Return')
    returns_ax.set_xlabel('Iterations')
    returns_ax.legend()

    # Right panel: loss evolution.
    losses_ax.plot(losses_x, df_losses)
    losses_ax.set_ylabel('Loss')
    losses_ax.set_xlabel('Iterations')

    plt.show()

In [5]:
def general_sample_visualization(losses_and_returns, reward):
    """Plot the returns and losses of the three training runs that belong to one reward.

    The function is fixed to 3 runs per reward and 10 samples per row of the returns
    log. Titles are only drawn on the first figure.

    losses_and_returns (list): List of [losses_path, returns_path] entries, with the
                               three runs of each reward stored consecutively.
    reward (int): Position of the reward in the rewards list.
    """

    # Runs are stored in groups of three, so the first run of a reward sits at reward * 3.
    first_run = reward * 3 if reward > 0 else reward
    reward_position = int(first_run / 3)
    sample_names = [str(sample) for sample in range(10)]

    for run, entry in enumerate(losses_and_returns[first_run:first_run + 3]):
        losses = pd.read_csv(entry[0], names=['losses'])
        returns = pd.read_csv(entry[1], names=sample_names)
        returns['average'] = returns.mean(numeric_only=True, axis=1)

        visual_return_losses(returns, losses, run == 0, reward_position)

In [7]:
def average_comparative(losses_and_returns, reward):
    """Compare the average return of the three training runs that belong to one reward.

    The x axis is fixed to a 75000-iteration training sampled every 5000 iterations
    (0 to 75000 inclusive), and every row of a returns file is read as 10 samples.
    Only the returns files are read; the losses files are not needed for this plot.

    losses_and_returns (list): List of [losses_path, returns_path] entries, with the
                               three runs of each reward stored consecutively.
    reward (int): Position of the reward in the rewards list.
    """

    # Runs are stored in groups of three, so the first run of a reward sits at reward * 3.
    if reward > 0:
        reward = reward*3

    fig, ax = plt.subplots(figsize=(10,7))
    iterations = range(0, 75000 + 1, 5000)
    fig.suptitle('Average Returns in each Iteration', fontsize=18)
    ax.set_xlabel('Iterations', fontsize=12)
    ax.set_ylabel('Returns', fontsize=12)

    sample_names = [str(num) for num in range(10)]

    for i, val in enumerate(losses_and_returns[reward:reward+3]):
        df_returns = pd.read_csv(val[1], names=sample_names)
        # Row-wise mean over the samples taken at each evaluation point.
        average = df_returns.mean(numeric_only=True, axis=1)
        ax.plot(iterations, average, label=str(i))
    ax.legend()
    plt.show()

In [8]:
def samples_per_iteration(reward, samples):
    """Collect samples for one reward from each of its training runs
    (the function is fixed to 3 runs and to the first game of SNAKE_GAMES).

    Relies on the notebook-level SNAKE_GAMES and REWARDS lists and on the `sampler`
    helper imported from utils.

    reward (int): Position of the reward in the REWARDS list.
    samples (int): Amount of samples that will be taken (forwarded to `sampler`).

    returns a pandas DataFrame with one column per run, named '0', '1' and '2'.
    """

    # Build every column first and concatenate once, instead of growing a frame
    # step by step starting from an empty DataFrame.
    columns = [
        pd.DataFrame(sampler(SNAKE_GAMES[0], REWARDS[reward], reward, i, samples), columns=[str(i)])
        for i in range(3)
    ]

    return pd.concat(columns, axis=1)

In [9]:
def histogram(data, bins=20):
    """Draw one histogram per iteration, annotated with its mean and standard deviation
    (the function is fixed to 3 iterations, i.e. the first three columns of data).

    data (DataFrame): Data of the samples, one column per iteration.
    bins (int): Number of bins of each histogram.
    """

    figure, axes = plt.subplots(1, 3, figsize=(12,7))

    for position, axis in enumerate(axes):
        column = data.iloc[:, position]
        avg = column.mean()
        s = np.std(column)

        axis.hist(column, bins=bins)
        axis.set_title('iteration {}'.format(position))
        axis.set_xlabel('Total Reward')
        axis.set_ylabel('Frequency')
        # Red vertical line marking the mean of the column.
        axis.axvline(avg, color='red')

        mean_line = r'$\bar{x}=%.2f$' % (avg, )
        std_line = r'$\mathrm{s}=%.2f$' % (s, )
        annotation = '\n'.join((mean_line, std_line))

        # Coordinates are relative to the axes (transAxes), not to the data.
        axis.text(0.6,0.85, annotation, fontsize=13, transform=axis.transAxes )

    plt.show()

In [10]:
def delete_outliers(data):
    """Replace the outliers with NaN using the inter quantile range
    (the function is fixed to 3 iterations, i.e. the first three columns of data).

    A value is an outlier when it lies below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR of
    its own column. Missing values are ignored when the quartiles are computed, so a
    column that already contains NaN is still filtered.

    Note: data is modified in place and the same object is returned.

    data (DataFrame): Data of the samples, one column per iteration.

    returns the same DataFrame, with the outliers replaced by NaN.
    """

    for position in range(3):
        # Resolve the real label so that the column read by position is also the
        # column written back, whatever the columns are called.
        label = data.columns[position]
        column = data.iloc[:, position]

        # Series.quantile skips NaN, unlike np.quantile which would return NaN.
        q_25, q_75 = column.quantile([0.25, 0.75])
        inter_qr = q_75 - q_25
        left = q_25 - 1.5 * inter_qr
        right = q_75 + 1.5 * inter_qr

        is_outlier = (column < left) | (column > right)
        data[label] = column.mask(is_outlier)

    return data

In [11]:
def confidence_interval(data, confidence=0.05):
    """Evaluates the two-sided Student t confidence interval of the mean of each column.

    For every column the interval is mean +/- t * standard error, where the standard
    error uses the sample standard deviation (ddof=1) and t is the 1 - confidence/2
    quantile of the Student t distribution with n - 1 degrees of freedom. n is the
    number of non-missing values of that column, so NaN values (e.g. removed outliers)
    do not count as observations.

    data (DataFrame): Samples data, one column per iteration.
    confidence (float): Significance level (alpha) of the interval; the default 0.05
                        gives a 95% confidence interval.

    returns a DataFrame with one row per column of data (indexed by position) and the
    columns 'left' and 'right'.
    """

    mean = data.mean()
    std_error = data.sem()
    # Per-column number of valid observations, aligned with the columns that have a mean.
    observations = data.count().reindex(mean.index)

    # Critical value with n - 1 degrees of freedom, computed column by column.
    critical = t.ppf(1 - confidence/2, (observations - 1).to_numpy())
    margin = std_error * critical

    intervals = pd.DataFrame({'left': (mean - margin).to_numpy(),
                              'right': (mean + margin).to_numpy()})

    return intervals

In [12]:
def test_means(best, data):
    """Evaluate a two side hypothesis t-test considering distinc variances.
    
    best (int): Number of the best iteration.
    data (DataFrame): Data of the samples.
    """
    results = {}
    for i in range(len(data.columns)):   
        if i == best:
            continue
        else:
            results['{}_vs_{}'.format(best, i)] = (list(ttest_ind(data.iloc[:,best], data.iloc[:,i], equal_var=False, nan_policy='omit')))
    
    df_results = pd.DataFrame(results, index=['Statistic', 'p_value']).transpose()
    
    return df_results