<a href="https://colab.research.google.com/github/SzekelyAnna/atchekegroup1lunarlanding/blob/main/LunarLanderEvtools.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from matplotlib import pyplot as plt
import numpy as np
from numpy import random
import pandas as pd

In [None]:
# Registry of learning curves keyed by model name. It is filled in place by
# generate_sample_models() and passed to model_evaluations() further down.
all_model_rewards = {}


def generate_sample_models(number_of_models, n_timesteps=100):
    '''
    Fill the module-level ``all_model_rewards`` dict with random sample curves.

    One entry is written per model under the keys 'DQN0', 'DQN1', ...; an
    existing entry with the same key is overwritten. Each entry is a dict with
        'x'       : the timesteps 1..n_timesteps (float array),
        'y'       : n_timesteps rewards drawn with numpy's ``random.rand``,
        'comment' : a placeholder comment string.

    Parameters:
        number_of_models: how many sample models to generate.
        n_timesteps: length of every generated curve (default 100).

    Returns:
        None. The result is stored in ``all_model_rewards``.
    '''
    for n in range(number_of_models):
        model_version_num = 'DQN' + str(n)
        # A fresh x array per model, so entries never share (and cannot
        # accidentally co-mutate) the same underlying buffer.
        all_model_rewards[model_version_num] = {
            'x': np.linspace(1, n_timesteps, n_timesteps),
            'y': random.rand(n_timesteps),
            'comment': 'here comes the comment',
        }


generate_sample_models(10)

In [None]:
def model_evaluations(all_model_rewards, jumpstart_window=10, asymptotic_window=10):
    '''
    Compute summary metrics for every model's reward curve.

    Input: a dict in the following format:
    {'DQN5': {'x': array([  1.,   2.,   3.,  ....]),
             'y': array([0.47025739, 0.5788533 , 0.72454499,...]),
             'comment': 'comment for the model'}}

    Only the 'y' entry (the reward per timestep) is read.

    Metrics, each stored as a {model_name: value} dict:

    1. max_reward: the largest reward in the curve.
    2. jumpstart: the initial performance, i.e. the mean reward over the
       first `jumpstart_window` timesteps.
    3. asymptotic_performance: the final learned performance, i.e. the mean
       reward over the last `asymptotic_window` timesteps.
    4. total_reward: the total reward accumulated by the agent (the sum of
       the curve, i.e. the area under the learning curve).

    Parameters:
        all_model_rewards: the dict described above.
        jumpstart_window: number of leading timesteps averaged for jumpstart.
        asymptotic_window: number of trailing timesteps averaged for
            asymptotic performance.

    Returns:
        A dict with the keys 'max_reward', 'jumpstart', 'total_reward' and
        'asymptotic_performance'. The same dict is also bound to the
        module-level name `model_evs`.

    Raises:
        ValueError: if either window is smaller than 1.
    '''
    # The code that follows this function reads the module-level name
    # `model_evs`, so the result is published there as well as returned.
    global model_evs

    # A zero window would be silently wrong: y[-0:] selects the whole curve
    # and y[:0] is empty, so reject it up front.
    if jumpstart_window < 1 or asymptotic_window < 1:
        raise ValueError('jumpstart_window and asymptotic_window must be >= 1')

    max_reward = {}
    jumpstart = {}
    total_reward = {}
    asymptotic_performance = {}

    for name, entry in all_model_rewards.items():
        rewards = np.asarray(entry['y'])

        max_reward[name] = rewards.max()
        jumpstart[name] = rewards[:jumpstart_window].mean()
        total_reward[name] = rewards.sum()
        # Mean (not sum) so the value is a performance level on the same
        # scale as jumpstart and max_reward.
        asymptotic_performance[name] = rewards[-asymptotic_window:].mean()

    ## Add the metrics to the final evaluation metric
    model_evs = {
        'max_reward': max_reward,
        'jumpstart': jumpstart,
        'total_reward': total_reward,
        'asymptotic_performance': asymptotic_performance,
    }
    return model_evs

    
    
    
# Run the evaluation; the DataFrame below is built from the module-level
# `model_evs` name (one column per metric, one row per model).
model_evaluations(all_model_rewards)
results = pd.DataFrame(data=model_evs)
# Last expression of the cell: a styled table with the per-column maximum
# highlighted in light green.
results.style.highlight_max(axis=0, color='lightgreen')

In [None]:
def transfer_ratio(transfer_learner, scratch_model):
    '''
    Transfer Ratio: The ratio of the total reward accumulated by the transfer
    learner and the total reward accumulated by the non-transfer learner.

    Parameters:
        transfer_learner: model dict whose 'y' entry holds the rewards of the
            model trained with transfer.
        scratch_model: model dict whose 'y' entry holds the rewards of the
            model trained from scratch.

    Returns:
        sum(transfer_learner['y']) / sum(scratch_model['y']). A value above 1
        means the transfer learner accumulated more reward. The two curves do
        not need to have the same length.
    '''
    # Sum each curve first and divide once: summing the per-timestep ratios
    # instead would not give the ratio of the accumulated rewards.
    transfer_total = np.asarray(transfer_learner['y']).sum()
    scratch_total = np.asarray(scratch_model['y']).sum()
    return transfer_total / scratch_total




def time_to_threshold(transfer_learner, scratch_model):
    '''
    Time to Threshold: The learning time needed by the agent to achieve a
    pre-specified performance level may be reduced via knowledge transfer.

    The threshold used here is the scratch model's maximum reward. It could be
    changed to
        - any fixed number
        - a ratio of the scratch model's maximum reward
        - the average of the final performance (averaged over the last n
          timesteps) of the scratch model.

    Parameters:
        transfer_learner: model dict whose 'y' entry holds the rewards of the
            model trained with transfer.
        scratch_model: model dict whose 'y' entry holds the rewards of the
            model trained from scratch.

    Returns:
        A message string. If the transfer learner reaches the threshold, the
        message reports the first position where it does so (a 0-based index
        into transfer_learner['y'], not a value taken from 'x') together with
        the threshold. Otherwise the message states that the threshold is
        never reached.
    '''
    threshold = np.asarray(scratch_model['y']).max()

    # Positions at which the transfer learner is at or above the threshold.
    hits = np.flatnonzero(np.asarray(transfer_learner['y']) >= threshold)

    # Without this guard, indexing the first hit raises IndexError whenever
    # the transfer learner stays below the scratch model's best reward.
    if hits.size == 0:
        return 'Transfer learner\'s performance never reaches the threshold (scratch model\'s max performance). Threshold is {}'.format(threshold)

    threshold_index = hits[0]
    return 'Transfer learner\'s performance reaches the threashold (scratch model\'s max performance) at timestap {}. Threshold is {}'.format(threshold_index, threshold)
    
    