# 1) Import packages

In [None]:
import random as rd
import numpy as np
import os 
import json

# 2) Load auxiliary functions

In [None]:
# Draw one Bernoulli sample: 1 with probability p, otherwise 0
def random_bool(p=0.1):
    """Return a single Bernoulli(p) draw as a plain Python int (0 or 1).

    A one-element binomial sample with a single trial is drawn from
    numpy's global random state and unwrapped from its array.
    """
    return np.random.binomial(1, p, 1).tolist()[0]

# Draw N normally distributed random numbers and return them as a list
def rnorm(mu, sigma, N):
    """Return N samples from a normal distribution as a list of floats.

    mu is the mean. sigma is forwarded as the ``scale`` argument of
    ``np.random.normal``, i.e. it is the standard deviation of the
    distribution (not the variance).
    """
    return np.random.normal(loc=mu, scale=sigma, size=N).tolist()

# NOTE(review): `agent` is only defined in the next section, so this line
# raises NameError when the cells are run top to bottom (unless `agent` was
# already defined earlier in the session). `blbla` is not used anywhere else
# in the visible notebook.
blbla = agent( )

# 3) Initialize class agent

In [None]:
class agent:
    """Epsilon-greedy learner for a multi-armed bandit with normal rewards.

    For every bandit the agent keeps the complete history of its value
    estimate in ``value_function``: a dict mapping the bandit index
    (0 .. n_bandits - 1) to a list whose last element is the current
    estimate. Every learning step appends one entry to every list.
    """

    def __init__(self, alpha=0.5, epsilon=0.01, mu=[0, 0], sigma=[1, 1],
                 init=None, time=10, save=False):
        """Set the learning parameters and the initial value function.

        alpha   -- step size used when an estimate is moved towards a reward
        epsilon -- probability of picking a random bandit in a learning step
        mu      -- per-bandit mean of the reward distribution
        sigma   -- per-bandit spread of the reward distribution (forwarded
                   to ``rnorm``)
        init    -- optional initial value estimate per bandit; zeros if None
        time    -- default number of steps for ``learn``
        save    -- stored on the instance; not read by any method of the class
        """
        self.alpha = alpha
        self.epsilon = epsilon
        # Copy the sequences so that neither the shared default lists nor a
        # caller-owned list is aliased by the instance.
        self.mu = list(mu)
        self.sigma = list(sigma)
        self.n_bandits = len(self.mu)
        self.time = time
        self.save = save
        # init_dict falls back to zeros when init is None.
        self.value_function = self.init_dict(range(self.n_bandits), init)

    def init_dict(self, names, init_val=None):
        """Return ``{name: [initial value]}`` for every entry of names.

        init_val supplies one starting value per name (matched by
        position); when it is None every name starts at 0. Raises
        IndexError if init_val has fewer entries than names.
        """
        names = list(names)
        if init_val is None:
            init_val = [0] * len(names)
        return {name: [init_val[i]] for i, name in enumerate(names)}

    def create_bandits(self, mu, sigma, N):
        """Pre-draw N rewards for every bandit.

        Returns a list with one entry per bandit, each a list of N samples
        from ``rnorm(mu[i], sigma[i], N)``. If mu and sigma differ in
        length a message is printed and None is returned.
        """
        if len(mu) != len(sigma):
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Unsufficient parameter input")
            return None
        return [rnorm(m, s, N) for m, s in zip(mu, sigma)]

    def decide(self, value_dict):
        """Return the index of the bandit with the highest current estimate.

        Only the last entry of each history is compared; on ties the lowest
        index wins. Keys are expected to be 0 .. len(value_dict) - 1.
        """
        latest = [value_dict[i][-1] for i in range(len(value_dict))]
        return latest.index(max(latest))

    def update_dict(self, dic, next_choice, value, alpha):
        """Append one new entry to every history in dic (in place).

        The chosen bandit moves towards the observed value,
        ``new = old + alpha * (value - old)``; every other bandit repeats
        its previous estimate so all histories keep the same length.
        """
        for key in range(len(dic)):
            history = dic[key]
            current = history[-1]
            if key == next_choice:
                history.append(current + alpha * (value - current))
            else:
                history.append(current)

    def learn(self, run_time=None):
        """Run run_time learning steps (``self.time`` steps when None).

        Each step picks a uniformly random bandit with probability epsilon
        and the currently best-valued bandit otherwise, then updates the
        value function with that bandit's pre-drawn reward for the step.
        Raises ValueError when mu and sigma have different lengths.
        """
        if run_time is None:
            run_time = self.time

        bandits = self.create_bandits(mu=self.mu, sigma=self.sigma, N=run_time)
        if bandits is None:
            # Fail with a clear message instead of a TypeError on None below.
            raise ValueError("mu and sigma must have the same length")
        choice_count = len(self.mu)

        for step in range(run_time):
            if random_bool(p=self.epsilon) == 1:
                choice = np.random.choice(choice_count)
            else:
                choice = self.decide(self.value_function)
            reward = bandits[choice][step]
            self.update_dict(self.value_function, choice, reward, self.alpha)

    def save_history(self, value_dict=None, name=None, path=None):
        """Write a value function to a file as JSON.

        value_dict -- dict to save; defaults to ``self.value_function``
        name       -- file name; defaults to
                      ``_alpha_<alpha>_epsilon_<epsilon>.txt``
        path       -- target directory; defaults to the current working
                      directory
        """
        if value_dict is None:
            value_dict = self.value_function
        if name is None:
            # Prepare file name from the learning parameters
            str_alpha = "_alpha_" + str(self.alpha)
            str_epsilon = "_epsilon_" + str(self.epsilon)
            name = str_alpha + str_epsilon + ".txt"
        if path is None:
            path = os.getcwd()
        # Join with a separator so the file lands inside path, not next to it.
        value_file = os.path.join(path, name)
        with open(value_file, 'w') as handle:
            json.dump(value_dict, handle)

    def re_init(self, init=None):
        """Discard the learned history and start a fresh value function.

        init optionally gives the new starting estimate per bandit; when it
        is None every bandit starts at 0 again.
        """
        self.value_function = self.init_dict(range(self.n_bandits), init)
     

# 4) DEMO

In [134]:
# Learning procedure: three bandits, 1000 learning steps
g = agent(sigma=[1, 2, 100], mu=[1, 2, 3])
g.learn(run_time=1000)

In [137]:
# Save current history
#g.save_history()
# Display the recorded value-estimate history of bandit 2: the initial value
# followed by one entry per learning step.
g.value_function.get(2)

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 98.32219396288704,
 47.10273390685226,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.246916367262294,
 -40.24691

In [None]:
# Re-initialize: called without arguments, so every bandit's history is reset
# to a single 0; the resulting value function is then displayed.
g.re_init()
g.value_function

In [140]:
# Minimal matplotlib check: draw a line through the points 1, 2, 3 and show it.
# The recorded run of this cell ended with "ValueError: unknown locale: UTF-8".
# NOTE(review): presumably a locale setting of the machine rather than a
# problem in this code - confirm.
import matplotlib.pyplot 
matplotlib.pyplot.plot([1,2,3])
matplotlib.pyplot.show()

ValueError: unknown locale: UTF-8

In [141]:
import numpy as np
import matplotlib.pyplot as plt

# Scatter plot of N points with independent uniform [0, 1) coordinates
N = 50
x, y = np.random.rand(N), np.random.rand(N)

plt.scatter(x, y)
plt.show()

ValueError: unknown locale: UTF-8