In [None]:
# We want to model an urn with multi-colored balls,
# where we can change the composition by adding balls.
# Furthermore, we want to be able to randomly draw
# from this urn, and visualize the change in composition
# (and therefore a change in the probability distribution.)
import numpy.random
from collections import Counter

In [None]:
# Start with an empty urn: a dict that will map color -> number of balls.
urn = {}

In [None]:
# Fill the urn with three balls of each starting color.
urn.update(dict.fromkeys(("blue", "yellow", "red"), 3))

In [None]:
# Display the urn's current contents (color -> number of balls).
urn

In [None]:
# Put one extra blue ball into the urn, then show the new blue count.
# (The urn is changed in place, so re-running this cell adds yet another ball.)
urn['blue'] = urn['blue'] + 1
urn['blue']

In [None]:
# Display the urn again to see the composition after adding a blue ball.
urn

In [None]:
# Total number of balls currently in the urn, summed over all colors.
total_num = sum(urn.values())
total_num

In [None]:
# Probability of drawing each color: its share of all balls,
# listed in the same order as the urn's keys.
probabilities = [count / total_num for count in urn.values()]
probabilities

In [None]:
# Pair every color with its probability so it can be looked up by name.
prob_dict = {color: chance for color, chance in zip(urn, probabilities)}
prob_dict

In [None]:
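# Equivalent explicit-loop form of the list comprehension used to build
# `probabilities` (kept for reference; it would need `probabilities = []` first):
#for color in urn:
#    probabilities.append(urn[color]/total_num)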

In [None]:
# IPython help: a trailing `?` shows the documentation for numpy.random.choice.
numpy.random.choice?

In [None]:
# Sample from the (static) urn "with replacement": the urn itself is not
# changed, so every draw uses the same probabilities.
# NOTE: despite the name `draw_hundred`, this takes 10000 draws.
draw_hundred = numpy.random.choice(list(urn), p=probabilities, size=10000)
# Tally how many times each color came up.
Counter(draw_hundred)

In [None]:
# Now we have an urn, and we know how to draw from it.
# A toy model of reinforcement learning will be a loop.
# The loop will encode assumptions about reinforcement,
# e.g. the goal, the amount of reinforcement (punishment).

In [None]:
def _urn_state(urn):
    '''Return (total ball count, {color: draw probability}) for the urn.'''
    total = sum(urn.values())
    return total, {color: count / total for color, count in urn.items()}


def reinforcement_loop(urn=None, goal=None, iterations=10):
    '''
    Run a simple reinforcement loop on an urn of colored balls.

    On every iteration one ball is drawn at random (with replacement),
    weighted by the current composition of the urn. If the drawn color
    equals the goal, one extra ball of that color is added, so the goal
    becomes more likely on later draws. Each draw and the resulting
    ball count are printed.

    Takes three arguments.

    urn: dictionary mapping color -> number of balls; it is modified
         in place, so the caller sees the reinforced counts afterwards
    goal: key from urn (the color that gets reinforced)
    iterations: integer, number of draws to perform

    Returns None.

    Raises ValueError if no urn is given, and AssertionError if goal
    is not one of the urn's colors.
    '''

    # The default urn=None would otherwise fail with an opaque AttributeError.
    if urn is None:
        raise ValueError('Please provide an urn dictionary')

    assert goal in urn, 'Please choose a color from the urn'

    # Initialize total and probabilities
    total_num, prob_dict = _urn_state(urn)

    # Loop
    for _ in range(iterations):

        # Draw one color, weighted by the current composition.
        # .item() unwraps the one-element array into a plain Python value.
        draw = numpy.random.choice(
            list(urn), size=1, p=list(prob_dict.values())
        ).item()
        print(f"I drew a {draw} ball. There were {total_num} balls, and the probability of drawing {draw} was {prob_dict[draw]}.")

        # Reinforce +1 if success!
        if draw == goal:
            urn[goal] += 1

        # Re-calculate total number of balls and the probabilities
        total_num, prob_dict = _urn_state(urn)
        print(f'Number of balls in urn after reinforcing: {total_num}')

In [None]:
# Put the three starting colors back at 3 balls each before running the loop:
# this is a notebook, so earlier cells (and earlier runs of this cell) have
# already modified this same dictionary object.
urn.update(dict.fromkeys(("blue", "yellow", "red"), 3))
reinforcement_loop(urn=urn, goal="blue", iterations=20)