In [1]:
# We want to model an urn with multi-colored balls,
# where we can change the composition by adding balls.
# Furthermore, we want to be able to randomly draw
# from this urn, and visualize the change in composition
# (and therefore a change in the probability distribution.)
import numpy.random
from collections import Counter

In [2]:
# Start with an empty urn: a dictionary that will map ball color -> ball count.
urn = {}

In [3]:
# Put three balls of each starting color into the urn.
urn.update(blue=3, yellow=3, red=3)

In [4]:
# Display the urn's contents after the initial balls were added.
urn

{'blue': 3, 'yellow': 3, 'red': 3}

In [5]:
# Add one more blue ball, then display the updated blue count.
urn['blue'] = urn['blue'] + 1
urn['blue']

4

In [6]:
# Display the whole urn again to see the extra blue ball alongside the other colors.
urn

{'blue': 4, 'yellow': 3, 'red': 3}

In [7]:
# Total number of balls in the urn, summed over every color.
total_num = sum(urn.values())
total_num

10

In [8]:
# Probability of drawing each color, listed in the urn's key order.
probabilities = [count / total_num for count in urn.values()]
probabilities

[0.4, 0.3, 0.3]

In [9]:
# IPython help lookup: show the documentation for the built-in zip.
zip?

In [10]:
# Pair every color with its probability so it can be looked up by color name.
prob_dict = {color: prob for color, prob in zip(urn, probabilities)}
prob_dict

{'blue': 0.4, 'yellow': 0.3, 'red': 0.3}

In [11]:
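# Equivalent explicit-loop form of the probabilities list comprehension above
# (kept commented out for reference; it would need `probabilities = []` first):
#for color in urn:
#    probabilities.append(urn[color]/total_num)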

In [12]:
# IPython help lookup: show the documentation for numpy.random.choice.
numpy.random.choice?

In [13]:
# Drawing from (static) urn ("with replacement"): the urn is not changed
# between draws, so every draw uses the same probabilities.
draw_ten_thousand = numpy.random.choice(list(urn), size=10_000, p=probabilities)
# numpy returns an array of numpy strings; .tolist() converts them to plain
# Python strings so the Counter keys display as 'blue' rather than
# np.str_('blue') on newer numpy versions.
Counter(draw_ten_thousand.tolist())

Counter({'yellow': 3049, 'blue': 3980, 'red': 2971})

In [14]:
# Now we have an urn, and we know how to draw from it.
# A toy model of reinforcement learning will be a loop.
# The loop will encode assumptions about reinforcement,
# e.g. the goal, the amount of reinforcement (punishment).

In [15]:
def _urn_distribution(urn):
    '''Return (total ball count, list of per-color probabilities in key order).'''
    total_num = sum(urn.values())
    probabilities = [count / total_num for count in urn.values()]
    return total_num, probabilities


def reinforcement_loop(urn=None, goal=None, iterations=10):
    '''
    Run a simple reinforcement loop on an urn of colored balls.

    On every iteration one ball is drawn at random, with probability
    proportional to the current count of its color. Whenever the drawn
    color equals ``goal``, one more ball of that color is added to the
    urn, which makes the goal color more likely on later draws. A
    progress message is printed for every draw and every update.

    Takes three arguments.

    urn: dictionary mapping color -> number of balls. It is modified
        in place; pass a copy to keep the original counts.
    goal: key from urn; the color that gets reinforced.
    iterations: integer, the number of draws to perform.

    Returns None.

    Raises AssertionError if no urn is given or goal is not a key of urn.
    '''

    assert urn is not None, 'Please pass an urn dictionary'
    assert goal in urn, 'Please choose a color from the urn'

    # The set of colors never changes inside the loop (only counts do),
    # so the key order can be captured once.
    colors = list(urn.keys())

    # Initialize probabilities
    total_num, probabilities = _urn_distribution(urn)

    for _ in range(iterations):

        # Draw a position instead of the key itself: handing the keys to
        # numpy would coerce them into a numpy array, which breaks for
        # mixed-type or tuple keys and forces array-vs-scalar comparisons.
        index = numpy.random.choice(len(colors), size=1, p=probabilities).item()
        draw = colors[index]
        print(f"I drew a {draw} ball. There were {total_num} balls, and the probability of drawing {draw} was {probabilities[index]}.")

        # Reinforce +1 if success!
        if draw == goal:
            urn[goal] += 1

        # Re-calculate the total and the probabilities for the next draw
        total_num, probabilities = _urn_distribution(urn)
        print(f'Number of balls in urn after reinforcing: {total_num}')

In [16]:
# Check the urn's current state before running the loop, which changes it in place.
urn

{'blue': 4, 'yellow': 3, 'red': 3}

In [17]:
# Restore the starting counts first: this is a jupyter notebook, and the same
# dictionary object keeps being changed in place by earlier cells and re-runs.
urn.update(blue=3, yellow=3, red=3)
reinforcement_loop(urn, goal="blue", iterations=50)

I drew a blue ball. There were 9 balls, and the probability of drawing blue was 0.3333333333333333.
Number of balls in urn after reinforcing: 10
I drew a red ball. There were 10 balls, and the probability of drawing red was 0.3.
Number of balls in urn after reinforcing: 10
I drew a yellow ball. There were 10 balls, and the probability of drawing yellow was 0.3.
Number of balls in urn after reinforcing: 10
I drew a red ball. There were 10 balls, and the probability of drawing red was 0.3.
Number of balls in urn after reinforcing: 10
I drew a yellow ball. There were 10 balls, and the probability of drawing yellow was 0.3.
Number of balls in urn after reinforcing: 10
I drew a blue ball. There were 10 balls, and the probability of drawing blue was 0.4.
Number of balls in urn after reinforcing: 11
I drew a blue ball. There were 11 balls, and the probability of drawing blue was 0.45454545454545453.
Number of balls in urn after reinforcing: 12
I drew a blue ball. There were 12 balls, and the 