# Cybernetic Game Theory


In [1]:
# THIS IS ACTIVELY W.I.P. August-October 2020.  Began transforming sketch into functions.  
# Instead of deleting sketch work, I am just commenting out to preserve thought process.
# Next step is to separate out functions and create new notebook/script to continue development.

# This notebook aims to illustrate a toy model of a cybernetic regulator along the lines of W.R. Ashby's work.
# It is instructive to see the game-theoretic foundations of other popular regulators, like Artificial Neural Networks.
# The regulator can "learn" a probability distribution of disturbances, using reinforcement learning.
# The result is effective control, channeling the flow of information from the environment into desired outcomes (states).

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Create a game matrix for two players: Environment and Regulator
# Choose a goal for Regulator
# Environment goes first (row_i)
# Regulator goes second (column_j)
# Outcome is matrix element m_ij

In [4]:
game_matrix = np.random.randint(10, size=(7,5))
game_matrix

array([[6, 0, 8, 9, 5],
       [2, 6, 5, 9, 0],
       [7, 6, 3, 2, 9],
       [7, 9, 9, 9, 8],
       [3, 3, 8, 6, 7],
       [2, 5, 7, 6, 5],
       [8, 2, 8, 7, 5]])

In [5]:
# Rows are plays (a.k.a. "disturbances") for the environment.  
# Create vector to use for pandas index and later to link up with probabilities.
def _column_labels(count):
    '''
    Return `count` lowercase column labels: 'a'..'z', then 'aa', 'ab', ...
    '''
    labels = []
    for position in range(1, count + 1):
        label = ''
        # Bijective base-26 numbering (the scheme spreadsheets use for columns).
        while position > 0:
            position, remainder = divmod(position - 1, 26)
            label = chr(ord('a') + remainder) + label
        labels.append(label)
    return labels


def create_game(size):
    '''
    Build a random game table for the two players.

    size: a (rows, columns) tuple.  Rows are the environment's plays and
    columns are the regulator's actions.

    Each outcome is a random integer drawn with np.random.randint(10).
    Rows are labelled 1..n; columns are labelled 'a', 'b', 'c', ... with as
    many letters as there are columns (the labels used to be fixed at five,
    so only five-column games could be built).

    The row labels are printed, and the game is returned as a pandas DataFrame.
    '''
    game_matrix = np.random.randint(10, size=size)
    rows = list(range(1, len(game_matrix) + 1))
    print(rows)
    columns = _column_labels(game_matrix.shape[1])
    return pd.DataFrame(data=game_matrix, columns=columns, index=rows)


In [6]:
game = create_game((7,5))
game

[1, 2, 3, 4, 5, 6, 7]


Unnamed: 0,a,b,c,d,e
1,5,7,8,2,7
2,3,2,8,9,2
3,8,4,0,7,9
4,9,4,0,5,2
5,3,7,7,4,0
6,8,8,5,4,6
7,8,9,9,4,2


# Environment chooses play (row)

In [8]:
# Create distribution for environmental "plays" or "disturbances".
#dist = np.random.dirichlet(alpha=rows)

In [9]:
# Check that we have probabilities summing to 1.
#print(dist)
#sum(dist)

In [10]:
# Choose a play.
def environment_play(game, dist=None):
    '''
    Draw one play (row label) for the environment.

    game: a pandas DataFrame whose index labels are the environment's plays.

    dist: optional sequence of probabilities, one per row of `game`.
    When omitted, a fresh distribution is sampled on every call from a
    Dirichlet whose concentration parameters are the index labels themselves,
    so the index must then hold positive numbers.  Pass `dist` to keep the
    environment's distribution fixed across calls.

    Returns a length-1 numpy array holding the chosen index label;
    use .item() on it to get the bare label.
    '''
    if dist is None:
        # The numeric row labels double as Dirichlet concentration parameters.
        dist = np.random.dirichlet(alpha=np.asarray(game.index, dtype=float))
    return np.random.choice(game.index, size=1, p=dist)

In [11]:
env_play = environment_play(game).item()
env_play

6

# Regulator chooses action (column)

In [12]:
# We can use a Polya urn instead to define probabilities of actions for the regulator.
urn = np.random.randint(10, size=len(game.columns))
urn

array([5, 5, 7, 5, 3])

In [13]:
# Probabilities of drawing from urn
probs = np.array([(i/sum(urn)) for i in urn])
probs

array([0.2 , 0.2 , 0.28, 0.2 , 0.12])

In [14]:
sum(probs)

1.0

In [15]:
# Choose random draws from plays in the urn with probabilities according to the composition of the urn.
# We actually just care to draw from the plays, and not from the urn itself, although the urn is what will be updated/reinforced.
def regulator_action(game,probs):
    '''
    Draw one action (column label) for the regulator.

    game: a pandas DataFrame whose column labels are the regulator's actions.

    probs: probabilities, one per column of `game`, used as the weights
    of the draw.

    Returns a length-1 numpy array holding the chosen column label.
    '''
    actions = game.columns
    draw = np.random.choice(actions, size=1, p=probs)
    return draw


In [16]:
reg_action = regulator_action(game,probs).item()
reg_action

'a'

# Update/Reinforce the action of the regulator.

In [17]:
# Look up the outcome in the game table; env_play and reg_action were already reduced to scalars with .item() above.
out = game.loc[env_play,reg_action]
out

8

In [18]:
# Convince yourself that this row/column outcome corresponds to the game.
game

Unnamed: 0,a,b,c,d,e
1,5,7,8,2,7
2,3,2,8,9,2
3,8,4,0,7,9
4,9,4,0,5,2
5,3,7,7,4,0
6,8,8,5,4,6
7,8,9,9,4,2


In [19]:
#  We need to set a goal for the regulator to achieve.
goal = 8

In [20]:
# Define regulator as a dictionary mapping each action (column) to its urn count; probabilities are derived from these counts.
regulator = dict(zip(game.columns,urn))
regulator

{'a': 5, 'b': 5, 'c': 7, 'd': 5, 'e': 3}

In [21]:
# Compare outcome with goal, and reinforce (increase probability) action which regulator took in response to environment.
# It makes sense to reinforce individual successes more than we weaken, since we expect to fail a lot in the beginning.
# Try adding len(regulator), although this may be impractical for some examples.
#if out == goal:
#    print("success: reinforced the regulator's action", reg_action, "from", regulator[reg_action], "to", regulator[reg_action] + len(regulator))
#    regulator[reg_action] += len(regulator)
#    print('now we need to recalculate the probabilities according to the reinforced urn')
#else: print('fail')

In [22]:
# Confirm updated urn.
#regulator

In [23]:
# Recalculate probabilities and confirm that correct play increases in probability and all others go down.
def prob_calc(regulator_dict):
    '''
    Turn the regulator's urn counts into probabilities.

    regulator_dict: a dictionary of action labels and numeric urn counts.

    Returns a numpy array with each count divided by the total of all counts,
    in the dictionary's iteration order.
    '''
    counts = np.array(list(regulator_dict.values()))
    # The result can be sanity-checked by confirming that it sums to 1.
    return counts / counts.sum()

In [24]:
#prob_calc(regulator)

In [25]:
# Original probs
#probs

In [26]:
# We also want the regulator to update when it fails.  How might this look?
# Remember that the regulator's action distribution was random (the composition of the urn).  
# Consider that the regulator was perhaps overconfident, how can we make the "urn" less "confident"?
# Let's "squeeze" the distribution in the urn, by making it less confident for any particular action.

# We could make drastic changes to our distribution, but we should keep in mind that we may want to use 
# the same fail-update procedure over and over again in an automated learning process.  
# If our "squeeze" is too drastic (e.g. making the urn parts equal / probabilities uniform) then even if
# our regulator is "learned" but makes a mistake, it will "forget" the learned distribution.

# We can still use the mean of the urn composition in a function that slightly squeezes,
# incrementing those actions which are below the mean, and decrementing those above.

In [27]:
# Notice how multiple (failing) applications of this function will "converge" around the mean.
# More precisely, it will slightly oscillate around the convergence point.
def squeeze(regulator_dict, urn_list):
    '''
    This update function takes two arguments.

    regulator_dict: a regulator defined as a dictionary of key labels (plays or columns)
    and integer values (from a distribution or urn).

    urn_list: a list of integers interpreted as the composition of a Polya urn.

    The function calculates the mean of the urn composition, and compares each value
    in the regulator_dict with the mean.

    The regulator_dict is updated in place by incrementing values smaller than the
    mean, and decrementing values greater than the mean.  Values exactly equal to
    the mean are left unchanged (they used to be decremented, which pushed
    already-centred values away from the mean).

    Each adjustment is printed.  Nothing is returned.
    '''
    mean = np.mean(urn_list)
    # Only values are reassigned, never keys, so iterating the dict here is safe.
    for action, count in regulator_dict.items():
        if count > mean:
            regulator_dict[action] = count - 1
            print('squeeze down:', regulator_dict[action])
        elif count < mean:
            regulator_dict[action] = count + 1
            print('squeeze up:', regulator_dict[action])



In [28]:
# Our new function which includes update protocols for both success and failure.
def update(regulator_dict,action,out,goal,urn_list):
    '''
    Apply the success or failure update to the regulator after one round.

    regulator_dict: dictionary of action labels and urn counts; modified in place.
    action: the action the regulator took this round.
    out: the outcome that resulted from the round.
    goal: the outcome the regulator is trying to achieve.
    urn_list: urn composition handed on to squeeze() when the round failed.

    On success (out equals goal) the count of `action` grows by the number of
    actions in the regulator.  Otherwise squeeze() is applied to the regulator.
    Progress messages are printed either way.  Nothing is returned.
    '''
    if not out == goal:
        print('fail: squeezing. (incrementally adjusting composition of regulator urn towards mean)')
        squeeze(regulator_dict, urn_list)
        return

    reward = len(regulator_dict)
    before = regulator_dict[action]
    print("success: reinforced the regulator's action", action, "from", before, "to", before + reward)
    regulator_dict[action] = before + reward
    print('now we need to recalculate the probabilities according to the reinforced urn')



In [29]:
update(regulator,reg_action,out,goal,urn)

success: reinforced the regulator's action a from 5 to 10
now we need to recalculate the probabilities according to the reinforced urn


In [30]:
regulator


{'a': 10, 'b': 5, 'c': 7, 'd': 5, 'e': 3}

In [31]:
prob_calc(regulator)

array([0.33333333, 0.16666667, 0.23333333, 0.16666667, 0.1       ])

In [32]:
# original probabilities
probs

array([0.2 , 0.2 , 0.28, 0.2 , 0.12])

In [30]:
squeeze??

In [59]:
len?

In [36]:
# Now let's repeatedly train the regulator on multiple disturbances from the environment
# After each outcome, we will update, hoping to improve the likelihood of successful plays.

def train(game,regulator,epochs):
    '''
    Work-in-progress training loop.

    game: a pandas DataFrame game table (rows are plays, columns are actions).
    regulator: accepted but not used yet.
    epochs: number of rounds to play.

    A random urn with one entry per column is drawn once and normalised into
    action probabilities.  Each round draws an environment play and a
    regulator action; the outcome is not yet evaluated and the probabilities
    are not yet updated.  Always returns None.
    '''
    urn = np.random.randint(100, size=len(game.columns))
    probs = urn / urn.sum()

    epoch = 1
    while epoch <= epochs:
        # Environment moves first, then the regulator responds.
        play = environment_play(game)
        action = regulator_action(game,probs)

        # Still to do: score the outcome against a goal, update the regulator,
        # and recalculate the regulator probabilities.

        epoch += 1
    return None

In [11]:
n = type(int)

In [13]:
n = 'four'

In [14]:
type(n)

str

In [15]:
n

'four'