# Cybernetic Game Theory


In [1]:
# THIS IS ACTIVELY W.I.P. August 2020, JUST A SKETCH BEFORE CONVERTING TO FUNCTIONS ETC.
# This notebook aims to illustrate a toy model of a cybernetic regulator along the lines of W.R. Ashby's work.
# It is instructive to see the game-theoretic foundations of other popular regulators, like Artificial Neural Networks.
# The regulator can "learn" a probability distribution of disturbances, using reinforcement learning.
# The result is effective control, channeling the flow of information from the environment into desired outcomes (states).

In [2]:
import numpy as np
import pandas as pd
import matplotlib as mp
import scipy as sp


In [3]:
# Create a game matrix for two players: Environment and Regulator
# Choose a goal for Regulator
# Environment goes first (row_i)
# Regulator goes second (column_j)
# Outcome is matrix element m_ij

In [4]:
game_matrix = np.random.randint(10, size=(7,5))
game_matrix

array([[0, 1, 6, 8, 6],
       [5, 6, 3, 3, 7],
       [6, 4, 0, 2, 1],
       [9, 4, 8, 6, 9],
       [7, 0, 5, 3, 8],
       [6, 4, 1, 0, 9],
       [2, 5, 7, 1, 6]])

In [5]:
# Rows are plays (a.k.a. "disturbances") for the environment.  
# Create vector to use for pandas index and later to link up with probabilities.
rows = [i+1 for i in range(len(game_matrix))]
df = pd.DataFrame(data = game_matrix, columns=['a','b','c','d','e'], index=rows)

In [6]:
df

Unnamed: 0,a,b,c,d,e
1,0,1,6,8,6
2,5,6,3,3,7
3,6,4,0,2,1
4,9,4,8,6,9
5,7,0,5,3,8
6,6,4,1,0,9
7,2,5,7,1,6


# Environment chooses play (row)

In [7]:
# Create distribution for environmental "plays" or "disturbances".
dist = np.random.dirichlet(alpha=rows)

In [8]:
# Check that we have probabilities summing to 1.
print(dist)
sum(dist)

[0.05290195 0.04599036 0.05520273 0.16762703 0.2817019  0.10534903
 0.291227  ]


0.9999999999999999

In [9]:
# Choose a play.
environment_play = np.random.choice(df.index, size=1, p=dist)
environment_play

array([4])

# Regulator chooses action (column)

In [10]:
# We can use a Polya urn instead to define probabilities of actions for the regulator.
urn = np.random.randint(100, size=len(df.columns))
urn

array([81, 61,  5, 49, 19])

In [11]:
# Probabilities of drawing from urn
probs = np.array([(i/sum(urn)) for i in urn])
probs

array([0.37674419, 0.28372093, 0.02325581, 0.22790698, 0.08837209])

In [12]:
sum(probs)

1.0

In [13]:
# Choose random draws from plays in the urn with probabilities according to the composition of the urn.
# We actually just care to draw from the plays, and not from the urn itself, although the urn is what will be updated/reinforced.
regulator_action = np.random.choice(df.columns, size=1, p=probs)
regulator_action

array(['b'], dtype=object)

In [14]:
regulator_action.item()

'b'

In [15]:
# Update/Reinforce the action of the regulator.

In [16]:
# Use .item to get the value from the game table out of the locations (rows/columns) encoded in arrays.
out = df.loc[environment_play.item(),regulator_action.item()]
out

4

In [17]:
#  We need to set a goal for the regulator to achieve.
goal = 4

In [18]:
# Define regulator as dictionary of plays and associated probabilities.
regulator = dict(zip(df.columns,urn))
regulator['a']

81

In [19]:
# Compare outcome with goal, and reinforce (increase probability) action which regulator took in response to environment.
if out == goal:
    print("success: reinforced the regulator's action", regulator_action.item(), "from", regulator[regulator_action.item()], "to", regulator[regulator_action.item()]+1)
    regulator[regulator_action.item()] += 1
    print('now we need to recalculate the probabilities according to the reinforced urn')
else: print('fail')

success: reinforced the regulator's action b from 61 to 62
now we need to recalculate the probabilities according to the reinforced urn


In [23]:
# Confirm updated urn.
regulator

{'a': 81, 'b': 62, 'c': 5, 'd': 49, 'e': 19}

In [33]:
# Recalculate probabilities and confirm that correct play increases in probability and all others go down.
sum_reg = sum([regulator[i] for i in regulator])
updated_probs = [regulator[i]/sum_reg for i in regulator]
sum(updated_probs),updated_probs

(1.0,
 [0.375,
  0.28703703703703703,
  0.023148148148148147,
  0.22685185185185186,
  0.08796296296296297])