# Cybernetic Game Theory


In [1]:
# THIS IS ACTIVELY W.I.P. August 2020, JUST A SKETCH BEFORE CONVERTING TO FUNCTIONS ETC.
# This notebook aims to illustrate a toy model of a cybernetic regulator along the lines of W.R. Ashby's work.
# It is instructive to see the game-theoretic foundations of other popular regulators, like Artificial Neural Networks.
# The regulator can "learn" a probability distribution of disturbances, using reinforcement learning.
# The result is effective control, channeling the flow of information from the environment into desired outcomes (states).

In [2]:
import numpy as np
import pandas as pd
import matplotlib as mp
import scipy as sp


In [3]:
# Create a game matrix for two players: Environment and Regulator
# Choose a goal for Regulator
# Environment goes first (row_i)
# Regulator goes second (column_j)
# Outcome is matrix element m_ij: the entry in Environment's row i and Regulator's column j

In [4]:
# Random 7x5 game table of integer outcomes drawn uniformly from 0..9:
# one row per environment play, one column per regulator action.
game_matrix = np.random.randint(low=0, high=10, size=(7, 5))
game_matrix

array([[2, 2, 3, 4, 2],
       [3, 3, 7, 8, 7],
       [2, 3, 4, 5, 4],
       [4, 4, 2, 5, 0],
       [0, 7, 8, 1, 1],
       [1, 0, 9, 9, 9],
       [1, 0, 0, 3, 4]])

In [5]:
# Environment plays (a.k.a. "disturbances") label the rows of the game table.
# Number them from 1; the same labels serve as the pandas index and are
# reused later to link each play with its probability.
rows = list(range(1, len(game_matrix) + 1))
df = pd.DataFrame(game_matrix, index=rows, columns=list('abcde'))

In [6]:
# Show the game table: one row per environment play, one column per regulator action.
df

Unnamed: 0,a,b,c,d,e
1,2,2,3,4,2
2,3,3,7,8,7
3,2,3,4,5,4
4,4,4,2,5,0
5,0,7,8,1,1
6,1,0,9,9,9
7,1,0,0,3,4


# Environment chooses play (row)

In [7]:
# Sample the probability of each environmental "play" (disturbance) from a
# Dirichlet distribution; the row labels 1..n double as its concentration
# parameters, so there is one probability per row of the game table.
dist = np.random.dirichlet(rows)

In [8]:
# Sanity check: print the sampled distribution, then display its total,
# which should equal 1 up to floating-point rounding.
print(dist)
sum(dist)

[0.010609   0.01286502 0.12756375 0.08129511 0.17995911 0.24717586
 0.34053214]


1.0000000000000002

In [9]:
# Sample a single environment play (a row label of the game table),
# weighted by `dist`. The result is a length-1 array, not a bare scalar.
environment_play = np.random.choice(a=df.index, p=dist, size=1)
environment_play

array([3])

# Regulator chooses action (column)

In [45]:
# Use a Polya urn to define the regulator's action probabilities: one ball
# count per action, i.e. per column of the game table.
# Every action starts with at least one ball (counts are drawn from 1..99).
# A count of 0 would give that action probability 0 forever -- it could never
# be drawn and therefore never reinforced -- and an all-zero urn would make
# the normalisation into probabilities divide by zero.
urn = np.random.randint(1, 100, size=len(df.columns))
urn

array([58, 96, 15, 27, 69])

In [46]:
# Probabilities of drawing each action from the urn: each ball count divided
# by the total number of balls. Vectorised so the total is computed once
# instead of once per element.
assert urn.sum() > 0, "urn is empty: cannot normalise counts into probabilities"
probs = urn / urn.sum()
probs

array([0.21886792, 0.36226415, 0.05660377, 0.10188679, 0.26037736])

In [47]:
# Sanity check: the action probabilities should sum to 1.
sum(probs)

1.0

In [48]:
# Draw one action (a column label) for the regulator, weighted by the urn's
# current composition. The draw is made over the action labels rather than
# over the urn itself; the urn is what gets updated/reinforced.
regulator_action = np.random.choice(a=df.columns, p=probs, size=1)
regulator_action

array(['e'], dtype=object)

In [49]:
# Unwrap the length-1 array to get the chosen action label itself.
regulator_action.item()

'e'

In [50]:
# Update/Reinforce the action of the regulator.

In [51]:
# Look up the outcome in the game table. Both plays are stored as length-1
# arrays, so unwrap each with .item() to obtain the row and column labels.
row_label = environment_play.item()
col_label = regulator_action.item()
out = df.loc[row_label, col_label]
out

4

In [52]:
# Set the goal for the regulator: the outcome value (game-table entry)
# that counts as a success when compared against the actual outcome.
goal = 4

In [53]:
# Represent the regulator as a mapping from each action (column label) to its
# current ball count in the urn. These are raw counts, not probabilities.
regulator = {action: count for action, count in zip(df.columns, urn)}
regulator['a']

58

In [58]:
# Compare the outcome with the goal. On a match, reinforce the action the
# regulator just took by adding one ball to its count. Only the `regulator`
# mapping is updated here; the probabilities are not recomputed.
chosen = regulator_action.item()
if out != goal:
    print('fail')
else:
    before = regulator[chosen]
    print("success: reinforced the regulator's action", chosen, "from", before, "to", before+1)
    regulator[chosen] = before + 1
    print('now we need to recalculate the probabilities according to the reinforced urn')

success: reinforced the regulator's action e from 72 to 73
now we need to recalculate the probabilities according to the reinforced urn
