# Exam

## 1. Gameshow

In [1]:
# Transition model of the gameshow: (state, action) -> {next_state: probability}.
# 'play' either advances to the next question (or to 'win' from q4) or ends in
# 'lose'; 'quit' moves to the matching '<state>quit' state with probability 1.
# There is no ('q1', 'quit') entry.
# NOTE: the evaluation loop iterates this dict in insertion order and updates
# q_values in place, so the key order affects the intermediate iterations.
trans_probs = {('q1', 'play'): {'q2': 0.9, 'lose': 0.1},
               ('q2', 'play'): {'q3': 0.75, 'lose': 0.25},
               ('q3', 'play'): {'q4': 0.5, 'lose': 0.5},
               ('q4', 'play'): {'win': 0.1, 'lose': 0.9},
               ('q2', 'quit'): {'q2quit': 1},
               ('q3', 'quit'): {'q3quit': 1},
               ('q4', 'quit'): {'q4quit': 1}}

# Reward collected on entering each state (compute_q looks up the reward of
# the *next* state). Question states and 'lose' pay nothing.
rewards = {'q1': 0, 'q2': 0, 'q3': 0, 'q4': 0,
           'win': 61100,
           'q2quit': 100,
           'q3quit': 1100,
           'q4quit': 11100,
           'lose': 0}

# Policy under evaluation: the action taken in each question state.
policy = {'q1': 'play', 'q2': 'play', 'q3': 'quit', 'q4': 'play'}

# Q-table keyed by (state, action), initialised to zero and overwritten in
# place by the evaluation loop.
q_values = {('q1', 'play'): 0,
            ('q2', 'play'): 0,
            ('q3', 'play'): 0,
            ('q4', 'play'): 0,
            ('q2', 'quit'): 0,
            ('q3', 'quit'): 0,
            ('q4', 'quit'): 0}

In [2]:
def compute_q(state, action, policy, gamma):
    """Return the one-step expected return of taking `action` in `state`.

    Reads the module-level tables `trans_probs`, `rewards` and `q_values`.
    For every possible successor the reward of entering it is added to the
    discounted Q-value of the action that `policy` prescribes there;
    end-of-game states contribute no future value.

    Args:
        state: current state name, e.g. 'q1'.
        action: action taken in `state` ('play' or 'quit').
        policy: mapping from non-terminal state to the action followed there.
        gamma: discount factor applied to the successor's Q-value.

    Returns:
        The probability-weighted sum over all successors.
    """
    end_states = ['lose', 'win', 'q2quit', 'q3quit', 'q4quit']

    expected = 0
    for successor, prob in trans_probs[(state, action)].items():
        if successor in end_states:
            # The game is over: nothing can be earned afterwards.
            future = 0
        else:
            future = q_values[(successor, policy[successor])]

        expected += prob * (rewards[successor] + gamma * future)

    return expected

In [3]:
gamma = 0.9  # discount factor

# Ten sweeps of policy evaluation. Each sweep visits the (state, action)
# pairs in the order they appear in trans_probs and writes the new value
# straight back into q_values, so later pairs in the same sweep already see
# the updated entries.
for i in range(10):
    for state, action in trans_probs:
        q_values[(state, action)] = compute_q(state, action, policy, gamma)

    # Header line, the full Q-table, then one blank line.
    print(f'Iteration {i}:\n{q_values}\n')

Iteration 0:
{('q1', 'play'): 0.0, ('q2', 'play'): 0.0, ('q3', 'play'): 0.0, ('q4', 'play'): 6110.0, ('q2', 'quit'): 100.0, ('q3', 'quit'): 1100.0, ('q4', 'quit'): 11100.0}

Iteration 1:
{('q1', 'play'): 0.0, ('q2', 'play'): 742.5, ('q3', 'play'): 2749.5, ('q4', 'play'): 6110.0, ('q2', 'quit'): 100.0, ('q3', 'quit'): 1100.0, ('q4', 'quit'): 11100.0}

Iteration 2:
{('q1', 'play'): 601.4250000000001, ('q2', 'play'): 742.5, ('q3', 'play'): 2749.5, ('q4', 'play'): 6110.0, ('q2', 'quit'): 100.0, ('q3', 'quit'): 1100.0, ('q4', 'quit'): 11100.0}

Iteration 3:
{('q1', 'play'): 601.4250000000001, ('q2', 'play'): 742.5, ('q3', 'play'): 2749.5, ('q4', 'play'): 6110.0, ('q2', 'quit'): 100.0, ('q3', 'quit'): 1100.0, ('q4', 'quit'): 11100.0}

Iteration 4:
{('q1', 'play'): 601.4250000000001, ('q2', 'play'): 742.5, ('q3', 'play'): 2749.5, ('q4', 'play'): 6110.0, ('q2', 'quit'): 100.0, ('q3', 'quit'): 1100.0, ('q4', 'quit'): 11100.0}

Iteration 5:
{('q1', 'play'): 601.4250000000001, ('q2', 'play'): 742

The Q-values stop changing after three sweeps (Iterations 0–2), i.e. the evaluation has converged. Final Q-values:

In [4]:
# Display the Q-table as left by the last sweep of the loop above.
q_values

{('q1', 'play'): 601.4250000000001,
 ('q2', 'play'): 742.5,
 ('q3', 'play'): 2749.5,
 ('q4', 'play'): 6110.0,
 ('q2', 'quit'): 100.0,
 ('q3', 'quit'): 1100.0,
 ('q4', 'quit'): 11100.0}

## 2. Neural network

In [5]:
import numpy as np
from neural_network import *

# Fix NumPy's global random seed so repeated runs of the notebook are reproducible.
# NOTE(review): presumably the imported Network draws its random values from
# this generator - confirm in neural_network.
np.random.seed(11)

In [6]:
class DecisionNeuron(Neuron):
    """Sigmoid neuron with a fixed, hard-coded weight vector.

    The weights are always ``[1, 1, 0, 1]``; ``n_inputs`` is stored but does
    not size them. The initializer of the base class ``Neuron`` is not
    invoked.
    """

    def __init__(self, n_inputs):
        self.n_inputs = n_inputs
        self.weights = np.array([1, 1, 0, 1])
        self.V = 0  # Membrane potential; also serves as the neuron's output

    # Setters
    def clearActivity(self):
        """Reset the membrane potential to zero."""
        self.V = 0

    # Getters
    def getOutput(self):
        """Return the current membrane potential."""
        return self.V

    # Methods
    def integration(self, inputs):
        """Store the sigmoid of the weighted sum of `inputs` in ``self.V``."""
        weighted = self.weights * inputs
        drive = np.sum(weighted)
        # Logistic (sigmoid) activation squashes the drive into (0, 1).
        self.V = 1 / (1 + np.exp(-drive))

In [7]:
class DecisionNetwork:
    """A single layer of DecisionNeuron units read out as one mean value."""

    def __init__(self, n_neurons, n_inputs, n_outputs):
        """Create `n_neurons` units, each told it has `n_inputs` inputs.

        `n_outputs` is accepted but not used by this class.
        """
        self.n_neurons = n_neurons
        self.neurons = [DecisionNeuron(n_inputs) for _ in range(n_neurons)]

    # Setters
    def clearActivity(self):
        """Reset the activity of every unit."""
        for unit in self.neurons:
            unit.clearActivity()

    # Getters
    def getOutput(self):
        """Return the list of the units' individual outputs."""
        return [unit.getOutput() for unit in self.neurons]

    # Methods
    def update(self, inputs):
        """Feed the same `inputs` to every unit."""
        for unit in self.neurons:
            unit.integration(inputs)

    def run(self, inputs):
        """Update all units with `inputs` and return their mean potential."""
        self.update(inputs)
        potentials = [unit.V for unit in self.neurons]
        return np.mean(potentials)

In [8]:
def experimentalFrame(question, decisionNet, computationNet):
    """Present one question to the model and print decision and duration.

    Args:
        question: key into the module-level `questions` dict.
        decisionNet: object whose ``run(inputs)`` returns a scalar value.
        computationNet: object offering ``clearActivity()`` and
            ``run(input, 0.5)``; the first element of its result is printed
            as the duration.
            NOTE(review): the 0.5 is presumably the stop threshold of the
            activity network - confirm against the Network class.
    """
    stimulus = questions[question]
    decision_value = decisionNet.run(stimulus)

    # Thresholding the value conceptually belongs to the model rather than
    # to the experimental frame; it lives here only for lack of time.
    decision = 'quit' if decision_value <= 0.5 else 'play'

    # Start the activity network from a clean state, then drive it with the
    # scaled decision value.
    computationNet.clearActivity()
    result = computationNet.run(3 * decision_value, 0.5)

    print(f'Question: {question}')
    print(f'Decision: {decision}')
    print(f'Duration: {result[0]}')

In [9]:
# One-hot input vector for each of the four questions; read by experimentalFrame.
questions = {'q1': [1,0,0,0], 
             'q2': [0,1,0,0], 
             'q3': [0,0,1,0], 
             'q4': [0,0,0,1]}

# Decision network: 4 neurons with 4 inputs each (the third argument,
# n_outputs, is not used by DecisionNetwork).
d = DecisionNetwork(4, 4, 1)
# Activity network from the imported simulator: 100 layers of 100 LIF neurons.
# NOTE(review): the meaning of threshold/resetPot/memTimeConst is defined in
# neural_network, not in this notebook - confirm there.
net = Network(n_layers=100, n_neurons=100, n_inputs=1, n_outputs=1, 
              ntype='lif', threshold=1, resetPot=0.5, memTimeConst=1)

# Present every question once, in dict order, and print decision and duration.
for question in questions:
    experimentalFrame(question, d, net)

Question: q1
Decision: play
Duration: 0.83
Question: q2
Decision: play
Duration: 0.83
Question: q3
Decision: quit
Duration: 0.45
Question: q4
Decision: play
Duration: 0.83


### 2. UML

First just a simple UML diagram of the class structure of the simulator

![image.png](attachment:df6d155e-23b5-4c8d-8161-48d6ef205001.png)

Now a UML diagram of the gameshow, including experimental frame, model, states, and transition function

![image.png](attachment:160717f9-112d-483a-b714-8d9d2b46cef6.png)

The **experimental frame** is the game show. The model receives questions as inputs and produces two outputs: a decision and a duration of activity. Our assumption is that the activity of playing lasts longer than the activity of quitting.

As **model**, we have 2 neural networks: one takes the question as input and makes a decision based on the above policy $\pi$. The decision is passed on to the second network. The second network corresponds to the neural activity of the participant. This network has 2 possible **states**, 'play' and 'quit', and both states have their individual duration of activity. The second network **transitions** from one state to another through the input given by the decision network. 

As a **simulator**, we use the neural network from the exercises with 100 layers of 100 LIF neurons each for the second network. In each layer, the network computes the mean of the membrane potentials of all neurons in that layer. If this value is above a certain threshold (0.5), the network stops, and the duration of its activity is the number of the layer divided by the total number of layers. For the decision network, we use a single layer of 4 sigmoid neurons. They take the boolean question arrays as inputs and output a single value (the mean of the four neuron outputs). If this value is 0.5 or lower, we say that the decision is 'quit'; otherwise, it is 'play'.

Note that the decision network reproduces the above policy only because its weights are hard-coded to do so. Also, the activity of 'play' being longer than that of 'quit' is not necessarily true for other decision or activity networks - changing the seed could produce drastically different behaviour. As a general model, this would be a very bad choice!

**Algorithm**

```
function experimentalFrame(question):
    decision = decisionNetwork(question)  # Get decision from the decision network
    duration = activityNetwork(decision)  # Get duration based on the decision
    
    return decision, duration

function decisionNetwork(question):
    output = neuralNetwork(question)  # Run the neural network with question as input
    if output <= 0.5:
        decision = 'quit'
    else:
        decision = 'play'
    
    return decision

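function activityNetwork(decision):
    for layer = 1 .. nLayers:                      # Propagate the decision-driven input layer by layer
        meanPotential = mean(membrane potentials of the neurons in layer)
        if meanPotential > 0.5:                    # Threshold reached: the network stops
            return layer / nLayers                 # Duration = fraction of layers traversed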

```