In [1]:
import networkx as nx
import matplotlib.pyplot as plt

In [23]:
import numpy as np
import random

class MDP:
    """Minimal tabular Markov decision process simulator.

    The model is described by four lookup tables:

    * ``states`` -- iterable of state identifiers (a dict works; only its
      keys are used).
    * ``actions`` -- mapping ``state -> sequence of actions`` allowed there.
    * ``transitions`` -- mapping
      ``state -> action -> {next_state: probability}``.
    * ``rewards`` -- mapping ``state -> action -> {next_state: reward}``.

    A state with no (or an empty) entry in ``transitions`` is treated as
    terminal: reaching it makes ``step`` report ``done=True``, and no action
    is valid from it.
    """

    def __init__(self, states, actions, transitions, rewards):
        """Store the model tables and start in a uniformly random state."""
        self.states = states
        self.actions = actions
        self.transitions = transitions
        self.rewards = rewards
        self.current_state = random.choice(list(states))

    def reset(self):
        """Jump to a uniformly random state and return it."""
        self.current_state = random.choice(list(self.states))
        return self.current_state

    def step(self, action):
        """Apply ``action`` in the current state and sample the outcome.

        Returns:
            A ``(next_state, reward, done)`` tuple, where ``done`` is True
            when ``next_state`` has no outgoing transitions.

        Raises:
            ValueError: if ``action`` is not available in the current state
                (which includes every action once a terminal state has been
                reached).
        """
        if action not in self.get_available_actions():
            raise ValueError("Invalid action")

        outcomes = self.transitions[self.current_state][action]
        # Iterating a dict and its .values() yields matching orders, so each
        # candidate state is paired with its own probability.
        next_state = random.choices(
            list(outcomes), weights=list(outcomes.values())
        )[0]
        reward = self.rewards[self.current_state][action][next_state]
        self.current_state = next_state

        # .get() rather than [] so that terminal states may simply be left
        # out of the transition table instead of raising KeyError here.
        done = not self.transitions.get(next_state)
        return next_state, reward, done

    def get_available_actions(self):
        """Return the actions allowed in the current state.

        An empty list is returned for states that have no entry in
        ``actions`` (e.g. terminal states).
        """
        return self.actions.get(self.current_state, [])

# State space of the toy "food" MDP. Only the keys matter to MDP; every
# value is a None placeholder.
states = dict.fromkeys(
    ('You have Food', 'You dont have Food', 'Neighbour suspect you')
)

# transitions[state][action] -> {next_state: probability}
transitions = {
    'You have Food': {
        'Eat own food': {
            'You dont have Food': 1.0,
        },
        'Take neighbour Food': {
            'Neighbour suspect you': 0.2,
            'You have Food': 0.8,
        },
    },
    'You dont have Food': {
        'Buy Food': {
            'You have Food': 1.0,
        },
        'Take neighbour Food': {
            'Neighbour suspect you': 0.5,
            'You have Food': 0.5,
        },
    },
    'Neighbour suspect you': {
        'Buy Food': {
            'You have Food': 1.0,
        },
        'Take neighbour Food': {
            'Neighbour suspect you': 0.9,
            'You have Food': 0.1,
        },
    },
}

# Actions available in each state, in the order they appear in the
# transition table above.
actions = {
    state: list(by_action) for state, by_action in transitions.items()
}

# rewards[state][action][next_state] -> immediate reward for that outcome
rewards = {
    'You have Food': {
        'Eat own food': {
            'You dont have Food': 1,
        },
        'Take neighbour Food': {
            'Neighbour suspect you': -1,
            'You have Food': 1,
        },
    },
    'You dont have Food': {
        'Buy Food': {
            'You have Food': -2,
        },
        'Take neighbour Food': {
            'Neighbour suspect you': -1,
            'You have Food': 0,
        },
    },
    'Neighbour suspect you': {
        'Buy Food': {
            'You have Food': -2,
        },
        'Take neighbour Food': {
            'Neighbour suspect you': -5,
            'You have Food': 1,
        },
    },
}

# Build the environment, draw a random starting state, and look up what can
# be done from there.
mdp = MDP(states, actions, transitions, rewards)
current_state = mdp.reset()
available_actions = mdp.get_available_actions()

print("Initial State:", current_state)
print("Available Actions:", available_actions)

# Take the first listed action and report the sampled outcome.
action_to_take = available_actions[0]
new_state, reward, done = mdp.step(action_to_take)
print(
    f"Action Taken: {action_to_take}, New State: {new_state}, "
    f"Reward: {reward}, Finish: {done}"
)


Initial State: Neighbour suspect you
Available Actions: ['Buy Food', 'Take neighbour Food']
Action Taken: Buy Food, New State: You have Food, Reward: -2, Finish: False
