# Problema de Markov basado en la persona con SII #

Se crea un problema de Markov ajustado al ambiente de la persona que consume ciertos alimentos y presenta ciertos síntomas.

In [1]:
from collections import defaultdict
from utils import argmax
from mdp import MDP, policy_evaluation
from bayesianFood_lite import *
from pgmpy.sampling import BayesianModelSampling


class PersonFoodAndSympthonsMDP(MDP):
    """MDP whose states are symptom levels sampled from a Bayesian food model.

    A state is a tuple of ``(symptom, level)`` pairs, always listed in the
    order of the ``symptom`` column of ``sintomas.csv``. An action is the
    evidence (food intake) handed to the sampler to draw the next state.
    """

    # Value used in place of ``1 / total`` when the total symptom level is 0,
    # where the original formula would divide by zero. It is larger than the
    # best non-zero term (1 / 1), so a symptom-free state is the most rewarding.
    SYMPTOM_FREE_BONUS = 2.0

    def __init__(self, model, actlist, terminals, reward=None, states=None, gamma=0.9):
        """Build the environment and sample its initial state.

        Args:
            model: Bayesian network handed to ``BayesianModelSampling``.
            actlist: Actions available to the agent.
            terminals: Terminal states.
            reward: Optional mapping state -> reward. Defaults to 0 for every
                state in ``states``.
            states: Optional collection of known states.
            gamma: Discount factor, must satisfy ``0 < gamma <= 1``.

        Raises:
            ValueError: If ``gamma`` is outside ``(0, 1]``.
        """
        if not (0 < gamma <= 1):
            raise ValueError("An MDP must have 0 < gamma <= 1")
        # Bayesian model
        self.model = model
        # Symptom catalogue; kept on the instance so getNextState does not
        # depend on a module-level variable of the same name.
        self.sintomas = pd.read_csv("sintomas.csv", delimiter=';')
        # States
        self.states = states
        # Initial state: one forward sample of the model, reduced to symptoms.
        inference = BayesianModelSampling(model)
        self.init = self._symptom_state(inference.forward_sample())
        # Actions
        self.actlist = actlist
        self.terminals = terminals

        self.gamma = gamma

        # Tolerate states=None instead of failing while building the default.
        known_states = [] if states is None else states
        self.reward = reward or {s: 0 for s in known_states}

    def _symptom_state(self, sample):
        """Return the symptom columns of the first row of ``sample`` as a state.

        Pairs follow the catalogue order, so states built from different
        samples are directly comparable (and usable as dictionary keys).
        """
        columns = set(sample)  # iterating a DataFrame yields its column names
        first_row = sample.iloc[0]
        return tuple(
            (name, first_row[name])
            for name in self.sintomas['symptom']
            if name in columns
        )

    def calculate_reward(self, state):
        """Return the reward of ``state``; lower symptom levels score higher.

        The reward is ``1 / len(state) + 1 / total`` where ``total`` is the
        sum of the symptom levels. When ``total`` is 0 the second term is
        replaced by ``SYMPTOM_FREE_BONUS``.

        Raises:
            ZeroDivisionError: If ``state`` is empty.
        """
        total = sum(level for _, level in state)
        severity_term = 1 / total if total else self.SYMPTOM_FREE_BONUS
        return 1 / len(state) + severity_term

    def getNextState(self, a):
        """Sample the state that follows taking action ``a``.

        ``a`` is passed as evidence to a likelihood-weighted sample of the
        model. The reward of the sampled state is stored in ``self.reward``.

        Returns:
            The sampled state as a tuple of ``(symptom, level)`` pairs.
        """
        inference = BayesianModelSampling(self.model)
        week = inference.likelihood_weighted_sample(evidence=a, size=1)
        state = self._symptom_state(week)
        self.reward[state] = self.calculate_reward(state)
        return state

In [2]:
class QLearningAgent:
    """An exploratory Q-learning agent.

    States and actions are converted to tuples before being used as keys of
    the ``Q`` and ``Nsa`` tables; the ``None`` action of terminal states is
    kept as ``None``.
    """

    def __init__(self, mdp, Ne, Rplus, alpha=None):
        """Create the agent for ``mdp``.

        Args:
            mdp: Environment providing ``gamma``, ``terminals`` and ``actlist``.
            Ne: Number of visits below which a state/action pair is explored.
            Rplus: Optimistic value reported for pairs visited fewer than
                ``Ne`` times.
            alpha: Learning rate. Either a constant number or a callable
                taking the visit count. Defaults to ``1 / (1 + n)``.
        """
        self.gamma = mdp.gamma
        # Stored as tuples so tuple-valued states compare equal to terminals
        # that were supplied as lists.
        self.terminals = [tuple(t) for t in mdp.terminals]
        self.all_act = mdp.actlist
        self.Ne = Ne  # iteration limit in exploration function
        self.Rplus = Rplus  # large value to assign before iteration limit
        self.Q = defaultdict(float)
        self.Nsa = defaultdict(float)
        self.s = None
        self.a = None
        self.r = None

        # "is None" rather than truthiness, so an explicit alpha=0 is honored.
        if alpha is None:
            self.alpha = lambda n: 1./(1+n)  # udacity video
        else:
            self.alpha = alpha

    @staticmethod
    def _state_key(state):
        """Return ``state`` as a hashable tuple (``None`` stays ``None``)."""
        return None if state is None else tuple(state)

    @staticmethod
    def _action_key(action):
        """Return ``action`` as a hashable tuple (``None`` stays ``None``)."""
        return None if action is None else tuple(action)

    def _learning_rate(self, n):
        """Return the learning rate for a pair visited ``n`` times."""
        return self.alpha(n) if callable(self.alpha) else self.alpha

    def f(self, u, n):
        """ Exploration function. Returns fixed Rplus until
        agent has visited state, action a Ne number of times.
        Same as ADP agent in book."""
        if n < self.Ne:
            return self.Rplus
        else:
            return u

    def actions_in_state(self, state):
        """ Return actions possible in given state.
            Useful for max and argmax. """
        if self._state_key(state) in self.terminals:
            return [None]
        else:
            return self.all_act

    def __call__(self, percept):
        """Learn from ``percept`` and return the next action.

        ``percept`` is turned into ``(state, reward)`` by ``update_state``.
        Returns ``None`` when no further action is to be taken.
        """
        s1, r1 = self.update_state(percept)
        s1 = self._state_key(s1)
        Q, Nsa = self.Q, self.Nsa
        s, r = self.s, self.r
        a = self._action_key(self.a)
        key = self._action_key

        if s in self.terminals:
            Q[s, None] = r1
        if s is not None:
            Nsa[s, a] += 1
            best_next = max(Q[s1, key(a1)] for a1 in self.actions_in_state(s1))
            # The rate is looked up from the schedule (or used as a constant),
            # never multiplied by the visit count.
            Q[s, a] += self._learning_rate(Nsa[s, a]) * (r + self.gamma * best_next - Q[s, a])
        if s in self.terminals:
            self.s = self.a = self.r = None
        else:
            self.s, self.r = s1, r1
            # The action is stored and returned exactly as listed in actlist.
            self.a = argmax(self.actions_in_state(s1),
                            key=lambda a1: self.f(Q[s1, key(a1)], Nsa[s1, key(a1)]))
        return self.a

    def update_state(self, percept):
        """To be overridden in most cases. The default case
        assumes the percept to be of type (state, reward)."""
        return percept


def run_single_trial(agent_program, mdp):
    """Run one episode of ``agent_program`` against ``mdp``.

    Starting from ``mdp.init``, the agent is repeatedly shown the percept
    ``(state, mdp.R(state))`` and the action it answers with is passed to
    ``mdp.getNextState`` to obtain the following state. The episode stops
    as soon as the agent answers ``None``.
    """
    state = mdp.init
    action = agent_program((state, mdp.R(state)))
    while action is not None:
        # The next state is determined by the chosen action alone.
        state = mdp.getNextState(action)
        action = agent_program((state, mdp.R(state)))


Probamos el entorno

In [3]:
import pickle

# Load the pickled collection of symptom states.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('sintomas.p', 'rb') as pickled_file:  # closes the handle when done
    states = pickle.load(pickled_file)


[(('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 0)),
 (('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 1)),
 (('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 2)),
 (('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 1),
  ('borborigmos', 0)),
 (('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujo

In [15]:
# Display the first loaded state.
states[0]

(('distensionAbdominal', 0),
 ('flatulencia', 0),
 ('dolorAbdominal', 0),
 ('reflujoGastroesofagico', 0),
 ('sintomasDispepsicos', 0),
 ('diarrea', 0),
 ('estrenimiento', 0),
 ('intolerancia', 0),
 ('borborigmos', 0))

In [8]:
import pickle

# Load the pickled list of actions.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('alimentos_lite.p', 'rb') as pickled_file:  # closes the handle when done
    actlist = pickle.load(pickled_file)


In [9]:
# Symptom catalogue
sintomas = pd.read_csv("sintomas.csv", delimiter=';')

# Baseline terminal state: every symptom at level 0.
terminal = [(name, 0) for name in sintomas['symptom']]

# Terminal states: the baseline, plus one variant per symptom in which only
# that symptom is raised to level 1.
terminals = [terminal] + [
    terminal[:position] + [(name, 1)] + terminal[position + 1:]
    for position, name in enumerate(sintomas['symptom'])
]


[[('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 0)],
 [('distensionAbdominal', 1),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 0)],
 [('distensionAbdominal', 0),
  ('flatulencia', 1),
  ('dolorAbdominal', 0),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 0)],
 [('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 1),
  ('reflujoGastroesofagico', 0),
  ('sintomasDispepsicos', 0),
  ('diarrea', 0),
  ('estrenimiento', 0),
  ('intolerancia', 0),
  ('borborigmos', 0)],
 [('distensionAbdominal', 0),
  ('flatulencia', 0),
  ('dolorAbdominal', 0),
  ('reflujo

In [4]:

# Build the Bayesian network used by the environment.
# NOTE(review): the recorded run of this cell raised KeyError: 'lentejas' —
# confirm the model's input data covers that food before relying on `model`.
model= create_model()

KeyError: 'lentejas'

In [17]:
# Create the environment; only the first 10 actions are offered to the agent.
mdpPerson= PersonFoodAndSympthonsMDP(model, actlist=(actlist[:10]), states=states, terminals=terminals)

In [18]:
# Display the initial state sampled when the environment was created.
mdpPerson.init


(('distensionAbdominal', 2),
 ('flatulencia', 0),
 ('dolorAbdominal', 2),
 ('reflujoGastroesofagico', 0),
 ('sintomasDispepsicos', 1),
 ('diarrea', 1),
 ('estrenimiento', 2),
 ('intolerancia', 1),
 ('borborigmos', 1))

In [19]:
# Create the learner with Ne=5, Rplus=2 and alpha=0.5.
agent_Qlearn=QLearningAgent(mdpPerson, 5, 2, alpha=0.5)

In [20]:
#prueba
a=mdpPerson.getNextState(actlist[7])

#inference = BayesianModelSampling(model)
#week=inference.likelihood_weighted_sample(evidence=evidence, size=1)


obteniendo siguiente estado
[('estrenimiento', 2.0), ('intolerancia', 1.0), ('diarrea', 2.0), ('sintomasDispepsicos', 2.0), ('flatulencia', 1.0), ('dolorAbdominal', 0.0), ('reflujoGastroesofagico', 1.0), ('borborigmos', 0.0), ('distensionAbdominal', 0.0)]


In [21]:
# Hand-built percept: the initial state paired with the reward of the
# previously sampled state `a`.
percept = (mdpPerson.init, mdpPerson.calculate_reward(a))
# Ask the agent for its action and display it.
next_action = agent_Qlearn(percept)
next_action

None


(('apio', 0),
 ('fresa', 0),
 ('manzana', 0),
 ('arroz', 0),
 ('cafe', 0),
 ('cebolla', 0),
 ('res', 0),
 ('huevo', 0),
 ('leche', 0),
 ('lechuga', 0),
 ('papa', 0),
 ('platano', 0),
 ('aji', 0),
 ('pollo', 0),
 ('te', 0),
 ('tomate', 0),
 ('frijol', 0))

In [22]:
# Display the action chosen by the agent.
next_action

(('apio', 0),
 ('fresa', 0),
 ('manzana', 0),
 ('arroz', 0),
 ('cafe', 0),
 ('cebolla', 0),
 ('res', 0),
 ('huevo', 0),
 ('leche', 0),
 ('lechuga', 0),
 ('papa', 0),
 ('platano', 0),
 ('aji', 0),
 ('pollo', 0),
 ('te', 0),
 ('tomate', 0),
 ('frijol', 0))

In [23]:
# Draw one likelihood-weighted sample from the model, using the chosen action
# as evidence, and display the resulting row.
evidence = next_action
inference = BayesianModelSampling(model)
week=inference.likelihood_weighted_sample(evidence=evidence, size=1)
week


Unnamed: 0,pollo,platano,papa,lechuga,huevo,res,arroz,fresa,apio,manzana,...,sintomasDispepsicos,frijol,flatulencia,cafe,dolorAbdominal,reflujoGastroesofagico,cebolla,borborigmos,distensionAbdominal,_weight
0,0,0,0,0,0,0,0,0,0,0,...,2,0,1,0,0,1,0,1,0,1.291402e-09


In [None]:
# Run one trial: the agent keeps acting until it returns None.
run_single_trial(agent_Qlearn, mdpPerson)