# Problema de Markov basado en la persona con SII #

Se crea un problema de Markov ajustado al ambiente de la persona que consume ciertos alimentos y presenta ciertos síntomas.

In [36]:
from collections import defaultdict
from utils import argmax
from mdp import MDP, policy_evaluation
from bayesianFood_lite import *
from pgmpy.sampling import BayesianModelSampling
import time

class PersonFoodAndSympthonsMDP(MDP):
    """MDP whose states are symptom levels sampled from a Bayesian model.

    A state is a tuple of ``(symptom, level)`` pairs, ordered like the
    ``symptom`` column of ``sintomas.csv``.  An action is handed unchanged
    to the sampler as evidence, and the reward of a state is the negated
    sum of its levels.
    """

    def __init__(self, model, actlist, terminals, reward=None, states=None, gamma=0.9):
        """Store the model and draw the initial state from it.

        Args:
            model: Bayesian model accepted by ``BayesianModelSampling``.
            actlist: available actions; each one is later used as sampling
                evidence by ``getNextState``.
            terminals: collection of terminal states.
            reward: optional mapping from state to reward; when omitted (or
                empty) every entry of ``states`` starts with reward 0.
            states: optional iterable of known states.
            gamma: discount factor, must satisfy ``0 < gamma <= 1``.

        Raises:
            ValueError: if ``gamma`` is outside ``(0, 1]``.
        """
        # NOTE(review): MDP.__init__ is not invoked here; every attribute
        # is set by hand -- confirm the base class needs nothing else.
        if not (0 < gamma <= 1):
            raise ValueError("An MDP must have 0 < gamma <= 1")
        # bayesian model
        self.model = model
        # Symptom names are kept on the instance so getNextState does not
        # depend on a module-level ``sintomas`` variable being defined.
        sintomas = pd.read_csv("sintomas.csv", delimiter=';')
        self.symptoms = list(sintomas['symptom'])
        # states
        self.states = states
        # initial state: one forward sample restricted to symptom columns
        initial = BayesianModelSampling(self.model).forward_sample()
        self.init = self._symptom_state(initial)
        # actions
        self.actlist = actlist
        self.terminals = terminals

        self.gamma = gamma

        # Tolerate states=None instead of failing on the comprehension.
        known_states = states if states is not None else ()
        self.reward = reward or {s: 0 for s in known_states}

    def _symptom_state(self, sample):
        """Return the first row of ``sample`` as ``(symptom, level)`` pairs.

        Only symptoms that appear as columns of ``sample`` are included,
        in the order of ``self.symptoms``.
        """
        columns = set(sample)
        return tuple(
            (name, sample.iloc[0][name])
            for name in self.symptoms
            if name in columns
        )

    def calculate_reward(self, state):
        """Return the negated sum of the levels in ``state``."""
        return -sum(level for _, level in state)

    def getNextState(self, a):
        """Sample the state that follows action ``a``.

        ``a`` is passed as evidence to a likelihood-weighted sample of
        size 1.  The reward of the resulting state is recorded in
        ``self.reward`` before the state is returned.

        Returns:
            tuple of ``(symptom, level)`` pairs.
        """
        sampler = BayesianModelSampling(self.model)
        week = sampler.likelihood_weighted_sample(evidence=a, size=1)
        status = self._symptom_state(week)
        self.reward[status] = self.calculate_reward(status)
        return status

In [32]:
import csv
class QLearningAgent:
    """An exploratory Q-learning agent.

    The agent is called with a percept ``(state, reward)`` and returns the
    next action, or ``None`` once the received reward reaches
    ``terminal_reward`` (the trial is then considered finished).

    Actions are converted to tuples before being used as dictionary keys;
    the action object returned to the caller is the original entry of the
    action list.
    """

    def __init__(self, mdp, Ne, Rplus, alpha=None, terminal_reward=-2):  # alpha 0.25 0.5 0.75
        """Create the agent for ``mdp``.

        Args:
            mdp: object exposing ``gamma``, ``terminals`` and ``actlist``.
            Ne: iteration limit in the exploration function.
            Rplus: large value to assign before the iteration limit.
            alpha: learning rate.  Either a constant number, or a callable
                mapping the visit count of a (state, action) pair to a
                rate.  When omitted, ``1 / (1 + n)`` is used.
            terminal_reward: a percept whose reward is greater than or
                equal to this value ends the trial.
        """
        self.gamma = mdp.gamma
        self.terminals = mdp.terminals
        self.all_act = mdp.actlist
        self.Ne = Ne  # iteration limit in exploration function
        self.Rplus = Rplus  # large value to assign before iteration limit
        self.terminal_reward = terminal_reward
        self.Q = defaultdict(float)
        self.Nsa = defaultdict(float)
        self.s = None
        self.a = None
        self.r = None

        if alpha:
            self.alpha = alpha
        else:
            self.alpha = lambda n: 1./(1+n)  # udacity video

    def f(self, u, n):
        """Exploration function.

        Returns the fixed ``Rplus`` until the state/action pair has been
        tried ``Ne`` times, and the utility estimate ``u`` afterwards.
        """
        if n < self.Ne:
            return self.Rplus
        else:
            return u

    def actions_in_state(self, state):
        """Return the actions possible in ``state``.

        ``[None]`` for a state contained in ``self.terminals``, otherwise
        the full action list.  Useful for max and argmax.
        """
        if state in self.terminals:
            return [None]
        else:
            return self.all_act

    @staticmethod
    def _action_key(action):
        """Return a hashable key for ``action`` (``None`` stays ``None``)."""
        return None if action is None else tuple(action)

    def _learning_rate(self, n):
        """Return the step size for a pair visited ``n`` times.

        A callable ``alpha`` is applied to ``n``; a numeric ``alpha`` is a
        constant rate and is NOT scaled by the visit count.
        """
        alpha = self.alpha
        return alpha(n) if callable(alpha) else alpha

    def __call__(self, percept):
        """Learn from ``percept`` and return the next action (or ``None``)."""
        s1, r1 = self.update_state(percept)
        Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
        gamma = self.gamma
        actions_in_state = self.actions_in_state
        action_key = self._action_key
        trial_over = r1 >= self.terminal_reward

        if trial_over:
            # NOTE(review): this is keyed on the previous state ``s``, not
            # on ``s1`` -- kept as in the original; confirm it is intended.
            Q[s, None] = r1
        if s is not None:
            s1 = tuple(s1)
            s = tuple(s)
            a = action_key(a)
            Nsa[s, a] += 1
            best_next = max(Q[s1, action_key(a1)] for a1 in actions_in_state(s1))
            Q[s, a] += self._learning_rate(Nsa[s, a]) * (r + gamma * best_next - Q[s, a])
        if trial_over:
            self.s = self.a = self.r = None
        else:
            self.s, self.r = s1, r1
            state_key = tuple(s1)
            self.a = argmax(
                actions_in_state(s1),
                key=lambda a1: self.f(Q[state_key, action_key(a1)],
                                      Nsa[state_key, action_key(a1)]))
        return self.a

    def update_state(self, percept):
        """To be overridden in most cases. The default case
        assumes the percept to be of type (state, reward)."""
        return percept


def run_single_trial(agent_program, mdp):
    """Run one trial of ``agent_program`` against ``mdp``.

    Starting from ``mdp.init``, the agent is repeatedly shown the current
    state together with its reward and asked for an action; the trial
    stops as soon as the agent answers ``None``.

    Returns:
        ``[number_of_actions_taken, accumulated_reward]``.
    """
    state = mdp.init
    steps = 0
    total_reward = 0
    while True:
        reward = mdp.calculate_reward(state)
        total_reward += reward
        action = agent_program((state, reward))
        if action is None:
            return [steps, total_reward]
        steps += 1
        # The successor depends only on the chosen action.
        state = mdp.getNextState(action)


Probamos el entorno

In [29]:
import pickle
# Load the pickled state list; the context manager closes the file handle
# (the original left it open).
# NOTE: pickle must only be used on trusted files.
with open('sintomas.p', 'rb') as pickled_file:
    states = pickle.load(pickled_file)


In [30]:
# Display the first loaded state to inspect its structure.
states[0]

(('distensionAbdominal', 0),
 ('flatulencia', 0),
 ('dolorAbdominal', 0),
 ('reflujoGastroesofagico', 0),
 ('sintomasDispepsicos', 0),
 ('diarrea', 0),
 ('estrenimiento', 0),
 ('intolerancia', 0),
 ('borborigmos', 0))

In [6]:
import pickle
# Load the pickled action list; the context manager closes the file handle
# (the original left it open).
# NOTE: pickle must only be used on trusted files.
with open('alimentos_lite.p', 'rb') as pickled_file:
    actlist = pickle.load(pickled_file)


In [7]:
#symptoms list
# Symptom catalogue (one name per row in the 'symptom' column).
sintomas = pd.read_csv("sintomas.csv", delimiter=';')
symptom_names = list(sintomas['symptom'])

# Baseline terminal state: every symptom at level 0.
terminal = [(name, 0) for name in symptom_names]
terminals = [terminal]

# One extra terminal state per symptom, with only that symptom at level 1.
# NOTE(review): each entry is a list while the sampled states are tuples,
# so a `state in terminals` membership test would not match -- confirm
# whether that is intended before changing the container type.
for position, name in enumerate(symptom_names):
    variant = list(terminal)
    variant[position] = (name, 1)
    terminals.append(variant)

In [37]:
# Build the Bayesian model. create_model is not defined in this file;
# presumably it comes from the bayesianFood_lite star import -- confirm.
model= create_model()

In [38]:
from random import shuffle
# Shuffle the action list in place. No seed is set, so the resulting order
# differs between runs.
shuffle(actlist)

In [40]:
# Build the environment with only the first 30 actions of the shuffled
# list; reward and gamma keep their defaults.
mdpPerson= PersonFoodAndSympthonsMDP(model, actlist=(actlist[:30]), states=states, terminals=terminals)

In [11]:
# Q-learning agent with Ne=5, Rplus=2 and alpha=0.5.
agent_Qlearn=QLearningAgent(mdpPerson, 5, 2, alpha=0.5)

In [99]:
# Run one trial; the result is [number of actions taken, accumulated reward].
run_single_trial(agent_Qlearn, mdpPerson)

In [46]:
import csv
 
# 200 trials with alpha=0.75; one row per trial:
# [trial index, actions taken, accumulated reward, elapsed seconds].
myData = [["Trial", "Number_states", "Accumulated_reward","Time"]]
agent_Qlearn = QLearningAgent(mdpPerson, 5, 1, alpha=0.75)

for trial in range(200):
    started = time.time()
    steps, total_reward = run_single_trial(agent_Qlearn, mdpPerson)
    elapsed = time.time() - started
    myData.append([trial, steps, total_reward, elapsed])


In [49]:
# Write the trial table. newline='' is what the csv module requires of the
# file object so that it controls line endings itself (otherwise rows can
# be separated by blank lines on some platforms).
with open('test_alpha75.csv', 'w', newline='') as myFile:
    writer = csv.writer(myFile)
    writer.writerows(myData)

In [48]:
# Persist the learned Q table; the context manager flushes and closes the
# file (the original never closed the handle).
with open("Q75.p", "wb") as q_file:
    pickle.dump(agent_Qlearn.Q, q_file)

In [42]:
import pickle 
# Persist the environment; the context manager flushes and closes the file
# (the original never closed the handle).
with open("mdpPerson.p", "wb") as mdp_file:
    pickle.dump(mdpPerson, mdp_file)

In [49]:
 
# 200 trials with alpha=0.5; one row per trial:
# [trial index, actions taken, accumulated reward, elapsed seconds].
myData = [["Trial", "Number_states", "Accumulated_reward","Time"]]
agent_Qlearn = QLearningAgent(mdpPerson, 5, 1, alpha=0.5)

for trial in range(200):
    started = time.time()
    steps, total_reward = run_single_trial(agent_Qlearn, mdpPerson)
    elapsed = time.time() - started
    myData.append([trial, steps, total_reward, elapsed])
    

# Write the trial table. csv.writer emits str, so the file must be opened
# in text mode ('wb' raises TypeError on Python 3); newline='' lets the csv
# module control line endings itself.
with open('test_alpha5.csv', 'w', newline='') as myFile:
    writer = csv.writer(myFile)
    writer.writerows(myData)

In [44]:
# Persist the learned Q table; the context manager flushes and closes the
# file (the original never closed the handle).
with open("Q5.p", "wb") as q_file:
    pickle.dump(agent_Qlearn.Q, q_file)

In [56]:
# 200 trials with alpha=0.25; one row per trial:
# [trial index, actions taken, accumulated reward, elapsed seconds].
myData = [["Trial", "Number_states", "Accumulated_reward","Time"]]
agent_Qlearn = QLearningAgent(mdpPerson, 5, 1, alpha=0.25)

for trial in range(200):
    started = time.time()
    steps, total_reward = run_single_trial(agent_Qlearn, mdpPerson)
    elapsed = time.time() - started
    myData.append([trial, steps, total_reward, elapsed])
    



In [52]:
# Write the trial table. newline='' is what the csv module requires of the
# file object so that it controls line endings itself (otherwise rows can
# be separated by blank lines on some platforms).
with open('test_alpha25.csv', 'w', newline='') as myFile:
    writer = csv.writer(myFile)
    writer.writerows(myData)

In [55]:
# Persist the learned Q table; the context manager flushes and closes the
# file (the original never closed the handle).
with open("Q25.p", "wb") as q_file:
    pickle.dump(agent_Qlearn.Q, q_file)