# XAI Chatbot
Python Client for Google Dialogflow API V2 <br>
Copyright 2020 Denis Rothman MIT License. See LICENSE.

In [None]:
!pip install dialogflow

In [None]:
!pwd

/content


In [None]:
import os
import dialogflow_v2 as dialogflow
from google.api_core.exceptions import InvalidArgument

# Path to the service-account key file read by the Google client library.
# Replace the placeholder with your own key file, e.g. '/content/private_key.json'.
# (The value must be a plain string; the previous placeholder expression was
# evaluated as Python and failed with a NameError.)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/content/[YOUR PRIVATE KEY]'
DIALOGFLOW_PROJECT_ID = '[YOUR PROJECT ID]'  # project part of the session path
DIALOGFLOW_LANGUAGE_CODE = 'en'              # language code sent with each text query
# Session part of the session path built in dialog().
SESSION_ID = '[YOUR PROJECT ID]'

def dialog(our_query):
    """Send one text query to the Dialogflow agent and return its reply text.

    our_query: the text wrapped in a TextInput and sent with
        DIALOGFLOW_LANGUAGE_CODE as its language code.

    Returns the fulfillment_text of the query result.

    Exceptions raised by detect_intent (InvalidArgument included) are not
    handled here; they propagate unchanged to the caller.
    """
    # Session variables
    client = dialogflow.SessionsClient()
    session_name = client.session_path(DIALOGFLOW_PROJECT_ID, SESSION_ID)

    # Wrap the raw text into the query structure expected by detect_intent
    text_input = dialogflow.types.TextInput(
        text=our_query,
        language_code=DIALOGFLOW_LANGUAGE_CODE,
    )
    query_input = dialogflow.types.QueryInput(text=text_input)

    reply = client.detect_intent(session=session_name, query_input=query_input)
    return reply.query_result.fulfillment_text
    


# The Bellman equation and Q-learning, based on the Markov decision process (MDP)

## The reward matrix

In [None]:
# Markov decision process (MDP) - The Bellman equations adapted to
# Q-learning. Reinforcement learning with the Q action-value(reward) function.
# Copyright 2019 Denis Rothman MIT License. See LICENSE.
import numpy as ql
# R is the reward matrix: one row per state, one column per action.
# Entries > 0 are the ones possible_actions() reports as available.
R = ql.matrix([
    [0, 0, 0,   0, 1, 0],
    [0, 0, 0,   1, 0, 1],
    [0, 0, 100, 1, 0, 0],
    [0, 1, 1,   0, 1, 0],
    [1, 0, 0,   1, 0, 0],
    [0, 1, 0,   0, 0, 0],
])

# Q is the learning matrix in which the learned rewards are stored;
# it starts as a 6x6 matrix of zeros.
Q = ql.matrix(ql.zeros((6, 6)))

## The discount factor (gamma), used as a training penalty

In [None]:
# Gamma: a form of penalty or uncertainty applied while learning.
# In reward() it multiplies the best value already stored in Q for the
# next state before that value is added to the immediate reward from R.
# If the value were 1, the rewards would be too high.
# This way the system knows it is learning.
gamma = 0.8

## Initial state

In [None]:
# agent_s_state: "agent" is the name given to the system doing the calculation.
# s is the state the agent is going from and s' the state it is going to.
# This starting state can be random or it can be chosen, as long as the rest
# of the choices are not predetermined: randomness is part of this
# stochastic process.
agent_s_state = 5

## The random choice of the next state

In [None]:
# The possible "a" actions when the agent is in a given state
def possible_actions(state):
    """Return the column indices of row `state` of R whose reward is > 0.

    R is a 2-D matrix, so `where` yields (row indices, column indices);
    only the column indices are returned.
    """
    return ql.where(R[state, ] > 0)[1]

# Get the available actions in the current state (agent_s_state) and display
# them. PossibleAction is a module-level name that later cells rebind.
PossibleAction = possible_actions(agent_s_state)
print(PossibleAction)

# This function chooses at random which action to be performed within the range
# of all the available actions.
def ActionChoice(available_actions_range):
    """Pick one action at random from `available_actions_range`.

    available_actions_range: sequence or 1-D array of candidate action
        indices (for example the result of possible_actions()).

    Returns the chosen action as a plain Python int.

    The choice is made from the argument itself rather than from a
    module-level variable, and "no action available" is decided by the
    number of candidates, so a lone candidate with index 0 is still valid.
    """
    if len(available_actions_range) > 0:
        # size=1 then [0]: same random draw as before, without relying on the
        # deprecated int() conversion of a one-element array.
        next_action = int(ql.random.choice(available_actions_range, 1)[0])
    else:
        # No candidate: fall back to a random index and display it.
        # NOTE(review): choice(5, ...) draws from 0..4 only, while R has 6
        # columns - confirm whether index 5 is excluded on purpose.
        next_action = int(ql.random.choice(5, 1)[0])
        print(next_action)
    return next_action

# Sample the next action to be performed from the actions in PossibleAction
action = ActionChoice(PossibleAction)

[1]


## The Bellman equation

In [None]:
# A version of the Bellman equation for reinforcement learning using the Q function
# This reinforcement algorithm is a memoryless process
# The transition function T from one state to another
# is not in the equation below. T is done by the random choice above

def reward(current_state, action, gamma):
    """Update Q[current_state, action] in place with the Bellman Q function.

    current_state: row index of R and Q for the state being left.
    action: column index of the action taken; it is also used as the row
        of Q that is scanned for the best value already learned.
    gamma: factor applied to that best value.

    Returns None; the module-level matrix Q is modified.
    """
    # Columns of row `action` of Q holding that row's maximum value
    best_columns = ql.where(Q[action, ] == ql.max(Q[action, ]))[1]

    if best_columns.shape[0] > 1:
        # Several maxima: pick one of them at random. Element 0 is taken
        # explicitly because int() on a one-element array is deprecated.
        Max_State = int(ql.random.choice(best_columns, size=1)[0])
    else:
        Max_State = int(best_columns[0])
    MaxValue = Q[action, Max_State]

    # The Bellman MDP based Q function
    Q[current_state, action] = R[current_state, action] + gamma * MaxValue

# Rewarding Q matrix: one update for the initial state and its sampled action
reward(agent_s_state,action,gamma)

## Running the training episodes randomly

In [None]:
# Learning over n iterations depending on the convergence of the system
# A convergence function can replace the systematic repeating of the process
# by comparing the sum of the Q matrix to that of Q matrix n-1 in the
# previous episode
for i in range(50000):
    # Draw a random row index of Q as the current state
    current_state = ql.random.randint(0, int(Q.shape[0]))
    # Rebinds the module-level PossibleAction on every episode
    PossibleAction = possible_actions(current_state)
    action = ActionChoice(PossibleAction)
    # Updates Q[current_state, action] in place
    reward(current_state,action,gamma)
    
# Displaying Q before the norm of Q phase
print("Q  :")
print(Q)

# Norm of Q: Q divided by its largest entry, shown as a percentage
print("Normed Q :")
print(Q/ql.max(Q)*100)

Q  :
[[  0.      0.      0.      0.    258.44    0.   ]
 [  0.      0.      0.    321.8     0.    207.752]
 [  0.      0.    500.    321.8     0.      0.   ]
 [  0.    258.44  401.      0.    258.44    0.   ]
 [207.752   0.      0.    321.8     0.      0.   ]
 [  0.    258.44    0.      0.      0.      0.   ]]
Normed Q :
[[  0.       0.       0.       0.      51.688    0.    ]
 [  0.       0.       0.      64.36     0.      41.5504]
 [  0.       0.     100.      64.36     0.       0.    ]
 [  0.      51.688   80.2      0.      51.688    0.    ]
 [ 41.5504   0.       0.      64.36     0.       0.    ]
 [  0.      51.688    0.       0.       0.       0.    ]]


# Improving the program by introducing a decision-making process

In [None]:
import random
import numpy as np

# Norm of Q: Qp is Q divided by its largest entry; it is displayed
# as a percentage.
print("Normed Q :")
Qp = Q / ql.max(Q)
print(Qp * 100)

Normed Q :
[[  0.       0.       0.       0.      51.688    0.    ]
 [  0.       0.       0.      64.36     0.      41.5504]
 [  0.       0.     100.      64.36     0.       0.    ]
 [  0.      51.688   80.2      0.      51.688    0.    ]
 [ 41.5504   0.       0.      64.36     0.       0.    ]
 [  0.      51.688    0.       0.       0.       0.    ]]


In [None]:
import random
import numpy as np

# Norm of Q: Qp is Q divided by its largest entry; it is displayed
# as a percentage.
print("Normed Q :")
Qp = Q / ql.max(Q)
print(Qp * 100)

# Improving the program by introducing a decision-making process
conceptcode = list("ABCDEF")   # one letter label per row/column index of Q

WIP = [0] * 6                  # six slots, refilled with random values in the loop below
our_query = ""                 # last comment typed by the user

print("Sequences")

# maxv: number of iterations of the main loop
# mint / maxt: exclusive lower / upper bounds on sum(WIP) that trigger an alert
maxv, mint, maxt = 1000, 450, 500
#sh=ql.zeros((maxv, 2))
# Main loop: on each iteration draw six random WIP values, raise an alert
# (with a chatbot dialog) when their sum lies strictly between mint and maxt,
# then print a sequence of concept letters by following the entries of Q.
for i in range(0, maxv):
    for w in range(0, 6):
        WIP[w] = random.randint(0, 100)
    print(WIP)
    print("\n")
    wip_total = np.sum(WIP)  # computed once, reused for the test and the alert
    if mint < wip_total < maxt:
        print(mint, maxt)
        print("Alert!", wip_total)
        print("Mention MDP or Bellman in your comment, please")
        # Dialog loop: it ends only when the user types "no" or "bye".
        # (The former loop condition was always true, so the exit has
        # always been this explicit break.)
        while True:
            our_query = input("Enter your comment or question:")
            if our_query in ("no", "bye"):
                break
            if not our_query.strip():
                # Dialogflow requires non-empty text; an empty comment would
                # raise InvalidArgument in dialog() and abort the whole run.
                continue
            vresponse = dialog(our_query)
            print(vresponse)
        decision = input("Do you want to continue(enter yes) or stop(enter no) to work with your department before letting the program make a decision:")
        if decision == "no":
            break
        # After a first alert the alert window is narrowed
        mint = 460
        maxt = 470
    nextc = -1
    nextci = -1
    origin = ql.random.randint(0, 6)

    print(" ")
    print(conceptcode[int(origin)])

    for se in range(0, 6):
        # First step starts at the origin, later steps at the last choice
        po = origin if se == 0 else nextci
        for ci in range(0, 6):
            maxc = Q[po, ci]
            maxp = Qp[po, ci]
            # nextc is not reset between steps, so a move is accepted only if
            # its Q value is at least the best value seen so far; on ties the
            # highest column index wins.
            if maxc >= nextc:
                nextc = maxc
                nextp = maxp  # only needed by the disabled conceptprob line below
                nextci = ci
                #conceptprob[int(nextci)]=nextp *****
        if nextci == po:
            # No move to another state was accepted: the sequence ends
            break
        print(conceptcode[int(nextci)])
print("\n")


# WEB 

[ML Explanation Consult](https://console.dialogflow.com/api-client/demo/embedded/6ba8785d-6b3b-40de-8a2c-fdba7939c220)

[ML Explanation Consult and Share](https://bot.dialogflow.com/6ba8785d-6b3b-40de-8a2c-fdba7939c220)

      