In [1]:
#A program that lets you play a game by answering questions.

import random #It helps us make random choices.

#This is a function "run" where the game happens.
def run():
    """Play the four-question game interactively on stdin/stdout.

    Before each question the player is asked whether to play and must answer
    exactly 'Y' or 'N' (anything else is rejected and the prompt repeats).
    Answering 'N' or losing a question ends the game; winning a question adds
    its reward to the running total. The total is printed at the end.

    Returns:
        None. All results are reported through print().
    """
    #Each stage is (chance of winning, reward): later questions are harder to win but pay more.
    stages = (
        (0.9, 100),
        (0.75, 1000),
        (0.5, 10000),
        (0.1, 50000),
    )
    prompt = 'Do you want to play? (Y/N): '

    #Amount won so far.
    winnings = 0

    for number, (win_chance, prize) in enumerate(stages, start=1):
        #Keep asking until the answer is exactly 'Y' or 'N'.
        answer = input(prompt)
        while answer != 'Y' and answer != 'N':
            print('Wrong entry, please answer Y or N')
            answer = input(prompt)

        #The player walks away with what has been won so far.
        if answer == 'N':
            print('Quit')
            break

        #random.random() draws a float in [0.0, 1.0); the question is won only
        #when the draw falls below the chance of winning.
        if random.random() >= win_chance:
            print('You lost')
            break

        print('You won the question', number)
        winnings += prize

    #Reported whether the game ended by quitting, losing or answering every question.
    print('Your total reward is', winnings)


In [None]:
#Here we implement Q-learning in the agent:

import random

def run():
    """Play the four-question game and apply a Q-learning update on each win.

    The player is asked before each question whether to play and must answer
    exactly 'Y' or 'N'. Answering 'N' or losing a question ends the game;
    winning adds the question's reward to the total and updates the Q-value
    of the "play" action for that question. The total is printed at the end.

    The Q-table is local to this call: it is updated but never read to pick an
    action, and it is discarded when the function returns.

    Returns:
        None. All results are reported through print().
    """
    questions = ['Q1', 'Q2', 'Q3', 'Q4']
    probabilities = [0.9, 0.75, 0.5, 0.1]
    rewards = [100, 1000, 10000, 50000]
    total_reward = 0

    #One row per question (state) and one column per action, all starting at zero.
    #Only column 0 ("play") is ever written below.
    q_table = [[0, 0] for _ in questions]

    learning_rate = 0.1  #how much the agent learns from each update
    discount_factor = 0.6  #how much the agent values future rewards
    play_action = 0

    for index, (probability, reward) in enumerate(zip(probabilities, rewards)):
        decision = input('Do you want to play? (Y/N): ')
        while decision not in ('Y', 'N'):
            print('Please answer Y or N')
            decision = input('Do you want to play? (Y/N): ')

        if decision == 'N':
            print('Quit')
            break

        #random.random() draws a float in [0.0, 1.0); the question is won only
        #when the draw falls below its probability.
        if random.random() >= probability:
            print('You lost')
            break

        print('You won the question', index + 1)
        total_reward += reward

        #Winning the last question ends the game, so there is no next state and its
        #future value is 0. Indexing q_table[index + 1] there would raise IndexError.
        next_state = index + 1
        if next_state < len(q_table):
            max_next_q_value = max(q_table[next_state])
        else:
            max_next_q_value = 0

        #Q-learning update: blend the old estimate with reward + discounted best next value.
        q_value = q_table[index][play_action]
        q_table[index][play_action] = (
            (1 - learning_rate) * q_value
            + learning_rate * (reward + discount_factor * max_next_q_value)
        )

    print('Your total reward is', total_reward)

#We run the Q-learning version of the game once (it prompts for Y/N before each question):
run()


In [None]:
#Here we implement RL in the agent:

import random

#Initial policy of the agent: one 'Y'/'N' entry per question (four questions).
policy = list('YNYN')

#Exploration rate and learning rate. run() below reads exploration_rate;
#learning_rate is not read by run() in this cell.
exploration_rate, learning_rate = 0.2, 0.1

def run():
    """Play the four-question game, gated by the module-level policy.

    Before each question the player must answer exactly 'Y' or 'N'. The
    question is played when the answer equals the policy entry for that
    question, or otherwise when a random draw falls below exploration_rate;
    in every other case the game ends with 'Quit'. Losing a question also ends
    the game. Each win adds the reward to the total and sets the policy entry
    for that question to 'Y'. The total is printed at the end.

    Reads the module-level names `policy` and `exploration_rate`, and mutates
    `policy` in place.

    Returns:
        None. All results are reported through print().
    """
    #Each stage is (chance of winning, reward): later questions are harder to win but pay more.
    stages = ((0.9, 100), (0.75, 1000), (0.5, 10000), (0.1, 50000))
    prompt = 'Do you want to play? (Y/N): '
    winnings = 0

    for position, (win_chance, prize) in enumerate(stages):
        #Keep asking until the answer is exactly 'Y' or 'N'.
        answer = input(prompt)
        while answer not in ('Y', 'N'):
            print('Wrong entry, please answer Y or N')
            answer = input(prompt)

        #NOTE(review): the question is played when the typed answer EQUALS the policy
        #entry, so typing 'N' where the entry is 'N' plays and typing 'Y' there quits
        #(unless the exploration draw succeeds) - confirm this is intended.
        #The exploration draw is only made when the answer differs from the policy.
        follows_policy = answer == policy[position]
        if not (follows_policy or random.random() < exploration_rate):
            print('Quit')
            break

        #random.random() draws a float in [0.0, 1.0); the question is won only
        #when the draw falls below the chance of winning.
        if random.random() >= win_chance:
            print('You lost')
            break

        print('You won the question', position + 1)
        winnings += prize

        #A successful play marks this question as 'Y' in the shared policy.
        policy[position] = 'Y'

    print('Your total reward is', winnings)

#We run the policy-based version of the game once (it prompts for Y/N before each question):
run()
