In [1]:
from flask import Flask, render_template, jsonify
from flask_socketio import SocketIO, emit
import gym
from gym import spaces
import math
import numpy as np
import requests

# Q-learning agent definition
class QLearningAgent:
    def __init__(self, n_actions, n_states, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.n_actions = n_actions
        self.q_table = np.zeros((n_states, n_states, n_actions))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.n_actions)
        else:
            return np.argmax(self.q_table[state[0], state[1]])

    def learn(self, state, action, reward, next_state):
        predict = self.q_table[state[0], state[1], action]
        target = reward + self.gamma * np.max(self.q_table[next_state[0], next_state[1]])
        self.q_table[state[0], state[1], action] += self.alpha * (target - predict)

# Discretize the environment
def discretize(state, bins=50):
    return tuple((np.array(state) * bins // 500).astype(int))

class RectangleMoveEnv(gym.Env):
    def __init__(self):
        super(RectangleMoveEnv, self).__init__()

        # Define action and observation space
        # Let's assume action space is discrete: 0: Up, 1: Down, 2: Left, 3: Right
        self.action_space = spaces.Discrete(4)
        
        # Define the observation space as the position of the rectangle. This is a simplification.
        # You can expand this to include more features if needed.
        self.observation_space = spaces.Box(low=0, high=500, shape=(2,), dtype=float)  # Assuming canvas size of 500x500 for illustration

    def reset(self):
        # For simplicity, reset will return the rectangle to the center of the canvas
        # This can be expanded to communicate with Flask server to get an actual reset position
        return [250, 250]

    def step(self, action):
        # Translate the action to the parameters of move_rectangle function
        # For simplicity, let's assume fixed speed, distance, and angle based on the action
        mappings = {
            0: {'speed': 50, 'angle': 0, 'distance': 10},   # Up
            1: {'speed': 50, 'angle': 180, 'distance': 10}, # Down
            2: {'speed': 50, 'angle': 270, 'distance': 10}, # Left
            3: {'speed': 50, 'angle': 90, 'distance': 10}   # Right
        }

        params = mappings[action]
        response = requests.post('http://127.0.0.1:5000/move_rectangle', json=params)
        data = response.json()

        # Get the new position (state) from the data
        new_position = [data['new_x'], data['new_y']]
        
        # Check border collision
        border_hit_response = requests.get('http://127.0.0.1:5000/border_hit')
        hit_data = border_hit_response.json()
        done = hit_data['hit']

        # Define reward. For simplicity, a negative reward if border is hit.
        reward = -10 if done else 1

        return new_position, reward, done, {}

    def render(self, mode='human'):
        # Optional method for visualization
        pass

    def close(self):
        pass

# Initialize the Q-learning agent and the environment
agent = QLearningAgent(n_actions=4, n_states=50)
env = RectangleMoveEnv()

# Train the agent on the environment
def train(agent, env, episodes=1000):
    for episode in range(episodes):
        state = discretize(env.reset())
        done = False
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, _ = env.step(action)
            next_state = discretize(next_state)
            agent.learn(state, action, reward, next_state)
            state = next_state

train(agent, env)

# Q-learning agent definition
class QLearningAgent:
    def __init__(self, n_actions, n_states, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.n_actions = n_actions
        self.q_table = np.zeros((n_states, n_states, n_actions))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.n_actions)
        else:
            return np.argmax(self.q_table[state[0], state[1]])

    def learn(self, state, action, reward, next_state):
        predict = self.q_table[state[0], state[1], action]
        target = reward + self.gamma * np.max(self.q_table[next_state[0], next_state[1]])
        self.q_table[state[0], state[1], action] += self.alpha * (target - predict)

# Discretize the environment
def discretize(state, bins=50):
    return tuple((np.array(state) * bins // 500).astype(int))

class RectangleMoveEnv(gym.Env):
    def __init__(self):
        super(RectangleMoveEnv, self).__init__()

        # Define action and observation space
        # Let's assume action space is discrete: 0: Up, 1: Down, 2: Left, 3: Right
        self.action_space = spaces.Discrete(4)
        
        # Define the observation space as the position of the rectangle. This is a simplification.
        # You can expand this to include more features if needed.
        self.observation_space = spaces.Box(low=0, high=500, shape=(2,), dtype=float)  # Assuming canvas size of 500x500 for illustration

    def reset(self):
        # For simplicity, reset will return the rectangle to the center of the canvas
        # This can be expanded to communicate with Flask server to get an actual reset position
        return [250, 250]

    def step(self, action):
        # Translate the action to the parameters of move_rectangle function
        # For simplicity, let's assume fixed speed, distance, and angle based on the action
        mappings = {
            0: {'speed': 50, 'angle': 0, 'distance': 10},   # Up
            1: {'speed': 50, 'angle': 180, 'distance': 10}, # Down
            2: {'speed': 50, 'angle': 270, 'distance': 10}, # Left
            3: {'speed': 50, 'angle': 90, 'distance': 10}   # Right
        }

        params = mappings[action]
        response = requests.post('http://127.0.0.1:5000/move_rectangle', json=params)
        data = response.json()

        # Get the new position (state) from the data
        new_position = [data['new_x'], data['new_y']]
        
        # Check border collision
        border_hit_response = requests.get('http://127.0.0.1:5000/border_hit')
        hit_data = border_hit_response.json()
        done = hit_data['hit']

        # Define reward. For simplicity, a negative reward if border is hit.
        reward = -10 if done else 1

        return new_position, reward, done, {}

    def render(self, mode='human'):
        # Optional method for visualization
        pass

    def close(self):
        pass

# Initialize the Q-learning agent and the environment
agent = QLearningAgent(n_actions=4, n_states=50)
env = RectangleMoveEnv()

# Train the agent on the environment
def train(agent, env, episodes=1000):
    for episode in range(episodes):
        state = discretize(env.reset())
        done = False
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, _ = env.step(action)
            next_state = discretize(next_state)
            agent.learn(state, action, reward, next_state)
            state = next_state

train(agent, env)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [5]:
mappings = {
            0: {'speed': 50, 'angle': 0, 'distance': 10},   # Up
            1: {'speed': 50, 'angle': 180, 'distance': 10}, # Down
            2: {'speed': 50, 'angle': 270, 'distance': 10}, # Left
            3: {'speed': 50, 'angle': 90, 'distance': 10}   # Right
        }

params = mappings[0]

In [6]:
response = requests.post('http://127.0.0.1:5000/move_rectangle', json=params)
print(response.status_code)  # should be 200 for a successful response
print(response.text)  # print the actual content of the response

404
<!doctype html>
<html lang=en>
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>



In [4]:
params

{'speed': 50, 'angle': 0, 'distance': 10}