In [None]:
import gym
from gym import spaces
import numpy as np
import random
import math

class WarehouseEnv(gym.Env):
    """Grid-world warehouse in which a robot collects a fixed set of items.

    The robot starts at ``[0, 0]`` on a ``grid_size`` x ``grid_size`` grid and
    moves one cell per step. Positions are ``[x, y]`` lists; ``render`` draws
    ``y`` as the row and ``x`` as the column.

    Actions:
        0 = Up (y - 1), 1 = Down (y + 1), 2 = Left (x - 1), 3 = Right (x + 1).
        A move that would leave the grid is clamped to the border cell.

    Reward for a step is exactly one of:
        -0.1 when no item is reached, 1 when an item is reached, and 10 when
        the step leaves no items remaining (this overrides the 1).
    """

    # Item layout restored by every reset. Stored as tuples so an episode can
    # never mutate the template; each episode works on fresh list copies.
    INITIAL_ITEMS = ((5, 5), (2, 8), (8, 3))

    def __init__(self):
        super(WarehouseEnv, self).__init__()

        # Define grid size and positions
        self.grid_size = 10
        self.robot_pos = [0, 0]  # Starting position of the robot
        self.items = self._initial_items()  # Positions of items

        # Action space: 0 = Up, 1 = Down, 2 = Left, 3 = Right
        self.action_space = spaces.Discrete(4)

        # Observation space is the robot's position in the grid
        self.observation_space = spaces.Box(low=0, high=self.grid_size - 1, shape=(2,), dtype=np.int32)

    def _initial_items(self):
        """Return a fresh, mutable copy of the starting item positions."""
        return [list(position) for position in self.INITIAL_ITEMS]

    def _observation(self):
        """Return the robot position as an array of the declared observation dtype."""
        # Without an explicit dtype, np.array picks NumPy's platform default
        # integer type, which need not be the int32 declared in observation_space.
        return np.array(self.robot_pos, dtype=np.int32)

    def reset(self):
        """Put the robot back at ``[0, 0]``, restore all items, return the observation."""
        self.robot_pos = [0, 0]
        self.items = self._initial_items()  # Reset item positions
        return self._observation()

    def step(self, action):
        """Apply one action.

        Args:
            action: 0 = Up, 1 = Down, 2 = Left, 3 = Right. Any other value
                leaves the robot where it is.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``info`` is an
            empty dict and ``done`` is True once no items remain.
        """
        # Move robot based on action
        if action == 0:   # Up
            self.robot_pos[1] = max(0, self.robot_pos[1] - 1)
        elif action == 1: # Down
            self.robot_pos[1] = min(self.grid_size - 1, self.robot_pos[1] + 1)
        elif action == 2: # Left
            self.robot_pos[0] = max(0, self.robot_pos[0] - 1)
        elif action == 3: # Right
            self.robot_pos[0] = min(self.grid_size - 1, self.robot_pos[0] + 1)

        reward = -0.1  # Slight penalty for movement
        done = False

        # Check if the robot is on an item
        if self.robot_pos in self.items:
            reward = 1  # Reward for reaching an item
            self.items.remove(self.robot_pos)

        if not self.items:  # If all items are picked up
            done = True
            reward = 10  # Large reward for completing task

        return self._observation(), reward, done, {}

    def render(self):
        """Print the grid: 0 = empty cell, 1 = robot, 2 = item."""
        grid = np.zeros((self.grid_size, self.grid_size), dtype=int)
        for item in self.items:
            grid[item[1], item[0]] = 2  # Mark items with '2'
        # Drawn last so the robot marker wins if it shares a cell with an item.
        grid[self.robot_pos[1], self.robot_pos[0]] = 1  # Mark robot with '1'
        print(grid)

# Greedy policy: head for whichever remaining item is nearest to the robot
def greedy_policy(robot_pos, items):
    """Pick the action that steps the robot toward its nearest item.

    Args:
        robot_pos: ``[x, y]`` position of the robot.
        items: list of ``[x, y]`` item positions still to be collected.

    Returns:
        0 (Up), 1 (Down), 2 (Left) or 3 (Right). With no items left, a
        uniformly random action is returned.
    """
    if not items:
        return random.choice([0, 1, 2, 3])

    # Straight-line (Euclidean) distance from the robot to every item.
    distances = []
    for candidate in items:
        dx = candidate[0] - robot_pos[0]
        dy = candidate[1] - robot_pos[1]
        distances.append(math.sqrt(dx ** 2 + dy ** 2))
    target = items[np.argmin(distances)]

    # Close the horizontal gap first; only then move vertically.
    if target[0] > robot_pos[0]:
        return 3  # Move Right
    if target[0] < robot_pos[0]:
        return 2  # Move Left
    if target[1] > robot_pos[1]:
        return 1  # Move Down
    return 0  # Move Up

# Play one full episode, letting the greedy policy choose every move
env = WarehouseEnv()
state = env.reset()
done = False

print("Starting Warehouse Navigation Simulation\n")
while True:
    # The policy reads the live robot/item state straight off the environment.
    action = greedy_policy(env.robot_pos, env.items)
    state, reward, done, info = env.step(action)
    env.render()
    print(f"Reward: {reward}\n")
    if done:
        break


Starting Warehouse Navigation Simulation

[[0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
Reward: -0.1

[[0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
Reward: -0.1

[[0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
Reward: -0.1

[[0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 

Initial Setup:

The environment is initialized as a 10x10 grid.
The robot starts at position $[0, 0]$, and the items are placed at predefined positions: $[[5, 5], [2, 8], [8, 3]]$.
The goal is to collect all items by moving the robot to their positions.
Greedy Policy:

At each step, the greedy policy chooses the action that moves the robot closer to the nearest item.
The distance to each item is calculated using the Euclidean distance formula:
$$d = \sqrt{(x_2 - x_1)^2 + (y_2 - y_1)^2}$$

The item with the minimum distance is identified as the closest item.
Action Selection:

The greedy policy steps toward the closest item. The conditions below are checked in order and the first one that applies is used, so the horizontal gap is closed before the vertical one:
Move Right (3): If the item's x-coordinate is greater than the robot's x-coordinate.
Move Left (2): If the item's x-coordinate is smaller than the robot's x-coordinate.
Move Down (1): If the x-coordinates match and the item's y-coordinate is greater than the robot's y-coordinate.
Move Up (0): Otherwise, i.e. the x-coordinates match and the item's y-coordinate is smaller than the robot's y-coordinate.
Reward Structure:

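The robot receives exactly one of the following rewards per step:
-0.1 for a move that does not reach an item, to encourage efficiency (penalty for unnecessary moves).
+1 when it reaches an item.
+10 when it collects the last remaining item, marking task completion (this replaces the +1 for that step).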
Termination Condition:

The simulation ends when all items are collected (`done=True`).

In [None]:
import gym
import numpy as np
import random
import pandas as pd
from gym import spaces

class StockTradingEnv(gym.Env):
    """Single-asset trading environment driven by a fixed price series.

    Each step reads the price at ``current_step``, applies the action at that
    price, then advances to the next price.

    Actions:
        0 = Hold, 1 = Buy as many whole shares as the cash balance allows,
        2 = Sell the entire position (ignored when nothing is held).

    Args:
        data: indexable sequence of prices; ``len(data)`` must be supported.
        initial_balance: cash available at the start of every episode.
            Defaults to 1000, the value that was previously hard-coded.
    """

    def __init__(self, data, initial_balance=1000):
        super(StockTradingEnv, self).__init__()

        self.data = data
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = self.initial_balance  # Initial balance
        self.position = 0  # Stock position (number of stocks held)
        self.net_worth = self.balance
        # The episode ends when current_step reaches the last index of data.
        self.max_steps = len(data) - 1

        # Action space: 0 = Hold, 1 = Buy, 2 = Sell
        self.action_space = spaces.Discrete(3)

        # Observation space includes stock price and net worth
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        """Restart the episode at the first price and return the initial observation."""
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        self.net_worth = self.balance
        return self._next_observation()

    def _next_observation(self):
        """Return ``[price at current_step, net_worth]`` in the declared float32 dtype."""
        current_price = self.data[self.current_step]
        # Explicit dtype so the array matches the float32 declared in observation_space.
        return np.array([current_price, self.net_worth], dtype=np.float32)

    def step(self, action):
        """Trade at the current price, then advance one time step.

        Returns:
            Tuple ``(observation, reward, done, info)``. ``reward`` is the
            profit so far, i.e. net worth minus the initial balance (not the
            change since the previous step). ``info`` is an empty dict.
        """
        current_price = self.data[self.current_step]

        if action == 1:  # Buy
            # Floor division buys whole shares only; the remainder stays as cash.
            self.position += self.balance // current_price
            self.balance %= current_price
        elif action == 2 and self.position > 0:  # Sell
            self.balance += self.position * current_price
            self.position = 0

        # Net worth is valued at the price the action was executed at.
        self.net_worth = self.balance + self.position * current_price
        self.current_step += 1
        done = self.current_step >= self.max_steps
        reward = self.net_worth - self.initial_balance  # Profit relative to the starting balance

        return self._next_observation(), reward, done, {}

    def render(self):
        """Print the current step, cash balance, position and net worth."""
        print(f"Step: {self.current_step}, Balance: {self.balance}, Position: {self.position}, Net Worth: {self.net_worth}")

# Synthetic price series: 100 draws from a normal distribution (mean 100, std 10)
data = np.random.normal(loc=100, scale=10, size=100)

env = StockTradingEnv(data)
state = env.reset()
done = False

while True:
    # Placeholder strategy: pick Hold/Buy/Sell uniformly at random.
    # A real (e.g. greedy) strategy can be substituted here.
    action = random.choice([0, 1, 2])
    state, reward, done, info = env.step(action)
    env.render()
    if done:
        break


Step: 1, Balance: 1000, Position: 0, Net Worth: 1000.0
Step: 2, Balance: 1000, Position: 0, Net Worth: 1000.0
Step: 3, Balance: 1000, Position: 0, Net Worth: 1000.0
Step: 4, Balance: 1000, Position: 0, Net Worth: 1000.0
Step: 5, Balance: 1000, Position: 0, Net Worth: 1000.0
Step: 6, Balance: 96.75362658816596, Position: 8.0, Net Worth: 1000.0
Step: 7, Balance: 96.75362658816596, Position: 8.0, Net Worth: 1001.9787899237039
Step: 8, Balance: 96.75362658816596, Position: 8.0, Net Worth: 933.4670706055146
Step: 9, Balance: 96.75362658816596, Position: 8.0, Net Worth: 869.5517889461931
Step: 10, Balance: 4.6264435889142135, Position: 9.0, Net Worth: 833.7710905821799
Step: 11, Balance: 4.6264435889142135, Position: 9.0, Net Worth: 872.1427698208875
Step: 12, Balance: 4.6264435889142135, Position: 9.0, Net Worth: 1131.9791757565313
Step: 13, Balance: 4.6264435889142135, Position: 9.0, Net Worth: 1059.2857373715924
Step: 14, Balance: 4.6264435889142135, Position: 9.0, Net Worth: 850.67106658

In [None]:
!pip install PySimpleGUIWeb


Collecting PySimpleGUIWeb
  Downloading PySimpleGUIWeb-0.39.0-py3-none-any.whl.metadata (17 kB)
Collecting remi<=2020.3.10 (from PySimpleGUIWeb)
  Downloading remi-2020.3.10-py3-none-any.whl.metadata (17 kB)
Downloading PySimpleGUIWeb-0.39.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading remi-2020.3.10-py3-none-any.whl (505 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m505.1/505.1 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: remi, PySimpleGUIWeb
Successfully installed PySimpleGUIWeb-0.39.0 remi-2020.3.10


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import random

# Warehouse Environment Simulation Logic
class WarehouseEnv:
    """Grid-world warehouse in which a robot collects items.

    Positions are ``[x, y]`` lists; ``render`` draws ``y`` as the row and
    ``x`` as the column. The robot always starts at ``[0, 0]``.

    Args:
        grid_size: side length of the square grid.
        items: iterable of ``(x, y)`` item positions. ``None`` or an empty
            sequence selects the default layout.
    """

    # Layout used when the caller supplies no items.
    DEFAULT_ITEMS = ((5, 5), (2, 8), (8, 3))

    def __init__(self, grid_size=10, items=None):
        self.grid_size = grid_size
        self.robot_pos = [0, 0]  # Starting position of the robot
        # Keep a private copy of the layout: step() removes collected items, and
        # that must neither touch the caller's list nor lose the layout for reset().
        self._initial_items = [list(pos) for pos in (items or self.DEFAULT_ITEMS)]
        self.items = [list(pos) for pos in self._initial_items]  # Positions of items

    def reset(self):
        """Return the robot to ``[0, 0]`` and restore the layout given at construction."""
        self.robot_pos = [0, 0]
        self.items = [list(pos) for pos in self._initial_items]  # Reset items
        return np.array(self.robot_pos)

    def step(self, action):
        """Apply one action and return ``(observation, reward, done)``.

        Actions: 0 = Up, 1 = Down, 2 = Left, 3 = Right. A move that would
        leave the grid, or an unknown action, leaves the robot in place.

        Reward is -0.1 when no item is reached, 1 when an item is reached, and
        10 when the step leaves no items remaining (overriding the 1).
        """
        # Actions: 0 = Up, 1 = Down, 2 = Left, 3 = Right
        if action == 0 and self.robot_pos[1] > 0:
            self.robot_pos[1] -= 1
        elif action == 1 and self.robot_pos[1] < self.grid_size - 1:
            self.robot_pos[1] += 1
        elif action == 2 and self.robot_pos[0] > 0:
            self.robot_pos[0] -= 1
        elif action == 3 and self.robot_pos[0] < self.grid_size - 1:
            self.robot_pos[0] += 1

        reward = -0.1
        done = False
        if self.robot_pos in self.items:
            reward = 1
            self.items.remove(self.robot_pos)

        if not self.items:
            done = True
            reward = 10
        return np.array(self.robot_pos), reward, done

    def render(self):
        """Print the grid: 0 = empty cell, 1 = robot, 2 = item."""
        grid = np.zeros((self.grid_size, self.grid_size), dtype=int)
        for item in self.items:
            grid[item[1], item[0]] = 2  # Item position marked with '2'
        # Drawn last so the robot marker wins if it shares a cell with an item.
        grid[self.robot_pos[1], self.robot_pos[0]] = 1  # Robot position marked with '1'
        print(grid)

def greedy_policy(robot_pos, items):
    """Choose the move that brings the robot toward its nearest item.

    Args:
        robot_pos: ``[x, y]`` position of the robot.
        items: list of ``[x, y]`` positions of the items still to collect.

    Returns:
        3 (Right) or 2 (Left) while the x-coordinates differ, otherwise
        1 (Down) or 0 (Up). A random action is returned when no items remain.
    """
    if not items:
        return random.choice([0, 1, 2, 3])

    # Euclidean distance from the robot to each item; the first minimum wins.
    here = np.array(robot_pos)
    distances = [np.linalg.norm(np.array(candidate) - here) for candidate in items]
    target_x, target_y = items[np.argmin(distances)][0], items[np.argmin(distances)][1]

    # Horizontal alignment takes priority over vertical movement.
    if target_x > robot_pos[0]:
        return 3
    if target_x < robot_pos[0]:
        return 2
    if target_y > robot_pos[1]:
        return 1
    return 0

# Stock Trading Environment Simulation Logic
class StockTradingEnv:
    """Single-asset trading simulation driven by a fixed price series.

    Each step reads the price at ``current_step``, applies the action at that
    price, then advances to the next price.

    Actions:
        0 = Hold, 1 = Buy as many whole shares as the cash balance allows,
        2 = Sell the entire position (ignored when nothing is held).

    Args:
        data: indexable sequence of prices; ``len(data)`` must be supported.
        initial_balance: cash available at the start of every episode.
    """

    def __init__(self, data, initial_balance=1000):
        self.data = data
        # Remembered so reset() and the reward honour the constructor argument
        # instead of assuming a starting balance of 1000.
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.position = 0
        self.net_worth = self.balance
        # The episode ends when current_step reaches the last index of data.
        self.max_steps = len(data) - 1

    def _observation(self):
        """Return ``[price at current_step, net_worth]`` as a NumPy array."""
        return np.array([self.data[self.current_step], self.net_worth])

    def reset(self):
        """Restart the episode with the configured initial balance and return the observation."""
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        self.net_worth = self.balance
        return self._observation()

    def step(self, action):
        """Trade at the current price, then advance one time step.

        Returns:
            Tuple ``(observation, reward, done)``. ``reward`` is the profit so
            far: net worth minus the initial balance.
        """
        current_price = self.data[self.current_step]

        if action == 1:  # Buy
            # Floor division buys whole shares only; the remainder stays as cash.
            self.position += self.balance // current_price
            self.balance %= current_price
        elif action == 2 and self.position > 0:  # Sell
            self.balance += self.position * current_price
            self.position = 0

        # Net worth is valued at the price the action was executed at.
        self.net_worth = self.balance + self.position * current_price
        self.current_step += 1
        done = self.current_step >= self.max_steps
        reward = self.net_worth - self.initial_balance
        return self._observation(), reward, done

    def render(self):
        """Print the current step, cash balance, position and net worth."""
        print(f"Step: {self.current_step}, Balance: {self.balance}, Position: {self.position}, Net Worth: {self.net_worth}")

# Simulation functions
def run_warehouse_simulation():
    """Run one warehouse episode under the greedy policy.

    The grid and the reward are printed after every step; the function
    returns once the environment reports the episode as done.
    """
    env = WarehouseEnv()
    env.reset()
    print("Warehouse Simulation Starting...\n")

    finished = False
    while not finished:
        # The policy works from the environment's live robot and item state.
        move = greedy_policy(env.robot_pos, env.items)
        _, reward, finished = env.step(move)
        env.render()
        print(f"Reward: {reward}\n")

def run_stock_trading_simulation():
    """Run one trading episode with uniformly random actions.

    Prices are 100 draws from a normal distribution (mean 100, std 10). The
    environment state and the reward are printed after every step.
    """
    prices = np.random.normal(loc=100, scale=10, size=100)  # Generate random stock prices
    env = StockTradingEnv(prices)
    env.reset()
    print("Stock Trading Simulation Starting...\n")

    finished = False
    while not finished:
        choice = random.choice([0, 1, 2])  # 0=Hold, 1=Buy, 2=Sell
        _, reward, finished = env.step(choice)
        env.render()
        print(f"Reward: {reward}\n")

# IPython Widgets Interface
def start_warehouse():
    """Clear the cell output, announce the run, and play the warehouse simulation."""
    banner = "Starting Warehouse Simulation..."
    clear_output(wait=True)
    print(banner)
    run_warehouse_simulation()

def start_stock_trading():
    """Clear the cell output, announce the run, and play the stock trading simulation."""
    banner = "Starting Stock Trading Simulation..."
    clear_output(wait=True)
    print(banner)
    run_stock_trading_simulation()

# One button per simulation; create both first, then attach their handlers.
warehouse_button = widgets.Button(description="Start Warehouse Navigation")
trading_button = widgets.Button(description="Start Stock Trading")

# on_click passes the clicked button to the handler; the starters take no
# arguments, so the lambdas discard it.
warehouse_button.on_click(lambda _clicked: start_warehouse())
trading_button.on_click(lambda _clicked: start_stock_trading())

display(warehouse_button, trading_button)


Starting Warehouse Simulation...
Warehouse Simulation Starting...

[[0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
Reward: -0.1

[[0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
Reward: -0.1

[[0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
Reward: -0.1

[[0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 