In [1]:
import pandas as pd
import numpy as np
!pip install openpyxl




[notice] A new release of pip is available: 23.3.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
# Read the training prices from the Excel workbook in the working directory.
# (openpyxl, installed in the first cell, is presumably the engine pandas uses
# for .xlsx files here.)
data = pd.read_excel('train.xlsx')
# Preview the first rows; the printed output shows a 'PRICES' date column
# followed by hourly price columns ('Hour 01', 'Hour 02', ...).
print(data.head())

      PRICES  Hour 01  Hour 02  Hour 03  Hour 04  Hour 05  Hour 06  Hour 07  \
0 2007-01-01    24.31    24.31    21.71     8.42     0.01     0.01     0.02   
1 2007-01-02    16.01    11.00     9.01     7.50     9.00     7.45    16.50   
2 2007-01-03    28.00    24.50    24.15    18.99    15.50    23.11    29.01   
3 2007-01-04    31.01    28.69    27.00    24.20    12.02    27.00    28.00   
4 2007-01-05    22.00    19.50    17.55    15.91    11.40    22.65    27.30   

   Hour 08  Hour 09  ...  Hour 15  Hour 16  Hour 17  Hour 18  Hour 19  \
0     0.01     0.01  ...    15.00    10.00     8.17    27.77    37.99   
1    28.01    29.96  ...    37.57    36.45    37.99    53.00    59.69   
2    39.73    43.81  ...    43.21    43.99    48.00    57.00    60.99   
3    34.85    41.31  ...    40.01    38.00    42.66    53.00    58.99   
4    34.00    40.01  ...    40.06    38.00    38.61    49.84    53.00   

   Hour 20  Hour 21  Hour 22  Hour 23  Hour 24  
0    33.11    37.99    33.00    36.48

In [42]:
from datetime import datetime

def get_max_price(data):
    """Return the highest hourly price found anywhere in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'PRICES' column (dropped before the search); every
        remaining column is treated as a price column.

    Returns
    -------
    float or int
        The largest non-missing value across the remaining columns, or 0
        when no value is greater than 0 (empty frame, only missing values,
        or nothing positive). The floor at 0 mirrors the running maximum
        that used to start at 0, so existing callers see the same result.

    Raises
    ------
    KeyError
        If ``data`` has no 'PRICES' column.
    """
    # drop first column ('PRICES'); drop() returns a new frame, so the
    # caller's DataFrame is left untouched
    prices = data.drop('PRICES', axis=1)

    # nothing to scan (no rows or no price columns)
    if prices.size == 0:
        return 0

    # Column-wise maximum, then the maximum of those. Both reductions skip
    # NaN, so a missing value can no longer hide larger prices in its row
    # (the builtin max() result depended on where the NaN was positioned).
    peak = prices.max().max()

    if pd.isna(peak) or peak <= 0:
        return 0
    return float(peak)

In [47]:
# create environments
# !pip install gym
import gym
from gym import spaces
import numpy as np

class CustomEnv(gym.Env):
    """Tabular Q-learning environment for buying/selling car charge on hourly prices.

    The raw state is ``[price, hour, month]``. ``discretize_state`` maps it to
    bin indices that address ``q_table`` (price bin, hour, month, action).

    Actions: 0 = sell, 1 = buy, 2 = nothing.
    """

    def __init__(self, data):
        """Set up spaces, discretisation bins and an all-zero Q-table.

        Parameters
        ----------
        data : pandas.DataFrame
            Column 0 is read as a date (``.month`` is taken from it) and
            columns 1..24 are indexed by hour; ``get_max_price`` is applied
            to it to obtain the upper price bound.
        """
        # Define action space and observation space
        self.data = data
        self.action_space = spaces.Discrete(3)  # 3 actions: sell, buy, nothing
        self.capacity = np.random.randint(0, 50)  # random start capacity in [0, 49]
        self.piggy_bank = 0

        self.month = 1
        self.hour = 1
        self.price = data.iloc[0, 1]
        self.date = 0  # positional row index into ``data``
        self.state = np.array([self.price, self.hour, self.month])
        self.bin_amount = 10  # number of price bin edges -> bin_amount - 1 price bins

        # computed once and reused for both the Box bounds and the price bins
        max_price = get_max_price(data)

        # create observation space with price, hour and month; bounds are
        # built as float32 up front so Box does not have to down-cast them
        self.lows = np.array([0, 1, 1], dtype=np.float32)
        self.highs = np.array([max_price, 24, 12], dtype=np.float32)
        self.observation_space = spaces.Box(low=self.lows, high=self.highs, dtype=np.float32)

        # bins
        self.price_bins = np.linspace(0, max_price, self.bin_amount)
        self.bins = [self.price_bins, np.linspace(1, 24, 24), np.linspace(1, 12, 12)]

        # create q-table
        self.q_table = np.zeros((self.bin_amount - 1, 24, 12, 3))  # price, hour, month, action

        self.discount_rate = 0.95
        self.epsilon = 0.9  # probability of taking a random action

        self.go_out = False  # True while the car is away (set at hour 8, cleared at 17)

    def discretize_state(self, state):
        """Convert a raw ``[price, hour, month]`` state into Q-table indices.

        Also stores ``state`` on ``self.state``.

        Returns
        -------
        list
            One integer index per state dimension, each clipped to the valid
            range of the matching ``q_table`` axis.
        """
        self.state = state

        digitized_state = []

        for i in range(len(self.bins)):
            bin_index = np.digitize(self.state[i], self.bins[i]) - 1
            # A price equal to the top edge would land one past the last bin,
            # and a value below the first edge would give -1 (which silently
            # wraps around when used as an index) -- clamp both.
            last_index = self.q_table.shape[i] - 1
            digitized_state.append(np.clip(bin_index, 0, last_index))

        # Returns the discretized state from an observation
        return digitized_state

    def reset(self):
        """Jump to a random hour of a random row and return the new raw state.

        ``capacity``, ``piggy_bank`` and ``go_out`` are not touched.

        Returns
        -------
        numpy.ndarray
            ``[price, hour, month]`` for the sampled position.
        """
        # get random hour (1-24)
        self.hour = np.random.randint(1, 25)

        # pick random row from data to get random price
        self.date = np.random.randint(0, len(self.data))

        # get month based on date
        self.month = self.data.iloc[self.date, 0].month

        # get price
        self.price = self.data.iloc[self.date, self.hour]

        # Reset the environment to its initial state and return the initial observation
        self.initial_observation = np.array([self.price, self.hour, self.month])

        self.state = self.initial_observation

        return self.state

    def choose_action(self, action):
        """Pick an action epsilon-greedily, then apply the car-usage overrides.

        Parameters
        ----------
        action
            Ignored; kept so existing callers can still pass a value.

        Returns
        -------
        int
            0 (sell), 1 (buy) or 2 (nothing).
        """
        # Choose an action from the action space
        if np.random.uniform(0, 1) > 1 - self.epsilon:
            # This picks a random action from 0,1,2 (happens with chance epsilon)
            action = np.random.randint(0, 3)

        # Pick a greedy action with chance 1-epsilon
        else:
            # The Q-table is addressed by bin indices, not by the raw float
            # state, so discretise before looking up the action values.
            price_bin, hour_bin, month_bin = self.discretize_state(self.state)
            action = np.argmax(self.q_table[price_bin, hour_bin, month_bin])

        # charge car if capacity is below 20 for 8 o'clock
        if self.hour == 7 and self.capacity < 20:
            action = 1

        # car is back at 17 o'clock, reset go_out
        elif self.hour == 17:
            self.go_out = False

        else:
            # with chance of 50% car is taken out between 8 and 16
            if self.hour == 8 and np.random.randint(0, 2) == 1:  # 50% chance
                self.go_out = True

            # if car is out, action will be nothing; checked separately from
            # the draw above so that it also applies at hour 8 itself
            if 8 <= self.hour < 17 and self.go_out:
                action = 2

        return action

    def step(self, action):
        """Apply one action, advance the clock by an hour and update the state.

        The action actually executed is the one returned by
        ``choose_action``; the ``action`` argument is only forwarded to it.

        Returns
        -------
        tuple
            ``(reward, done)``; ``done`` is True once the date index has moved
            past the last row of ``data``.
        """
        action = self.choose_action(action)

        # calculate rewards and capacity
        if action == 0:  # sell
            if self.capacity < 25:
                # NOTE(review): selling the remaining charge yields a negative
                # reward using the 2x buy factor -- looks copied from the buy
                # branch; confirm whether this penalty is intended.
                reward = - ((2 * (self.price / 1000)) * self.capacity)
                self.capacity = 0
            else:
                reward = (self.price / 1000) * 25
                self.capacity = self.capacity - 25

        elif action == 1:  # buy
            if self.capacity > 25:
                # only top up to the cap of 50
                reward = - ((2 * (self.price / 1000)) * (50 - self.capacity))
                self.capacity = 50
            else:
                reward = - ((2 * (self.price / 1000)) * 25)
                self.capacity = self.capacity + 25

        else:  # nothing (action == 2)
            reward = 0

        # advance time; hour 24 rolls over to hour 1 of the next row
        if self.hour == 24:
            self.hour = 1
            self.date = self.date + 1
        else:
            self.hour = self.hour + 1

        # ``done`` must always be bound, and the last valid row index is
        # len(data) - 1, hence >= instead of >
        done = self.date >= len(self.data)
        if not done:
            self.month = self.data.iloc[self.date, 0].month
            self.price = self.data.iloc[self.date, self.hour]

        # update state (at end of data the previous price and month are kept)
        self.state = np.array([self.price, self.hour, self.month])

        return reward, done

    def render(self, mode='human'):
        """Visualize the environment (optional); currently a no-op."""
        pass

    def close(self):
        """Close the environment or perform cleanup (optional); currently a no-op."""
        pass


In [48]:
# Smoke test: build the environment on the loaded data and jump to a random
# hour/date.
test = CustomEnv(data)
test.reset()
# Raw state, laid out as [price, hour, month].
print(test.state)
# Map the raw state to per-dimension bin indices; as the last expression of
# the cell, its value is what the notebook displays.
test.discretize_state(test.state)

[99.96 11.    2.  ]


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[0, 10, 1]