## Import Packages

In [None]:
import pandas as pd
import numpy as np
import random

## Environment

In [None]:
class Environment():
    """Label-prediction environment built on the price ratio of two stocks.

    The spread ratio is ``stock1 close / stock2 close``.  Every dataset row
    holds ``WINDOW`` consecutive ratio values followed by the label of the
    *next* time step:

    * ``1``  -- the ratio is on or outside the band ``mean +/- BAND_WIDTH * std``
      computed over the trailing ``WINDOW`` values (current value included),
    * ``-1`` -- otherwise, the ratio is within ``MEAN_TOLERANCE`` of that mean,
    * ``0``  -- neither of the above.

    Rows are shuffled once at construction time.  ``reset`` and ``step`` walk
    through them with the cursor ``self.cur``; the cursor wraps to the first
    row instead of running past the end of the dataset.
    """

    # Path of a ticker's CSV file; "{}" is replaced by the ticker name.
    DATA_PATH_TEMPLATE = "../../../Data/{}.csv"
    # Number of ratio values per observation / rolling statistic.
    WINDOW = 20
    # Half-width of the band around the rolling mean, in standard deviations.
    BAND_WIDTH = 2
    # Maximum distance from the rolling mean that still earns the label -1.
    MEAN_TOLERANCE = 0.01
    # Reward magnitude when the true label is 0 / is non-zero.
    NEUTRAL_REWARD = 1
    SIGNAL_REWARD = 5.5

    def __init__(self, stock1="GM", stock2="F"):
        """Load both price series and build the shuffled dataset.

        Args:
            stock1: ticker used as the numerator of the spread ratio.
            stock2: ticker used as the denominator of the spread ratio.
        """
        # Close prices of the two stocks.
        self.stock1_price = self.get_close_price(filename=stock1)
        self.stock2_price = self.get_close_price(filename=stock2)
        self.spread_ratio = None

        # Generate the labelled windows of the spread ratio.
        self.dataset = self.gen_dataset()

        # Index of the current row; -1 means reset() has not been called yet.
        self.cur = -1

    def get_close_price(self, filename):
        """Return the "Close" column of ``filename``'s CSV as an array.

        Only the slice ``[-2400:-1]`` is kept, i.e. at most 2399 values with
        the final row of the file left out.
        """
        stock_df = pd.read_csv(self.DATA_PATH_TEMPLATE.format(filename))
        return stock_df["Close"].values[-2400:-1]

    def gen_dataset(self):
        """Build the shuffled list of ``[ratio window..., next label]`` rows.

        Also stores the spread ratio in ``self.spread_ratio``.

        Returns:
            A list of rows; each row is ``WINDOW`` ratio values followed by
            the label of the time step right after the window.  The list is
            empty when there are not more than ``WINDOW`` ratio values.
        """
        self.spread_ratio = self.stock1_price / self.stock2_price
        ratio = self.spread_ratio
        # Index of the first value that has a full trailing window behind it.
        first = self.WINDOW - 1

        # labels[k] describes the ratio at index first + k.
        labels = []
        for end in range(first, len(ratio)):
            window = ratio[end - first:end + 1]
            mean = window.mean()
            band = self.BAND_WIDTH * window.std()
            value = ratio[end]
            if value >= mean + band or value <= mean - band:
                labels.append(1)
            elif abs(value - mean) <= self.MEAN_TOLERANCE:
                labels.append(-1)
            else:
                labels.append(0)

        # Pair each window with the label of the following time step; the
        # last window has no successor and is therefore left out.
        dataset = []
        for end in range(first, len(ratio) - 1):
            row = list(ratio[end - first:end + 1])
            row.append(labels[end - first + 1])
            dataset.append(row)

        random.shuffle(dataset)
        return dataset

    def reset(self):
        """Advance to the next row and return its observation.

        The cursor wraps to the first row after the last one, so calling
        ``reset`` after a finished episode starts a new pass over the data
        instead of raising ``IndexError``.

        Raises:
            IndexError: if the dataset is empty.
        """
        if not self.dataset:
            raise IndexError("cannot reset: the dataset is empty")
        self.cur = (self.cur + 1) % len(self.dataset)
        return self.dataset[self.cur][:-1]

    def step(self, action):
        """Score ``action`` against the current row and move to the next row.

        The reward is positive when ``action`` equals the row's label and
        negative otherwise; its magnitude is ``NEUTRAL_REWARD`` for label 0
        and ``SIGNAL_REWARD`` for any other label.

        Call ``reset`` first: with the initial cursor of -1 the last row
        would be scored.

        Returns:
            ``(new_state, reward, is_done)``.  ``is_done`` is True once the
            new state is the last row of the dataset.  Stepping from the last
            row wraps the cursor to the first row and also reports True.
        """
        label = self.dataset[self.cur][-1]
        magnitude = self.NEUTRAL_REWARD if label == 0 else self.SIGNAL_REWARD
        reward = magnitude if action == label else -magnitude

        self.cur += 1
        last = len(self.dataset) - 1
        is_done = self.cur >= last
        if self.cur > last:
            # Stepped from the final row: wrap instead of indexing past the end.
            self.cur = 0

        new_state = self.dataset[self.cur][:-1]

        return new_state, reward, is_done
