## Import Packages

In [34]:
import pandas as pd
import numpy as np
import random

## Environment

In [2]:
class Environment:
    """Training environment built on the price ratio of two stocks.

    The observation is a window of the last ``WINDOW`` values of the spread
    ratio ``stock1_price / stock2_price``.  Every window is labelled once,
    up front, by ``generate_label``:

    * ``1``  -- the latest ratio is on or outside the band
      ``moving average +/- BAND_WIDTH * std`` of the window,
    * ``-1`` -- the latest ratio is within ``MEAN_TOLERANCE`` of the
      window's moving average,
    * ``0``  -- anything else.

    ``step`` rewards an action that equals the label of the current window
    and then jumps to a randomly chosen window.
    """

    # Number of observations in one state window / moving average.
    WINDOW = 20
    # Only the first MAX_ROWS closing prices of each CSV are used.
    MAX_ROWS = 1000
    # Half-width of the pressure/support band, in standard deviations.
    BAND_WIDTH = 2
    # Maximum |ratio - moving average| still labelled as "at the mean" (-1).
    MEAN_TOLERANCE = 0.01
    # Divisor applied to the window returned by ``step``.
    STATE_SCALE = 100
    # label -> (reward when action == label, reward otherwise)
    _REWARDS = {0: (1, -1), 1: (5.5, -6.5), -1: (7, -3)}

    def __init__(self, stock1="F", stock2="GM"):
        """Load both price series, order them and pre-compute the labels.

        Args:
            stock1: Base name (without ``.csv``) of the first price file.
            stock2: Base name (without ``.csv``) of the second price file.
        """
        # Closing prices of the two stocks.
        self.stock1_price = self.get_close_price(filename=stock1)
        self.stock2_price = self.get_close_price(filename=stock2)
        self.spread_ratio = None

        # Make stock1 the series with the higher mean price.
        self.reset_stock_order()

        # Generate one label per full window of the spread ratio
        # (this also fills in ``self.spread_ratio``).
        self.label = self.generate_label()

        # Index of the last observation of the current window.
        self.cur_ti = self.WINDOW - 1

    def get_close_price(self, filename):
        """Return the first ``MAX_ROWS`` values of the ``Close`` column.

        Args:
            filename: Base name of a CSV file under ``../../Data/``.

        Returns:
            The ``Close`` column as an array, truncated to ``MAX_ROWS`` rows.
        """
        stock_df = pd.read_csv("../../Data/{}.csv".format(filename))
        return stock_df["Close"].values[:self.MAX_ROWS]

    def reset_stock_order(self):
        """Swap the two series if stock1 has the lower mean price."""
        if self.stock1_price.mean() < self.stock2_price.mean():
            self.stock1_price, self.stock2_price = (
                self.stock2_price,
                self.stock1_price,
            )

    def generate_label(self):
        """Compute the spread ratio and label every full window of it.

        Side effects:
            Sets ``self.spread_ratio`` to ``stock1_price / stock2_price`` and
            ``self.temp`` to the list of moving averages.

        Returns:
            A list with one label (``1``, ``-1`` or ``0``) per window;
            entry ``i`` belongs to the window ending at index
            ``i + WINDOW - 1`` of the spread ratio.  The list is empty when
            fewer than ``WINDOW`` prices are available.
        """
        self.spread_ratio = self.stock1_price / self.stock2_price
        offset = self.WINDOW - 1

        ma_20 = []
        pressure = []
        support = []
        for end in range(offset, len(self.spread_ratio)):
            window = self.spread_ratio[end - offset:end + 1]
            mean = window.mean()
            # Array ``.std()`` with default arguments, as in the original.
            std = window.std()
            ma_20.append(mean)
            pressure.append(mean + self.BAND_WIDTH * std)
            support.append(mean - self.BAND_WIDTH * std)

        # Kept as an attribute because the original code exposes it.
        self.temp = ma_20

        label = []
        latest_values = self.spread_ratio[offset:]
        for value, mean, upper, lower in zip(latest_values, ma_20,
                                             pressure, support):
            if value >= upper or value <= lower:
                label.append(1)
            elif abs(value - mean) <= self.MEAN_TOLERANCE:
                label.append(-1)
            else:
                label.append(0)

        return label

    def reset(self):
        """Move to the first full window and return it.

        NOTE(review): this window is returned unscaled, whereas ``step``
        divides its window by ``STATE_SCALE`` -- confirm with the caller
        whether that difference is intended.
        """
        offset = self.WINDOW - 1
        self.cur_ti = offset
        return self.spread_ratio[self.cur_ti - offset:self.cur_ti + 1]

    def step(self, action):
        """Score ``action`` on the current window, then jump to a random one.

        Args:
            action: Compared for equality with the label of the current
                window, so it is expected to be ``1``, ``-1`` or ``0``.

        Returns:
            A tuple ``(new_state, reward, is_done, label)`` where
            ``new_state`` is the newly selected window divided by
            ``STATE_SCALE``, ``reward`` scores ``action`` against the
            previous window's label, and ``label`` is the label of
            ``new_state``.

        Raises:
            ValueError: From ``random.sample`` if there are too few labelled
                windows to sample from.
        """
        offset = self.WINDOW - 1

        # Reward: compare the action with the label of the current window.
        current_label = self.label[self.cur_ti - offset]
        # Any label other than 0 or 1 is scored like -1, as in the original
        # trailing ``else`` branch.
        hit, miss = self._REWARDS.get(current_label, self._REWARDS[-1])
        reward = hit if action == current_label else miss

        # Jump to a random window.  The upper bound follows the amount of
        # labelled data; for 1000 prices it equals the former literal 980.
        self.cur_ti = random.sample(range(offset, len(self.label) - 1), 1)[0]

        # With the sampling range above this is never True; it is kept so the
        # return signature stays unchanged.
        is_done = self.cur_ti + 1 == len(self.stock1_price)

        new_state = (
            self.spread_ratio[self.cur_ti - offset:self.cur_ti + 1]
            / self.STATE_SCALE
        )

        # Labels are offset by ``WINDOW - 1`` relative to ``cur_ti``; the
        # original returned ``self.label[self.cur_ti]``, i.e. the label of a
        # window ending 19 steps later than ``new_state``.
        return new_state, reward, is_done, self.label[self.cur_ti - offset]