# Boring Pre-req loading data and packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch


In [2]:
df = pd.read_csv('/Users/spencerfonbuena/Documents/Python/Trading Models/DNNM/mach1/datasets/ES_1min_continuous_adjusted.txt', sep=',', index_col=0, header=None, names=["Date", 'Open', 'High', 'low', 'Close', 'Volume'])

In [3]:
df['Close']

Date
2005-01-03 08:00:00    121.56
2005-01-03 08:30:00    121.55
2005-01-03 09:00:00    121.56
2005-01-03 09:30:00    121.52
2005-01-03 10:00:00    120.98
                        ...  
2023-06-09 17:30:00    429.97
2023-06-09 18:00:00    429.95
2023-06-09 18:30:00    429.98
2023-06-09 19:00:00    430.02
2023-06-09 19:30:00    430.04
Name: Close, Length: 143792, dtype: float64

In [4]:
df

Unnamed: 0_level_0,Open,High,low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03 08:00:00,121.60,121.60,121.52,121.56,143200
2005-01-03 08:30:00,121.57,121.60,121.53,121.55,284200
2005-01-03 09:00:00,121.54,121.62,121.45,121.56,247900
2005-01-03 09:30:00,121.56,121.76,121.47,121.52,5974100
2005-01-03 10:00:00,121.53,121.64,120.88,120.98,8743800
...,...,...,...,...,...
2023-06-09 17:30:00,429.94,430.07,429.91,429.97,18912
2023-06-09 18:00:00,429.97,429.98,429.92,429.95,4337
2023-06-09 18:30:00,429.94,430.01,429.94,429.98,14252
2023-06-09 19:00:00,429.96,430.02,429.96,430.02,604


### Moving averages

In [3]:
#50 period moving average
df['50SMA'] = df['Close'].rolling(50).mean()

#200 period moving average
df['200SMA'] = df['Close'].rolling(200).mean()

### RSI

In [4]:
def RSI(df, lookback):
    deltas = np.diff(df)
    seed = deltas[:lookback+1]
    up = seed[seed>= 0].sum()/lookback
    down = -seed[seed < 0].sum()/lookback
    rs = up/down
    rsi = np.zeros_like(df)
    
    for i in range(lookback, len(df)):
        delta = deltas[i-1]

        if delta > 0:
            upval = delta
            downval = 0.
        if delta < 0:
            upval = 0
            downval=abs(delta)
        up = (up * (lookback - 1) + upval) / lookback
        down = (down * (lookback - 1) + downval) / lookback

        rs = up/down
        rsi[i] = 100. - 100./(1. +rs)

    return rsi
df['RSI'] = RSI(df['Close'], 14)
    


In [7]:
df

Unnamed: 0_level_0,Open,High,low,Close,Volume,50SMA,200SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2005-01-03 08:00:00,121.60,121.60,121.52,121.56,143200,,,0.000000
2005-01-03 08:30:00,121.57,121.60,121.53,121.55,284200,,,0.000000
2005-01-03 09:00:00,121.54,121.62,121.45,121.56,247900,,,0.000000
2005-01-03 09:30:00,121.56,121.76,121.47,121.52,5974100,,,0.000000
2005-01-03 10:00:00,121.53,121.64,120.88,120.98,8743800,,,0.000000
...,...,...,...,...,...,...,...,...
2023-06-09 17:30:00,429.94,430.07,429.91,429.97,18912,429.202558,427.412077,52.497239
2023-06-09 18:00:00,429.97,429.98,429.92,429.95,4337,429.269558,427.451627,52.241337
2023-06-09 18:30:00,429.94,430.01,429.94,429.98,14252,429.339158,427.489377,52.614466
2023-06-09 19:00:00,429.96,430.02,429.96,430.02,604,429.391758,427.529977,53.140157


## Method for creating the labels

In [15]:
def create_labels(df):

    A = 0
    C = 0
    labels = np.array([])
    
    print(df)
    for i in range(0, (len(df))):


        #find 1 percent and 2 percent above and below
        #print(df[A])
        one_low = df[A] * .99
        two_low = df[A] * .98
        one_high = df[A] * 1.01
        two_high = df[A] * 1.02

        #print(f'1 low: {one_low} | 2 low: {two_low} | 1 high: {one_high} | 2 high: {two_high}')
        #initialize the label counter
        label_counter = A

        #this is to make sure that once it either enters the "gone up by one percent" or "gone down by 1 percent"
        #it doesn't enter the other while loops
        pathway = 0

        try:
            #look for the instance when the price increases or decreases by 1 percent
            while df[label_counter] >= one_low and df[label_counter] <= one_high:
                label_counter += 1
                #print(df[label_counter])
            #If the price moved up 1 pecent first, this while loop will trigger and check if it is a two to one, or a one to one trade
            while df[label_counter] >= one_low and df[label_counter] <= two_high:
                label_counter += 1
                pathway = 1
                #print(df[label_counter])
            #Check if price has increased two percent
            if df[label_counter] >= two_high:
                labels = np.append(labels, 2)
                pathway = 1
                #print(df[label_counter])
            #check if price has reversed back down to the one percent marker
            if df[label_counter] <= one_low and pathway == 1:
                labels = np.append(labels, 1)
                #print(df[label_counter])
            
            #if the price moved down 1 pecent first, this will check if it is a two to one, or a one to one trade
            while df[label_counter] <= one_high and df[label_counter] >= two_low and pathway != 1:
                label_counter += 1
                pathway = 2
                #print(df[label_counter])
        
            #check if the price has continued down two percent
            if df[label_counter] <= two_low and pathway != 1:
                labels = np.append(labels, 0)
                #print(df[label_counter])
            #check if price reversed back up to the 1 percent above marker
            if df[label_counter] >= one_high and pathway != 1:
                labels = np.append(labels, 1)
                #print(df[label_counter])
            
            #temporarily store the last label that was added to "labels=[]"
            C = labels[-1]

        except:
            break
        #increment the graph by one time interval
        A += 1 

    #Create an array with the last value before the classification algorithm stopped
    array_append = []
    while A < len(df):
        array_append = np.append(array_append, C)
        A += 1
        

    labels = np.append(labels, array_append)
    return labels
df['Labels'] = create_labels(df['Close'])

Date
2008-01-02 06:00:00    1317.50
2008-01-02 06:01:00    1317.75
2008-01-02 06:02:00    1318.25
2008-01-02 06:03:00    1318.25
2008-01-02 06:04:00    1318.00
                        ...   
2023-06-09 16:55:00    4350.00
2023-06-09 16:56:00    4350.25
2023-06-09 16:57:00    4350.25
2023-06-09 16:58:00    4349.75
2023-06-09 16:59:00    4348.75
Name: Close, Length: 5381922, dtype: float64


## Create a secondary dataframe store to create normalized dataset

In [5]:
df

Unnamed: 0_level_0,Open,High,low,Close,Volume,50SMA,200SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2008-01-02 06:00:00,1316.25,1317.75,1316.00,1317.50,2317,,,0.000000
2008-01-02 06:01:00,1317.25,1317.75,1317.00,1317.75,777,,,0.000000
2008-01-02 06:02:00,1318.00,1318.25,1317.75,1318.25,593,,,0.000000
2008-01-02 06:03:00,1318.25,1318.50,1318.00,1318.25,472,,,0.000000
2008-01-02 06:04:00,1318.25,1318.25,1318.00,1318.00,57,,,0.000000
...,...,...,...,...,...,...,...,...
2023-06-09 16:55:00,4349.50,4350.25,4349.50,4350.00,189,4351.255,4351.60000,49.222864
2023-06-09 16:56:00,4350.00,4350.50,4350.00,4350.25,357,4351.235,4351.59000,51.656802
2023-06-09 16:57:00,4350.00,4350.75,4350.00,4350.25,294,4351.215,4351.58125,54.029826
2023-06-09 16:58:00,4350.00,4350.25,4349.25,4349.75,613,4351.195,4351.57125,48.863670


In [6]:
dataframe = pd.DataFrame()

In [7]:
dataframe['Open'] = df['Open'].pct_change()
dataframe['High'] = df['High'].pct_change()
dataframe['Low'] = df['low'].pct_change()
dataframe['Close'] = df['Close'].pct_change()
dataframe['Volume'] = df['Volume'].pct
dataframe['50SMA'] = df['50SMA'].pct_change()
dataframe['200SMA'] = df['200SMA'].pct_change()
dataframe['RSI'] = df['RSI'].pct_change()
#dataframe['Labels'] = df['Labels']

In [9]:
dataframe['Volume'] = df['Volume'].pct_change()

In [10]:
dataframe

Unnamed: 0_level_0,Open,High,Low,Close,Volume,50SMA,200SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2008-01-02 06:00:00,,,,,,,,
2008-01-02 06:01:00,0.000760,0.000000,0.000760,0.000190,-0.664653,,,
2008-01-02 06:02:00,0.000569,0.000379,0.000569,0.000379,-0.236808,,,
2008-01-02 06:03:00,0.000190,0.000190,0.000190,0.000000,-0.204047,,,
2008-01-02 06:04:00,0.000000,-0.000190,0.000000,-0.000190,-0.879237,,,
...,...,...,...,...,...,...,...,...
2023-06-09 16:55:00,-0.000057,0.000115,0.000057,0.000057,0.359712,-0.000003,-0.000002,0.053288
2023-06-09 16:56:00,0.000115,0.000057,0.000115,0.000057,0.888889,-0.000005,-0.000002,0.049447
2023-06-09 16:57:00,0.000000,0.000057,0.000000,0.000000,-0.176471,-0.000005,-0.000002,0.045938
2023-06-09 16:58:00,0.000000,-0.000115,-0.000172,-0.000115,1.085034,-0.000005,-0.000002,-0.095617


In [8]:
dataframe.to_csv('/Users/spencerfonbuena/Documents/Python/Trading Models/DNNM/mach1/datasets/ES_1min_returns.txt')
#df.to_csv('/Users/spencerfonbuena/Documents/Python/Trading Models/DNNM/mach1/datasets/SPY_30mins_raw.txt')

## Method for windowing the data

In [None]:
datafile = '/Users/spencerfonbuena/Documents/Python/Trading Models/models/mach1/datasets/AAPL_1hour_corrected.txt'
df = pd.read_csv(datafile, delimiter=',', index_col=0)

In [None]:
def window_dataset(df, window_size):
    A = 0
    window_set = []
    for i in range(len(df) - 100):
        example = df[i: window_size + i]
        window_set.append(np.expand_dims(example, 0))
        if i % 1000 == 0:
            print(i)
    return torch.tensor(np.vstack(window_set)).transpose(-1,-2)
df_expand = window_dataset(df, 100)