# 01 IMPORTS

In [2]:
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import time
from datetime import datetime

import warnings # necessary b/c pandas & statsmodels datetime issue
warnings.simplefilter(action="ignore")

# 02 DATA IMPORT

In [6]:
df = pd.read_csv('C:\\Users\\benro\\OneDrive\\Documents\\EODData\\StockProject_New\\Combined1MinData_042022_062022.csv', index_col=[0])


In [7]:
df.head()

Unnamed: 0,Symbol,Date,Open,High,Low,Close,Volume,FullDate,Time
0,AACG,04-01-2022,1.54,1.54,1.54,1.54,100,01-Apr-2022 09:14,09:14
1,AACG,04-01-2022,1.48,1.48,1.48,1.48,500,01-Apr-2022 09:26,09:26
2,AACG,04-01-2022,1.55,1.55,1.55,1.55,100,01-Apr-2022 09:27,09:27
3,AACG,04-01-2022,1.56,1.56,1.56,1.56,298,01-Apr-2022 09:30,09:30
4,AACG,04-01-2022,1.49,1.49,1.49,1.49,100,01-Apr-2022 09:32,09:32


In [35]:
# limit to Apple stock only for now

dfstk=df[df['Symbol'] == "AAPL"]
dfstk.head()
dfstk.to_csv('C:\\Users\\benro\\OneDrive\\Documents\\EODData\\StockProject_New\\AAPL_042022_062022.csv')

In [36]:
# Stock market is open 9:30-40
dfstk['Time'].agg(['min', 'max'])
# These times 9am to 5pm represent some before and after hours trading

min    09:00
max    16:59
Name: Time, dtype: object

## Interval Comparisons

In [37]:
# Let's first try to understand what's happening in the first 10, 20, and 30 minute of open market, our 'X' period
# For example, during the 'x' period:
# Was ClOSE higher or lower than the OPEN
# By how much
# Was that consistent for each 2 minute or 5 minute bar
# Was the OPEN higher or lower than the previous day CLOSE
# Was the CLOSE higher or lower than the previous day CLOSE
# Did VOLUME increase or decrease
# Was that consistent for each 2 minute or 5 minute bar
# Was VOLUME higher or lower than the x period yesterday
# What the price doing compared to the RSI  
# (https://www.wallstreetmojo.com/relative-strength-index/   
# RSI is calculated using the formula RSI = 100 – (100 / [1 + {14-Day Average Gain / 14-Day Average Loss}]))  


In [38]:
# Tick up, or green candle
dfstk['Green'] = dfstk['Close'].gt(dfstk['Open'])
dfstk.head(20)

Unnamed: 0,Symbol,Date,Open,High,Low,Close,Volume,FullDate,Time,Green
1144,AAPL,04-01-2022,173.65,173.67,173.55,173.6,18093,01-Apr-2022 09:00,09:00,False
1145,AAPL,04-01-2022,173.66,173.7,173.61,173.62,9482,01-Apr-2022 09:01,09:01,False
1146,AAPL,04-01-2022,173.66,173.66,173.4,173.4,20539,01-Apr-2022 09:02,09:02,False
1147,AAPL,04-01-2022,173.42,173.47,173.4,173.47,14775,01-Apr-2022 09:03,09:03,True
1148,AAPL,04-01-2022,173.44,173.44,173.3,173.3,18326,01-Apr-2022 09:04,09:04,False
1149,AAPL,04-01-2022,173.27,173.46,173.26,173.26,28783,01-Apr-2022 09:05,09:05,False
1150,AAPL,04-01-2022,173.44,173.61,173.4,173.5,36723,01-Apr-2022 09:06,09:06,True
1151,AAPL,04-01-2022,173.5,173.63,173.5,173.57,7217,01-Apr-2022 09:07,09:07,True
1152,AAPL,04-01-2022,173.6,173.71,173.26,173.5,35464,01-Apr-2022 09:08,09:08,False
1153,AAPL,04-01-2022,173.5,173.5,173.4,173.5,9329,01-Apr-2022 09:09,09:09,False


In [39]:
# Volume increase
# eq, ne, lt, gt :: ==, !=, <, >
dfstk['Int_vol_inc'] = dfstk.Volume.gt(dfstk.Volume.shift())
dfstk.head()

Unnamed: 0,Symbol,Date,Open,High,Low,Close,Volume,FullDate,Time,Green,Int_vol_inc
1144,AAPL,04-01-2022,173.65,173.67,173.55,173.6,18093,01-Apr-2022 09:00,09:00,False,False
1145,AAPL,04-01-2022,173.66,173.7,173.61,173.62,9482,01-Apr-2022 09:01,09:01,False,False
1146,AAPL,04-01-2022,173.66,173.66,173.4,173.4,20539,01-Apr-2022 09:02,09:02,False,True
1147,AAPL,04-01-2022,173.42,173.47,173.4,173.47,14775,01-Apr-2022 09:03,09:03,True,False
1148,AAPL,04-01-2022,173.44,173.44,173.3,173.3,18326,01-Apr-2022 09:04,09:04,False,True


In [40]:
# with help from https://blog.quantinsti.com/build-technical-indicators-in-python/
# Simple Moving Average
def SMA(data, ndays): 
    SMA = pd.Series(data['Close'].rolling(ndays).mean(), name = 'SMA') 
    data = data.join(SMA) 
    return data

# Exponentially-weighted Moving Average 
def EWMA(data, ndays): 
    EMA = pd.Series(data['Close'].ewm(span = ndays, min_periods = ndays - 1).mean(), 
                 name = 'EWMA_' + str(ndays)) 
    data = data.join(EMA) 
    return data

In [51]:

SMA = SMA(dfstk,5)


# df['add'] = df.apply(lambda row : add(row['A'], row['B'], row['C']), axis = 1)

In [52]:
dfstk.head(20)

Unnamed: 0,Symbol,Date,Open,High,Low,Close,Volume,FullDate,Time,Green,Int_vol_inc
1144,AAPL,04-01-2022,173.65,173.67,173.55,173.6,18093,01-Apr-2022 09:00,09:00,False,False
1145,AAPL,04-01-2022,173.66,173.7,173.61,173.62,9482,01-Apr-2022 09:01,09:01,False,False
1146,AAPL,04-01-2022,173.66,173.66,173.4,173.4,20539,01-Apr-2022 09:02,09:02,False,True
1147,AAPL,04-01-2022,173.42,173.47,173.4,173.47,14775,01-Apr-2022 09:03,09:03,True,False
1148,AAPL,04-01-2022,173.44,173.44,173.3,173.3,18326,01-Apr-2022 09:04,09:04,False,True
1149,AAPL,04-01-2022,173.27,173.46,173.26,173.26,28783,01-Apr-2022 09:05,09:05,False,True
1150,AAPL,04-01-2022,173.44,173.61,173.4,173.5,36723,01-Apr-2022 09:06,09:06,True,True
1151,AAPL,04-01-2022,173.5,173.63,173.5,173.57,7217,01-Apr-2022 09:07,09:07,True,False
1152,AAPL,04-01-2022,173.6,173.71,173.26,173.5,35464,01-Apr-2022 09:08,09:08,False,True
1153,AAPL,04-01-2022,173.5,173.5,173.4,173.5,9329,01-Apr-2022 09:09,09:09,False,False
