Test each tick by itself, depending on time of day. WIP

In [1]:
#Import functions
import polars as pl
from functools import partial
from itertools import product, combinations
import multiprocessing
from numba import jit
import numpy as np
import pandas as pd
import datetime
from dateutil.relativedelta import relativedelta
import os

def check_environment():
    if 'COLAB_GPU' in os.environ or 'google.colab' in str(get_ipython()):
        return "Colab"
    else:
        return "Local"

environment = check_environment()
print(f"Current Environment: {environment}")

Current Environment: Local


In [2]:
if environment == "Colab":
    from google.colab import drive
    drive.mount('/content/drive')

In [14]:
if environment == "Colab":
    lazy_df = pl.scan_parquet('/content/drive/MyDrive/Quant/database.parquet')
elif environment == "Local":
    lazy_df = pl.scan_parquet('../../Data/SP500/minuteHist2019/database.parquet')
filtered_df = lazy_df.filter(pl.col("year") <= 2024, pl.col("ticker") == "INTC", pl.col("trading_hours") == True).collect()
len(filtered_df)

486223

In [3]:
#Day, week, month arrays
day_data_array = []
week_data_array = []
month_data_array = []
current = datetime.datetime(2019, 10, 1)
end = datetime.datetime(2024, 10, 19)

while current.timestamp()<=end.timestamp():
    temp_df = filtered_df.filter(pl.col("date") == str(current.date()))
    if len(temp_df)>0:
        day_data_array.append(temp_df['open','close','month','trading_minute'].to_numpy())
    current += relativedelta(days=1)

current_week = 40
current_year = 2019
end_week = 38
end_year = 2024

while current_week<=end_week or current_year<end_year:
    temp_df = filtered_df.filter((pl.col("year") == current_year) & (pl.col("week") == current_week))
    if len(temp_df)>0:
        week_data_array.append(temp_df['open','close','month','day_of_week','trading_minute'].to_numpy())
    current_week+=1
    if current_week>52:
        current_week=1
        current_year+=1

current_month = 10
current_year = 2019
end_month = 9
end_year = 2024
while current_month<=end_month or current_year<end_year:
    temp_df = filtered_df.filter((pl.col("year") == current_year) & (pl.col("month") == current_month))
    if len(temp_df)>0:
        month_data_array.append(temp_df['open','close','month','day_of_week','trading_minute'].to_numpy())
    current_month+=1
    if current_month>12:
        current_month=1
        current_year+=1

len(day_data_array), len(week_data_array), len(month_data_array)

(1251, 259, 60)

In [4]:
day_data_array[0]

array([[ 51.97  ,  52.11  ,  10.    ,   0.    ],
       [ 52.13  ,  51.87  ,  10.    ,   1.    ],
       [ 51.8663,  51.86  ,  10.    ,   2.    ],
       ...,
       [ 50.85  ,  50.835 ,  10.    , 387.    ],
       [ 50.84  ,  50.815 ,  10.    , 388.    ],
       [ 50.815 ,  50.77  ,  10.    , 389.    ]])

In [5]:
@jit(nopython=True, nogil=True)
def noFactors(data_array):
    all_minutes = []
    for j in range(1,len(data_array)):
        current = data_array[j]
        previous = data_array[j-1]

        month = current[2]
        minute = current[3]

        all_minutes.append([month, minute, current[0]/previous[0]])
    return all_minutes

In [6]:
results = []
for k in range(len(day_data_array)):
    results += noFactors(day_data_array[k])

In [7]:
len(results), results[0]

(484972, [10.0, 1.0, 1.003078699249567])

In [8]:
#Sort the results
sorted_temp_results = sorted(results.copy(), reverse=True, key = lambda x: x[2])
results_df=pl.DataFrame(sorted_temp_results, schema=["Month", "Minute", "Return"], orient="row")

results_df[:5]

Month,Minute,Return
f64,f64,f64
3.0,1.0,1.032857
3.0,21.0,1.03121
3.0,385.0,1.028627
5.0,55.0,1.02767
12.0,178.0,1.024558


In [9]:
# Minute factor
minute_results = []
all_results = []
for k in range(1,390):
    for j in range(1,13):
        temp_all_results = results_df.filter((pl.col("Minute") == k) & (pl.col("Month") == j))
        all_results.append([j,k,sum(temp_all_results['Return']/len(temp_all_results))])
    temp_minute_results = results_df.filter(pl.col("Minute") == k)
    minute_results.append([k,sum(temp_minute_results['Return']/len(temp_minute_results))])

In [10]:
minute_results.sort(key = lambda x: x[1], reverse=True)

In [11]:
sorted_minute_results = sorted(minute_results.copy(), reverse=True, key = lambda x: x[1])
sorted_all_results = sorted(all_results.copy(), reverse=True, key = lambda x: x[2])
sorted_minute_results[:10],[], sorted_all_results[:10]

([[385, 1.0001612468256704],
  [7, 1.0001180701748777],
  [2, 1.0001160991054716],
  [16, 1.000094298510729],
  [97, 1.0000854697813082],
  [128, 1.0000737840604073],
  [18, 1.0000680556013757],
  [30, 1.0000658115666208],
  [126, 1.0000644171019049],
  [26, 1.0000638350300346]],
 [],
 [[3, 1, 1.0013613207564365],
  [3, 2, 1.0007974195216696],
  [3, 385, 1.000768483816238],
  [10, 1, 1.0007283302578303],
  [11, 1, 1.0006863974741929],
  [1, 2, 1.0006624878946009],
  [3, 26, 1.0005768431110527],
  [9, 2, 1.0005356770990517],
  [4, 1, 1.0005014940377825],
  [3, 3, 1.0004996034150913]])