In [1]:
import polars as pl
import os
import importlib
import sys
import json
from datetime import datetime

from pattern_detector import utils


### Generate candidate patterns from the available dates

In [2]:
# Location of the combined parquet table; later cells reuse `filepath`.
filepath = os.path.join("data", "silver", "combined.parquet")

# Load only the date column. `columns` is documented as a list of names, so
# pass a list instead of relying on a bare string being coerced, and pick the
# column by name rather than by position.
dates = pl.read_parquet(filepath, columns=["date"]).get_column("date")

# Candidate patterns are built from the dates alone, so they are generated
# once here and reused for every ticker.
all_patterns = utils.generate_possible_patterns(dates)

### Get names of tickers

In [3]:
# Read just the schema (column name -> dtype) of the parquet file.
schema = pl.read_parquet_schema(filepath)

# Every column other than "date" is treated as a ticker. Filter by name
# instead of slicing off the first entry so the result stays correct even if
# "date" is not the first column in the file.
tickers = [name for name in schema if name != "date"]

### Find and summarize patterns for each ticker

In [4]:
# Pick up any edits made to the helper module since it was imported.
importlib.reload(utils)

num_tickers = len(tickers)
found_patterns = {}  # ticker -> list of per-pattern summary rows

for position, ticker in enumerate(tickers, start=1):
    # Load this ticker's column next to the dates and expose it under the
    # generic column name "value".
    df = pl.read_parquet(filepath, columns=['date', ticker]).select(
        pl.col('date'),
        pl.col(ticker).alias("value"),
    )

    # Keep the good candidate patterns, then prune them.
    pruned_patterns = utils.prune_patterns(
        utils.find_good_patterns(df, possible_patterns=all_patterns)
    )

    # Tickers with no surviving patterns get no entry in the result.
    if pruned_patterns:
        rows = []
        for pattern in pruned_patterns:
            cur_decreased, cur_mean, cur_stdev = utils.summarize_pattern(pattern, df)
            nxt_decreased, nxt_mean, nxt_stdev = utils.summarize_pattern_next_day(pattern, df)
            rows.append({
                # dates are stored as ISO strings so the row is JSON-serializable
                'pattern': [day.isoformat() for day in pattern],
                'days_decreased': cur_decreased,
                'mean': cur_mean,
                'stdev': cur_stdev,
                'next_days_decreased': nxt_decreased,
                'next_mean': nxt_mean,
                'next_stdev': nxt_stdev,
            })
        found_patterns[ticker] = rows

    # Single-line progress indicator: "\r" returns to the start of the line so
    # each update overwrites the previous one.
    percent_complete = int(round(position / num_tickers * 100, 0))
    print(f"\r[{position}/{num_tickers}]: {percent_complete}%: {ticker} ", end="", flush=True)

[2109/2109]: 2108%: HWBK  

### Save found patterns

In [15]:
# Timestamp the filename so each run writes a new file instead of overwriting
# an earlier result.
filename = 'found_patterns_' + str(datetime.now().timestamp()) + '.json'

# Create the output directory if it is missing; otherwise open() would raise
# FileNotFoundError and the results of the long-running loop would be lost.
outfile_dir = os.path.join('data', 'gold')
os.makedirs(outfile_dir, exist_ok=True)
outfile_path = os.path.join(outfile_dir, filename)

# Explicit encoding keeps the output independent of the platform default.
with open(outfile_path, 'w', encoding='utf-8') as outfile:
    json.dump(found_patterns, outfile)