## Adding parameters to the data model
**ema20** = exponential moving average 20 days.
**ema50** = exponential moving average 50 days.
**openHigher** = If Open > (1.01 * Previous day Close) then 1 else 0.
**strongClose** = Let midpoint = (High + Low) / 2. If Close > midpoint then 1 else 0. Maybe not necessary; let's try the model with and without the strongClose parameter.
**averageVolume** = Average volume from past 200 days. 200 simple moving average for volume.
**strongVolume** = If Volume > (2 * averageVolume) then 1 else 0.
**strongVolume6MoPrior** = If strongVolume is equal to 1 more than 3 times in the past 126 days then 1 else 0.
**strongVolumeAfterFiling** = If strongVolume is equal to 1 more than 2 times in the 5 days following financialRelease = 1 then 1 else 0.
**accVolume** = If strongVolume = 1 and strongVolume6MoPrior = 1 and strongVolumeAfterFiling = 1 then 1 else 0.
**uptrend** = If ema50 > (ema50 50 days prior) then 1 else 0.

In [1]:
import requests
import pandas as pd
# from datetime import datetime, timedelta, date
import datetime
import time
from polygon import RESTClient
import logging
import signal
import sys
import pickle
import lz4.frame  # type: ignore
import concurrent.futures
import os
import pandas as pd
import numpy as np
import glob
import nbimporter
from readRawAggs import main

In [None]:
# # Define the path to your existing CSV file
# csv_file_path = r"C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\ntnx_data_raw.csv"
# # Load the CSV file into a DataFrame
# df = pd.read_csv(csv_file_path, delimiter=';', header=0)

In [None]:
# Function to calculate EMA
def calculate_ema(data, window):
    """Calculate the per-symbol Exponential Moving Average (EMA) of ``close``.

    Args:
        data: DataFrame with ``symbol`` and ``close`` columns. The EMA follows
            row order within each symbol, so rows should already be sorted
            chronologically per symbol.
        window: Span of the EMA, in rows.

    Returns:
        Series of EMA values rounded to 2 decimals, carrying the same index as
        ``data`` so it can be assigned directly as a new column. Symbols with
        fewer than ``window`` rows get NaN.
    """
    closes_by_symbol = data.groupby('symbol')['close']

    # transform() keeps the caller's index; groupby(...).ewm(...).mean() would
    # return a (symbol, row) MultiIndex that does not align on assignment.
    ema = closes_by_symbol.transform(
        lambda prices: prices.ewm(span=window, adjust=False).mean()
    ).round(2)

    # Insufficient history is judged per symbol rather than on the whole
    # frame, which mixes the rows of every symbol.
    rows_per_symbol = closes_by_symbol.transform('size')
    return ema.where(rows_per_symbol >= window)

In [None]:
# Function to calculate openHigher.
def calculate_open_higher(data, multiplier=1.01):
    """Flag rows whose open exceeds the same symbol's previous close by a margin.

    Args:
        data: DataFrame with ``symbol``, ``open`` and ``close`` columns, rows in
            chronological order within each symbol.
        multiplier: Factor applied to the previous close before comparing
            (1.01 means the open must be more than 1% above it).

    Returns:
        Integer Series (1/0) indexed like ``data``. The first row of every
        symbol has no previous close and is therefore 0.
    """
    # Shift inside each symbol so a symbol's first row never sees another
    # symbol's last close.
    previous_close = data.groupby('symbol')['close'].shift(1)

    # Compare plain columns: a SeriesGroupBy object cannot be compared
    # element-wise with ``>``.
    return (data['open'] > multiplier * previous_close).astype(int)

In [None]:
# Function to calculate average volume
def calculate_avg_volume(data, window):
    """Calculate the per-symbol simple moving average of ``volume``.

    Args:
        data: DataFrame with ``symbol`` and ``volume`` columns, rows in
            chronological order within each symbol.
        window: Number of rows in the moving average.

    Returns:
        Float Series indexed like ``data`` holding the average truncated to a
        whole number. The first ``window - 1`` rows of each symbol are NaN
        because the window is not yet full.
    """
    rolling_mean = data.groupby('symbol')['volume'].transform(
        lambda volumes: volumes.rolling(window).mean()
    )

    # Casting to int would raise on the NaN warm-up rows, so truncate while
    # staying in float; NaN keeps marking "not enough history".
    return np.trunc(rolling_mean)

In [None]:
# Function to calculate strong volume
def calculate_strong_volume(data):
    """Flag rows whose volume is more than twice the average volume.

    Args:
        data: DataFrame with ``volume`` and ``averageVolume`` columns.

    Returns:
        Integer Series (1/0) indexed like ``data``. Rows whose
        ``averageVolume`` is NaN compare as False and are therefore 0.
    """
    # Both columns are already row-aligned, so no grouping is needed; the
    # comparison is evaluated row by row.
    return (data['volume'] > 2 * data['averageVolume']).astype(int)

In [None]:
# Function to calculate strong volume in the window preceding each row
def calculate_strong_volume_prior(data, window, threshold=None):
    """Count strong-volume rows in the ``window`` rows before each row, per symbol.

    Args:
        data: DataFrame with ``symbol`` and ``strongVolume`` (1/0) columns, rows
            in chronological order within each symbol.
        window: Number of preceding rows to look back over (126 is roughly six
            months of trading days).
        threshold: Optional. When given, return a 1/0 flag that is 1 where the
            count is strictly greater than ``threshold``; when None, return the
            raw counts.

    Returns:
        Series indexed like ``data``. Counts are NaN until a symbol has
        ``window`` earlier rows; as a flag those rows are 0.
    """
    # Roll and shift inside each symbol so the current row is excluded and no
    # count leaks from the previous symbol; transform() keeps the caller's
    # index so the result can be assigned as a column.
    prior_counts = data.groupby('symbol')['strongVolume'].transform(
        lambda flags: flags.rolling(window).sum().shift(1)
    )

    if threshold is None:
        return prior_counts
    return (prior_counts > threshold).astype(int)

In [None]:
# Function to calculate strong volume after openHigher
def calculate_strong_volume_after(data):
    """Mark the five rows after an open-higher row when volume stays strong.

    Adds (or resets) the ``strongVolumeAfterOpenHigher`` column on ``data``
    itself. For every row with ``openHigher == 1`` that still has at least
    five later rows in its symbol, the next five rows are set to 1 when more
    than two of them have ``strongVolume`` set.

    Returns:
        The same ``data`` object, with the column filled in.
    """
    flag_column = 'strongVolumeAfterOpenHigher'
    lookahead = 5
    data[flag_column] = 0

    # Work through one ticker at a time so a look-ahead never spans symbols
    for _, ticker_rows in data.groupby('symbol'):
        opened_higher = ticker_rows['openHigher'].to_numpy()
        strong_volume = ticker_rows['strongVolume']
        row_labels = ticker_rows.index

        # Only rows with a full look-ahead window left can act as triggers
        trigger_positions = [
            pos
            for pos in range(len(ticker_rows) - lookahead)
            if opened_higher[pos] == 1
        ]

        for pos in trigger_positions:
            following = slice(pos + 1, pos + 1 + lookahead)
            if strong_volume.iloc[following].sum() > 2:
                data.loc[row_labels[following], flag_column] = 1

    return data

In [None]:
# Function to combine the three volume flags into the accumulating-volume flag
def calculate_accumulating_volume(data):
    """Return 1 where all three strong-volume flags equal 1, otherwise 0.

    Reads the ``strongVolume``, ``strongVolume6MoPrior`` and
    ``strongVolumeAfterOpenHigher`` columns of ``data``.
    """
    strong_today = data['strongVolume'] == 1
    strong_before = data['strongVolume6MoPrior'] == 1
    strong_afterwards = data['strongVolumeAfterOpenHigher'] == 1

    all_conditions_met = strong_today & strong_before & strong_afterwards
    return all_conditions_met.astype(int)

In [None]:
# Function to calculate uptrend from ema50.
def calculate_uptrend(data, lookback=22):
    """Flag rows whose ``ema50`` is above the same symbol's ``ema50`` ``lookback`` rows earlier.

    Args:
        data: DataFrame with ``symbol`` and ``ema50`` columns, rows in
            chronological order within each symbol.
        lookback: Number of rows to look back for the comparison value.
            NOTE(review): the default of 22 is the value this function has
            always used; the feature notes describe a 50-day lookback, so
            confirm which is intended.

    Returns:
        Integer Series (1/0) indexed like ``data``. Rows without ``lookback``
        earlier rows in their symbol have nothing to compare against and are 0.
    """
    # Shift inside each symbol so the comparison never reaches into another
    # symbol's rows.
    earlier_ema = data.groupby('symbol')['ema50'].shift(lookback)

    # Compare plain columns: a SeriesGroupBy object cannot be compared
    # element-wise with ``>``.
    return (data['ema50'] > earlier_ema).astype(int)

In [2]:
# Load the raw data by calling main(), which is imported from readRawAggs
df = main()
# Print the first rows as a quick check of the loaded columns
print(df.head())

  symbol    close   open   high     low     volume   timestamp
0   AACT  10.1200  10.10  10.15  10.100   603102.0  2023-06-12
1   AACT  10.1100  10.14  10.14  10.110     5515.0  2023-06-13
2   AACT  10.0998  10.11  10.13  10.095   143889.0  2023-06-14
3   AACT  10.1100  10.11  10.11  10.100  2061100.0  2023-06-15
4   AACT  10.1100  10.12  10.12  10.110   250958.0  2023-06-16


In [None]:
# Use main() from readRawAggs
df = main()
# Sort by symbol, then date, so every per-symbol window below walks the rows
# of one symbol in chronological order.
df = df.sort_values(by=['symbol', 'timestamp'])

# Convert Volume to integer format
df['volume'] = df['volume'].astype(int)

# **ema20** / **ema50** = exponential moving averages of close over 20 and 50
# days (calculate_ema already rounds to 2 decimals).
df['ema20'] = calculate_ema(df, 20)
df['ema50'] = calculate_ema(df, 50)

# **openHigher** = If Open > (1.01 * Previous day Close) then 1 else 0
df['openHigher'] = calculate_open_higher(df)

# **strongClose** = (High + Low) / 2. If Close > 'function' then 1 else 0
# Not enabled yet; kept for the planned with/without comparison.
# df['function'] = (df['high'] + df['low']) / 2
# df['strongClose'] = (df['close'] > df['function']).astype(int)

# **averageVolume** = Average volume from past 200 days (SMA for volume)
df['averageVolume'] = calculate_avg_volume(df, 200)

# **strongVolume** = If Volume > (2 * averageVolume) then 1 else 0
df['strongVolume'] = calculate_strong_volume(df)

# **strongVolume6MoPrior** = If strongVolume is 1 more than 3 times in the past
# 126 days then 1 else 0. 126 trading days is 6 months.
prior_strong_days = calculate_strong_volume_prior(df, 126)
df['strongVolume6MoPrior'] = (prior_strong_days > 3).astype(int)

# **strongVolumeAfterOpenHigher** = 1 on the 5 days after an openHigher day
# when strongVolume is 1 more than 2 times in those days.
# calculate_strong_volume_after adds the column to the frame and returns it.
df = calculate_strong_volume_after(df)

# **accVolume** = If strongVolume = 1 and strongVolume6MoPrior = 1 and
# strongVolumeAfterOpenHigher = 1 then 1 else 0
df['accVolume'] = calculate_accumulating_volume(df)

# **uptrend** = If ema50 > (ema50 50 days prior) then 1 else 0.
# The shift is done per symbol so one symbol is never compared with another.
df['uptrend'] = (df['ema50'] > df.groupby('symbol')['ema50'].shift(50)).astype(int)

# Drop the intermediate 'function' column; it only exists when strongClose is
# enabled above, so a missing column must not raise.
df.drop(columns=['function'], inplace=True, errors='ignore')

# Save the updated DataFrame to a new CSV file with a timestamp.
# The file does `import datetime`, so the class is datetime.datetime.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
new_csv_file_path = f"C:/Users/SamuliMustonen/Documents/Ready Solutions/Docs/StockTrading/Data/data_model_{timestamp}.csv"
df.to_csv(new_csv_file_path, index=False)

print(f"Updated data with new columns saved to {new_csv_file_path}")
