In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import pandas_ta as ta 
import numpy as np
import os
from config import *

In [None]:
"""
SMA/EMA: 5, 10, 20, 50, 100 days (or pick a small subset).

MACD: Standard (12, 26, 9) periods.

RSI: 7, 9, 14 and 21 days.

Stochastic Oscillator: Commonly (14, 3, 3).

Williams’ %R: Commonly uses a 14-day period.

MOM (Momentum): 10 or 14 days.

OBV (On-Balance Volume): Typically accumulative (one version).

CMO (Chande Momentum Oscillator): Often 14 or 20 days.
"""

In [None]:

# Load the raw price data; parse_dates converts the "date" column to datetimes on read.
df = pd.read_csv(
    r"C:\Users\matti\OneDrive\Thesis\Data\filtered_price.csv",
    parse_dates=["date"],
)
# Use the date as the index and order the rows by it before computing any indicator.
df = df.set_index("date").sort_index()

# Every pandas_ta call below uses append=True, so its output column(s) are added to df.

# Moving averages: for each window the EMA is appended first, then the SMA.
for window in (5, 10, 20, 50, 100):
    df.ta.ema(close="close", length=window, append=True)
    df.ta.sma(close="close", length=window, append=True)

# MACD with fast=12, slow=26, signal=9.
df.ta.macd(close="close", fast=12, slow=26, signal=9, append=True)

# RSI over several look-back windows.
for window in (7, 9, 14, 21):
    df.ta.rsi(close="close", length=window, append=True)

# Stochastic oscillator with k=14, d=3; any further smoothing is left at the library default.
df.ta.stoch(high="high", low="low", close="close", k=14, d=3, append=True)

# Williams %R over 14 periods.
df.ta.willr(high="high", low="low", close="close", length=14, append=True)

# Momentum (MOM) over several look-back windows.
for window in (5, 10, 14, 21):
    df.ta.mom(close="close", length=window, append=True)

# On-Balance Volume (OBV), built from close and volume.
df.ta.obv(close="close", volume="volume", append=True)

# Chande Momentum Oscillator (CMO).
for window in (14, 20):
    df.ta.cmo(close="close", length=window, append=True)

# Persist prices plus indicators; index=True keeps the date index as the first CSV column.
df.to_csv(r"C:\Users\matti\OneDrive\Thesis\Data\filtered_price_indicators.csv", index=True)
    

In [None]:
# Date window that is checked for gaps in the price data.
start_date = "2022-03-03"
end_date = "2024-12-31"

# Reload the indicator file; "date" stays a regular (parsed) column here, not the index.
df = pd.read_csv(
    r"C:\Users\matti\OneDrive\Thesis\Data\filtered_price_indicators.csv",
    parse_dates=["date"],
)

print(df.columns)

# Every business day (freq="B") in the window, minus the dates present in the data.
full_range = pd.date_range(start_date, end_date, freq="B")
missing_dates = full_range.difference(df["date"])

# Per the original analysis, the dates printed here are all market holidays
# (non-trading days), so no data exists for them. Apart from those, every
# business day in the window has data.
print(missing_dates)

In [None]:
# Work on a copy so the feature columns below are not added to `df` itself.
df2 = df.copy()

# Daily log return: ln(close_t / close_{t-1}). The first row has no previous
# close, so its value is NaN.
# NOTE(review): shift() works on row order, so this assumes rows are in
# chronological order and belong to a single instrument - confirm.
df2["log_return"] = np.log(df2["close"] / df2["close"].shift(1))

n_lags = 5  # How many days back to create lagged close columns for

# Drive the loop from n_lags (previously a hard-coded range(1, 6) ignored it),
# producing close_lag_1 ... close_lag_{n_lags}. The first `lag` rows of each
# column are NaN.
for lag in range(1, n_lags + 1):
    df2[f"close_lag_{lag}"] = df2["close"].shift(lag)

# NOTE(review): if `df` comes from the read_csv cell that does not set an index,
# index=True writes the default integer index as an extra unnamed first column -
# confirm that downstream readers expect it.
df2.to_csv(r"C:\Users\matti\OneDrive\Thesis\Data\FINAL_price_indicators.csv", index=True) #save to csv
