In [85]:
# ! pip install pandas tomli matplotlib tqdm

In [87]:
from tqdm import tqdm
import pandas as pd
import tomli

In [93]:
# Load Config
CONFIG_FILE_PATH = "../config.tomli"

# The config file is opened in binary mode ('rb') and parsed with tomli.
with open(CONFIG_FILE_PATH, 'rb') as config_file:
    config = tomli.load(config_file)

# Every entry of the [data] table is a location relative to the parent
# directory, hence the "../" prefix on each of them.
data_section = config['data']

ETF_DATA_DRIVE_PATH = f"../{data_section['etfs']}"
STOCK_DATA_DRIVE_PATH = f"../{data_section['stocks']}"
PROCESSED_DATA_DRIVE_PATH = f"../{data_section['processed']}"
ENGINEERED_DATA_DRIVE_PATH = f"../{data_section['engineered']}"
SYMBOLS_FILE_PATH = f"../{data_section['symbols']}"

# Input (processed) and output (engineered) parquet files used by this notebook.
DATASET_PATH = f"{PROCESSED_DATA_DRIVE_PATH}/dataset.parquet"
ENG_DATASET_PATH = f"{ENGINEERED_DATA_DRIVE_PATH}/dataset.parquet"

# Date format string used when parsing the 'Date' column.
date_format = config['format']['date_format']

# Column dtype mappings read from the config.
data_dtypes = config['etf_stock_data_type']
symbols_dtype = config['symbols_data_types']

In [91]:
def get_featured_dataset(security_df: pd.DataFrame, window: int = 30) -> pd.DataFrame:
    """Add rolling volume and price features to one security's history.

    The frame is indexed by ``Date``, upsampled to calendar-day frequency
    (days missing from the input are forward-filled from the previous
    available row) and two rolling features are then computed over the last
    ``window`` daily rows:

    * ``vol_moving_avg`` - rolling mean of ``Volume``
    * ``adj_close_rolling_med`` - rolling median of ``Adj Close``

    Parameters
    ----------
    security_df : pd.DataFrame
        Rows to process. Must contain a datetime-like ``Date`` column
        (``resample`` requires a datetime index) as well as ``Volume`` and
        ``Adj Close`` columns.
    window : int, default 30
        Number of daily rows in each rolling window. The default keeps the
        behaviour of the previously hard-coded 30-day window.

    Returns
    -------
    pd.DataFrame
        A new frame with one row per calendar day, ``Date`` restored as a
        regular (first) column and the two feature columns appended. A
        feature is NaN wherever its window holds fewer than ``window``
        non-null observations, which includes the first ``window - 1`` rows.
        The input frame is not modified.
    """
    # set_index / resample both return new objects, so the caller's frame is
    # left untouched by the column assignments below.
    daily = security_df.set_index('Date').resample('D').ffill()

    daily['vol_moving_avg'] = daily['Volume'].rolling(window=window).mean()
    daily['adj_close_rolling_med'] = daily['Adj Close'].rolling(window=window).median()

    return daily.reset_index()

In [92]:
# Load the processed dataset from parquet
dataset = pd.read_parquet(DATASET_PATH)

# Parse the 'Date' column into datetimes using the configured format
parsed_dates = pd.to_datetime(dataset['Date'], format=date_format)
dataset['Date'] = parsed_dates

In [96]:
%time
# get unique securities
securities =  dataset['Symbol'].unique()

security_dfs = []

# loop over all securities to obtain the feature dataset
for security in tqdm(securities):
    # get data for security
    security_df = dataset[dataset['Symbol'] == security]
    # get featured dataset
    security_df = get_featured_dataset(security_df=security_df)
    # append the security df
    security_dfs.append(security_df)

# merge all dfs to create engineered dataset 
eng_dataset = pd.concat(security_dfs)
del security_dfs

# discarding null values 
eng_dataset = eng_dataset.dropna()

# save the engineered dataset
eng_dataset.to_parquet(ENG_DATASET_PATH, index=False)

In [97]:
# ! pip freeze > ../requirements.txt