# Setting up and Backfilling MariaDB

In [1]:
#Let's Generate our Features for Modeling which will also be stored in a Feature Store

# Importing necessary libraries

In [2]:
import pandas as pd

from datetime import datetime

import sys
sys.path.append("../scripts/")

import path

# Importing Transformed Data

In [3]:
CleanData = pd.read_parquet(path.TRANSFORMED_DATA_DIR / "BTC-USD_HourlyTransformedData_From2021-12-14 00:00:00+00:00_To2024-01-09 23:00:00+00:00.parquet")

CleanData

Unnamed: 0,Date,Close
298,2021-12-14 00:00:00+00:00,47022.75
297,2021-12-14 01:00:00+00:00,46889.47
296,2021-12-14 02:00:00+00:00,47052.39
295,2021-12-14 03:00:00+00:00,46977.81
294,2021-12-14 04:00:00+00:00,47017.01
...,...,...
17940,2024-01-09 19:00:00+00:00,46890.10
17939,2024-01-09 20:00:00+00:00,46651.31
17938,2024-01-09 21:00:00+00:00,45419.45
17937,2024-01-09 22:00:00+00:00,46278.06


# Generating Features

In [4]:
#As Features, again for the sake of semplicity in Modeling, we are going to use the Last 2 Weeks of Prices to Predict the Next One

In [5]:
HoursTwoWeeks = 24*7*2

In [6]:
#Generating Columns for FeaturesDF
cols = [f'Close_{x}_Hours_Ago' for x in range(1, HoursTwoWeeks+1)]
cols.reverse()
cols.append("ActualClose")
cols.append("ActualDate")

In [7]:
#Generating Values for FeaturesDF
#[[i for i in prices[x-HoursTwoWeeks-1:x]] for x in range(HoursTwoWeeks+1, len(prices))]

vals = []
prices = list(CleanData["Close"])
dates = list(CleanData["Date"])

for x in range(HoursTwoWeeks+1, len(prices)):
    featurevals = [i for i in prices[x-HoursTwoWeeks-1:x]]
    featurevals.append(dates[x])
    vals.append(featurevals)

In [8]:
FeaturesDF = pd.DataFrame(data=vals, columns=cols)

In [9]:
FeaturesDF.columns

Index(['Close_336_Hours_Ago', 'Close_335_Hours_Ago', 'Close_334_Hours_Ago',
       'Close_333_Hours_Ago', 'Close_332_Hours_Ago', 'Close_331_Hours_Ago',
       'Close_330_Hours_Ago', 'Close_329_Hours_Ago', 'Close_328_Hours_Ago',
       'Close_327_Hours_Ago',
       ...
       'Close_8_Hours_Ago', 'Close_7_Hours_Ago', 'Close_6_Hours_Ago',
       'Close_5_Hours_Ago', 'Close_4_Hours_Ago', 'Close_3_Hours_Ago',
       'Close_2_Hours_Ago', 'Close_1_Hours_Ago', 'ActualClose', 'ActualDate'],
      dtype='object', length=338)

In [10]:
FeaturesDF.tail()

Unnamed: 0,Close_336_Hours_Ago,Close_335_Hours_Ago,Close_334_Hours_Ago,Close_333_Hours_Ago,Close_332_Hours_Ago,Close_331_Hours_Ago,Close_330_Hours_Ago,Close_329_Hours_Ago,Close_328_Hours_Ago,Close_327_Hours_Ago,...,Close_8_Hours_Ago,Close_7_Hours_Ago,Close_6_Hours_Ago,Close_5_Hours_Ago,Close_4_Hours_Ago,Close_3_Hours_Ago,Close_2_Hours_Ago,Close_1_Hours_Ago,ActualClose,ActualDate
17826,42138.37,42099.94,42122.45,42337.53,42471.06,42515.53,42355.7,42459.92,42232.84,42242.6,...,46464.36,46643.78,46193.87,46858.17,46574.36,46876.36,46921.73,46738.77,46657.29,2024-01-09 19:00:00+00:00
17827,42099.94,42122.45,42337.53,42471.06,42515.53,42355.7,42459.92,42232.84,42242.6,42297.37,...,46643.78,46193.87,46858.17,46574.36,46876.36,46921.73,46738.77,46657.29,46890.1,2024-01-09 20:00:00+00:00
17828,42122.45,42337.53,42471.06,42515.53,42355.7,42459.92,42232.84,42242.6,42297.37,42454.83,...,46193.87,46858.17,46574.36,46876.36,46921.73,46738.77,46657.29,46890.1,46651.31,2024-01-09 21:00:00+00:00
17829,42337.53,42471.06,42515.53,42355.7,42459.92,42232.84,42242.6,42297.37,42454.83,42425.66,...,46858.17,46574.36,46876.36,46921.73,46738.77,46657.29,46890.1,46651.31,45419.45,2024-01-09 22:00:00+00:00
17830,42471.06,42515.53,42355.7,42459.92,42232.84,42242.6,42297.37,42454.83,42425.66,42467.26,...,46574.36,46876.36,46921.73,46738.77,46657.29,46890.1,46651.31,45419.45,46278.06,2024-01-09 23:00:00+00:00


# Dumping Features Data to Disk

In [11]:
start = (CleanData["Date"].iloc[0])
finish = (CleanData["Date"].iloc[-1])
product_id = "BTC-USD"

FeaturesDF.to_parquet(path.FEATURES_DATA_DIR / f'{product_id}_FeaturesData_From{start}_To{finish}.parquet')