# Setting up and Backfilling MariaDB

In [1]:
# Let's set up a MariaDB database.
# The database stores all the clean, transformed data before it is turned into features for modeling.
# This is good practice for data versioning and for ensuring consistency.

# Importing necessary libraries

In [2]:
import mariadb

import pandas as pd

from datetime import datetime

import sys
sys.path.append("../scripts/")

import path
import secret

# Connecting to MariaDB Platform (Already Installed and Initialized with an Already Created DB)

In [3]:
# Open a connection to the local MariaDB server.
# Credentials and the database name are read from the project's `secret` module.
connection_settings = {
    "user": secret.MariaDB_User,
    "password": secret.MariaDB_Password,
    "host": "127.0.0.1",  # Local DB
    "port": 3306,
    "database": secret.MariaDB_Database,
}

try:
    conn = mariadb.connect(**connection_settings)
except mariadb.Error as e:
    # Report the connector error and stop; later cells need `conn`.
    print(f"Error connecting to MariaDB Platform: {e}")
    sys.exit(1)

In [4]:
# Get a cursor from the open connection; the cells below use it to execute SQL statements
cur = conn.cursor()

# Importing Transformed Data

In [5]:
# Location of the transformed hourly BTC-USD parquet file inside the project's transformed-data directory
transformed_file = path.TRANSFORMED_DATA_DIR / "BTC-USD_HourlyTransformedData_From2021-12-14 00:00:00+00:00_To2024-01-09 23:00:00+00:00.parquet"

# Load it into a DataFrame and display it as the cell output
CleanData = pd.read_parquet(transformed_file)

CleanData

Unnamed: 0,Date,Close
298,2021-12-14 00:00:00+00:00,47022.75
297,2021-12-14 01:00:00+00:00,46889.47
296,2021-12-14 02:00:00+00:00,47052.39
295,2021-12-14 03:00:00+00:00,46977.81
294,2021-12-14 04:00:00+00:00,47017.01
...,...,...
17940,2024-01-09 19:00:00+00:00,46890.10
17939,2024-01-09 20:00:00+00:00,46651.31
17938,2024-01-09 21:00:00+00:00,45419.45
17937,2024-01-09 22:00:00+00:00,46278.06


# Creating a Table for our Data

In [6]:
# Create the destination table: one row per timestamp, keyed by Date.
# The table name comes from the imported `secret` module (the notebook never
# imports `secrets`, so the previous reference raised NameError on a fresh kernel).
# IF NOT EXISTS keeps this cell re-runnable when the table was created earlier.
cur.execute(
    f'CREATE TABLE IF NOT EXISTS {secret.MariaDB_TableName} '
    '(Date DATETIME PRIMARY KEY NOT NULL, Close DOUBLE)'
)

In [7]:
# Backfill the table with every (Date, Close) row of CleanData.
# The table name comes from the imported `secret` module (`secrets` is never imported).
insert_sql = f'INSERT INTO {secret.MariaDB_TableName} (Date, Close) VALUES (?, ?)'

# Build the parameter tuples once. The first column holds the timestamp and the
# second the close price; each timestamp is rebuilt as a naive datetime
# (year..second only) so it maps onto the DATETIME column.
rows_to_insert = [
    (
        datetime(row[0].year, row[0].month, row[0].day, row[0].hour, row[0].minute, row[0].second),
        row[1],
    )
    for row in CleanData.itertuples(index=False, name=None)
]

# One batched statement instead of one round trip per row; skip when there is nothing to insert.
if rows_to_insert:
    cur.executemany(insert_sql, rows_to_insert)

conn.commit()

# Check that the Data has been Loaded (There are Many Ways to Load a DataFrame; We Chose This One)

In [8]:
# Read the table back to verify the backfill.
# Query the same table the cells above write to (secret.MariaDB_TableName) rather than
# a hard-coded name, select the columns explicitly so they line up with the DataFrame
# column labels, and order by Date because SQL gives no row-order guarantee without ORDER BY.
cur.execute(f"SELECT Date, Close FROM {secret.MariaDB_TableName} ORDER BY Date")

# fetchall() returns every remaining result row as a list of tuples
checkdf = pd.DataFrame(data=cur.fetchall(), columns=["Date", "Close"])

print(checkdf)

                     Date     Close
0     2021-12-14 00:00:00  47022.75
1     2021-12-14 01:00:00  46889.47
2     2021-12-14 02:00:00  47052.39
3     2021-12-14 03:00:00  46977.81
4     2021-12-14 04:00:00  47017.01
...                   ...       ...
18163 2024-01-09 19:00:00  46890.10
18164 2024-01-09 20:00:00  46651.31
18165 2024-01-09 21:00:00  45419.45
18166 2024-01-09 22:00:00  46278.06
18167 2024-01-09 23:00:00  46124.08

[18168 rows x 2 columns]
