# Import

In [None]:
# adding common module to path, to be visible
import sys
sys.path.append("../../../common")

# data analysis
import pandas as pd
import numpy as np
# Show floats with four decimals and without scientific notation,
# both in NumPy array output and in pandas DataFrame rendering.
np.set_printoptions(precision=4, suppress=True)
pd.set_option("display.float_format", "{:.4f}".format)
from datetime import datetime

# api related
import requests
import kagglehub

# database related
import psycopg
import os
import json
from tabulate import tabulate
from utility import dry_insert_into_db, insert_into_db, describe_table

# logging related
import logging
from pathlib import Path

# Init

In [None]:
# Part of the working directory that follows the project root; it is embedded
# in every log record so an entry can be traced back to this notebook.
# partition() yields "" when the marker is missing instead of raising
# IndexError the way split(...)[1] would.
pwf = str(Path.cwd()).partition("bitcoin_analysis")[2]
logger = logging.getLogger("default_logger")

# File handler: full multi-line record (location, level, message, timestamp).
file_handler = logging.FileHandler("../../../logging/logger.txt")
file_handler.set_name("notebook_file_handler")
file_formater = logging.Formatter(
    f"{pwf}\n"
    ">>>\n"
    "%(levelname)s: %(message)s.\n"
    "<<< %(asctime)s\n"
)

# Console handler: a short notice that something was logged.
console_handler = logging.StreamHandler()
console_handler.set_name("notebook_console_handler")
console_formater = logging.Formatter(f"Logged %(levelname)s in {pwf}")

file_handler.setFormatter(file_formater)
file_handler.setLevel(logging.INFO)

console_handler.setLevel(logging.INFO)
console_handler.setFormatter(console_formater)

logger.setLevel(logging.INFO)

# Re-running this cell must not stack a second pair of handlers on the same
# logger (every record would then be written twice). Only handlers created by
# a previous run of this cell are removed; handlers attached elsewhere stay.
own_handler_names = {file_handler.get_name(), console_handler.get_name()}
for stale_handler in [h for h in logger.handlers if h.get_name() in own_handler_names]:
    logger.removeHandler(stale_handler)
    stale_handler.close()

logger.addHandler(file_handler)
logger.addHandler(console_handler)

In [None]:
# Load the local secrets file and build the PostgreSQL connection URI
# that the database cells of this notebook pass to psycopg and the helpers.
SECRETS = json.loads(Path("../../../secrets/secrets.json").read_text())

postgres_passcode = SECRETS["postgres_passcode"]
CONNECTION_STRING = f"postgresql://postgres:{postgres_passcode}@localhost/bitcoin_analysis"

# Creating tables

The tables themselves are created from the CLI using `schema_creator.sql`; the cells below only list the resulting tables.

In [None]:
# NOTE(review): this re-reads the same file that is already loaded into
# SECRETS; the lowercase name is kept only in case other cells rely on it.
with open("../../../secrets/secrets.json") as f:
    secrets = json.load(f)

# List every table of the `public` schema with its owner, insertability and
# tablespace. pg_tables is joined on schema AND table name: joining on the
# name alone would duplicate rows (or report the wrong owner) whenever a table
# with the same name exists in another schema.
with psycopg.connect(CONNECTION_STRING) as conn:
    res = conn.execute("""
        SELECT
            i.table_name AS name,
            t.tableowner AS table_owner,
            i.is_insertable_into AS is_insertable,
            coalesce(t.tablespace, 'pg_default') AS "tablespace"
        FROM
            information_schema.tables i
            LEFT JOIN pg_tables t
                ON i.table_schema = t.schemaname
                AND i.table_name = t.tablename
        WHERE i.table_schema = 'public'
        ORDER BY i.table_name;
    """)
res

In [None]:
# Assemble a single list whose first element is the tuple of column names
# and whose remaining elements are the fetched rows.
content = res.fetchall()
head = [tuple(column.name for column in res.description), *content]
query_result = head

In [None]:
# Render the query result as a pipe-style table: the first entry supplies
# the headers, everything after it supplies the rows.
rendered_tables = tabulate(query_result[1:], headers=query_result[0], tablefmt="pipe")
print(rendered_tables)

In [None]:
# Record the table overview in the log, below a one-line status message.
tables_overview = tabulate(query_result[1:], headers=query_result[0], tablefmt="pipe")
logger.info("Tables have been created.\n" + tables_overview)

# Getting Bitcoin data

The Twelve Data API failed at test time due to restrictions of its free plan, so the data is downloaded from Kaggle instead.

In [None]:
# Download the Bitcoin dataset from Kaggle and locate its CSV file.
# KAGGLEHUB_CACHE points the kagglehub cache at the project's temp directory.
os.environ.update({"KAGGLEHUB_CACHE":"/Users/Misha/Documents/python_projects/data_analysis/bitcoin_analysis/temp/"})
dir_ = Path(kagglehub.dataset_download("mczielinski/bitcoin-historical-data"))

# os.listdir() returns entries in arbitrary order and may include non-data
# files (e.g. .DS_Store), so pick the CSV explicitly and deterministically.
csv_files = sorted(dir_.glob("*.csv"))
if not csv_files:
    raise FileNotFoundError(f"No CSV file found in downloaded dataset directory: {dir_}")
path = csv_files[0]
file_name = path.name

print(f"Path to dataset: {path}")

In [None]:
# Load the CSV located at `path` into a DataFrame.
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Sanity-check the raw data and log the outcome.
# Explicit checks are used instead of `assert`, which is stripped under -O.
problems = []

# `df < 0` is False for NaN, so missing values are not misreported as
# negative (the previous `(df >= 0).all()` failed on any NaN entry).
if bool((df < 0).any(axis=None)):
    problems.append("Some entries are negative")

# A row is unusable only when every one of its columns is missing.
if bool(df.isna().all(axis="columns").any()):
    problems.append("Some rows are completely NA")

if problems:
    logger.critical("Validation of the Bitcoin data failed: %s", "; ".join(problems))
else:
    logger.info("Validation of the Bitcoin data is successful")
In [None]:
# Convert epoch seconds to timestamps and aggregate the data to hourly OHLC.
#
# pd.to_datetime(unit="s") yields naive UTC timestamps. The previous
# datetime.fromtimestamp conversion used the machine's local time zone, which
# made the result machine-dependent and, around DST changes, merged two real
# hours into one bucket or left an empty one.
df["Timestamp"] = pd.to_datetime(df["Timestamp"], unit="s")
df.rename(columns=str.lower, inplace=True)
df.set_index("timestamp", inplace=True)

# Volume is not part of the stored OHLC table.
df.drop(columns="volume", inplace=True)

# Hourly candle: first open, highest high, lowest low, last close.
df = df.resample("1h").agg({
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
})
df

In [None]:
# Keep rows up to the cutoff (inclusive), then turn the frame into plain
# values for insertion: timestamps wrapped in single quotes, gaps as "NULL".
cutoff = datetime.fromisoformat("2025-07-01 23:00:00")
df = df.loc[:cutoff, :].reset_index()
df["timestamp"] = "'" + df["timestamp"].astype(str) + "'"
df = df.fillna("NULL")
df

In [None]:
# Pass the prepared rows to the dry-run helper for the bitcoin_ohlc table.
# NOTE(review): presumably this rehearses the insert without persisting it;
# confirm against the `utility` module.
ohlc_rows = df.to_numpy()
dry_insert_into_db(CONNECTION_STRING, "bitcoin_ohlc", ohlc_rows)

In [None]:
# Hand the prepared rows to the insert helper for the bitcoin_ohlc table.
ohlc_rows = df.to_numpy()
insert_into_db(CONNECTION_STRING, "bitcoin_ohlc", ohlc_rows)

In [None]:
# Show the helper's description of the bitcoin_ohlc table.
# NOTE(review): "ts" is presumably the timestamp column name; confirm in `utility`.
table_description = describe_table(CONNECTION_STRING, "bitcoin_ohlc", "ts")
print(table_description)

In [None]:
# Log the successful load together with a description of the target table.
# The logger object is named `logger`; the previous `log.info(...)` raised
# NameError. The description is computed first so the f-string needs no
# nested double quotes (which only parse on Python 3.12+).
table_summary = describe_table(CONNECTION_STRING, "bitcoin_ohlc", "ts")
logger.info(
    "Bitcoin OHLC successfully acquired from Kaggle, transformed and loaded\n"
    f"{table_summary}"
)