##### 0. Notebook setup and imports <br>
Here we make the preparation necessary for the notebook to run

In [None]:
# 0.1 Notebook setup
import sys
import os
print(sys.path)

NOTEBOOK_DIR = os.getcwd()
print(NOTEBOOK_DIR)
ROOT_DIR = os.path.dirname(NOTEBOOK_DIR)
print(ROOT_DIR)
SRC_DIR = os.path.join(ROOT_DIR, 'src')
print(SRC_DIR)

# Adds project's roof to sys.path
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [None]:
# 0.2 Imports

import requests                   # Allows me to make requests from web pages
import pandas as pd               # Has structure that supports many kinds of visualization from other libraries
import sqlite3                    # Chosen SQL database
from datetime import datetime, timezone     # Helps us convert the UNIX time into legible format and vice-versa

##### 1. Creating SQL Database

In [2]:
# 1. Creating connection

connection = sqlite3.connect('../data/crypto.db') #connects Python to SQLite
cursor = connection.cursor()

In [None]:
# 1.2. Creating the table

cursor.execute ("""
    CREATE TABLE IF NOT EXISTS bitcoin_prices (
                date DATETIME PRIMARY KEY NOT NULL,
                open REAL NOT NULL,
                high REAL NOT NULL,
                low REAL NOT NULL,
                close REAL NOT NULL,
                volume REAL NOT NULL
                )
""")

connection.commit()

# Querying table structure to verify it
cursor.execute("PRAGMA table_info(bitcoin_prices);")

columns = cursor.fetchall()
for col in columns:
    print(col)

(0, 'date', 'DATETIME', 1, None, 1)
(1, 'open_price', 'REAL', 1, None, 0)
(2, 'high_price', 'REAL', 1, None, 0)
(3, 'low', 'REAL', 1, None, 0)
(4, 'close_price', 'REAL', 1, None, 0)
(5, 'volume', 'REAL', 1, None, 0)


##### 2. Importing API data
       All of this is provided by Historical OHLCV endpoint of CoinDesk's API. It's documentation tell us which parameters are needed, as we'll see in the following cells.
    You can learn more in: https://developers.coindesk.com/documentation/data-api/index_cc_v1_historical_days


In [None]:
# 2.1. Defining and converting start_time
        ### Marking a start date is not required, but if you want to, the API asks for a UNIX timestamp in seconds in the field "to_ts". We'll do this conversion:

date_str = "2025-03-31 01:00:00"                                    # Input desired star date in the format is YY-MM-DD HH:MM:SS
date_timestamp = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")   # Converts string to datetime format. 

start_date = int(date_timestamp.timestamp())                        # timestamp() returns the number of seconds in UNIX.

print(start_date)                                                   # You can check the output printing the

1743393600


In [None]:
#2.2 Requesting data from the API

# CryptoCompare URL
CC_URL = "https://data-api.coindesk.com/index/cc/v1/historical/hours"

# Parameters required to request the data
params={
    "market":"cadli",                                       # REQUIRED FIELD! This one is a default market index, but you can pick many others
    "instrument":"BTC-USD",                                 # REQUIRED FIELD! It's the trading pair
    "limit": 2000,                                           # Number of datapoints returned
    "aggregate":1,                                          # Defines how you'll group data. In this case, I've choosen the 1 hour timespan
    "fill":"true",                                          # Boolean value. True has it return datapoints even in periods with no trading activity
    "apply_mapping":"true",                                 # Determines if provided instrument values are converted according to internal mappings. (if the pair was "USD-BTC", the true value would convert the results accordingly
    "response_format":"JSON",                               # It may be defined as JSON or CSV
    "to_ts" : start_date,                                   # Defines a start date
    }

# Getting response and recording it in a variable
response = requests.get(CC_URL, params=params)
bitcoin_data = response.json()


In [None]:
# 2.2.1 Iterates on the list of dictionaries provided by the API ensuring that there's only dictionaries there
if isinstance(bitcoin_data, dict) and "Data" in bitcoin_data:
    bitcoin_data = bitcoin_data["Data"]

print(bitcoin_data)

[{'UNIT': 'HOUR', 'TIMESTAMP': 1736197200, 'TYPE': '267', 'MARKET': 'cadli', 'INSTRUMENT': 'BTC-USD', 'OPEN': 102196.643949755, 'HIGH': 102383.400987382, 'LOW': 101640.113923497, 'CLOSE': 101659.740266516, 'FIRST_MESSAGE_TIMESTAMP': 1736197200, 'LAST_MESSAGE_TIMESTAMP': 1736200799, 'FIRST_MESSAGE_VALUE': 102196.568778183, 'HIGH_MESSAGE_VALUE': 102383.400987382, 'HIGH_MESSAGE_TIMESTAMP': 1736197695, 'LOW_MESSAGE_VALUE': 101640.113923497, 'LOW_MESSAGE_TIMESTAMP': 1736200696, 'LAST_MESSAGE_VALUE': 101659.740266516, 'TOTAL_INDEX_UPDATES': 65087, 'VOLUME': 11167.1830512779, 'QUOTE_VOLUME': 1139953965.42091, 'VOLUME_TOP_TIER': 6342.043969697, 'QUOTE_VOLUME_TOP_TIER': 647427529.820025, 'VOLUME_DIRECT': 1906.54432559, 'QUOTE_VOLUME_DIRECT': 194609017.864059, 'VOLUME_TOP_TIER_DIRECT': 1634.74369618, 'QUOTE_VOLUME_TOP_TIER_DIRECT': 166870679.405795}, {'UNIT': 'HOUR', 'TIMESTAMP': 1736200800, 'TYPE': '267', 'MARKET': 'cadli', 'INSTRUMENT': 'BTC-USD', 'OPEN': 101659.740266516, 'HIGH': 102112.52234

In [None]:
# 2.2.2 Verifies if there's only dictionaries in the cleaned list
for item in bitcoin_data:
    if not isinstance(item, dict):
        print(f"Invalid item found: {item}")

##### 3. Processing the data

In [None]:
# 3.1 Subsetting for the columns we need

selected_columns = ["TIMESTAMP", "OPEN", "HIGH", "LOW", "CLOSE", "VOLUME"]

# Iterates on the list of dictionaries provided by the API, subsetting the contained dictionaries so that we only have what we need
btc_clean_data = [
    {column: item[column] for column in selected_columns if column in item}
    for item in bitcoin_data
]


In [None]:
# 3.1.1 Checking the entries in the list

for i, item in enumerate(btc_clean_data):
    print(f"Entry {i+1}:")
    for k, v in item.items():
        print(f"  {k}: {v}")
    print("-" * 30)


Entry 1:
  TIMESTAMP: 1736197200
  OPEN: 102196.643949755
  HIGH: 102383.400987382
  LOW: 101640.113923497
  CLOSE: 101659.740266516
  VOLUME: 11167.1830512779
------------------------------
Entry 2:
  TIMESTAMP: 1736200800
  OPEN: 101659.740266516
  HIGH: 102112.522349383
  LOW: 101647.492422707
  CLOSE: 102047.45542071
  VOLUME: 5065.65861035709
------------------------------
Entry 3:
  TIMESTAMP: 1736204400
  OPEN: 102047.45542071
  HIGH: 102285.622020661
  LOW: 101941.107536873
  CLOSE: 102280.873054768
  VOLUME: 6498.48415850902
------------------------------
Entry 4:
  TIMESTAMP: 1736208000
  OPEN: 102280.873054768
  HIGH: 102713.731333029
  LOW: 102006.360074212
  CLOSE: 102094.449216145
  VOLUME: 12311.7875268933
------------------------------
Entry 5:
  TIMESTAMP: 1736211600
  OPEN: 102094.449216145
  HIGH: 102119.021951596
  LOW: 101744.721925884
  CLOSE: 101968.617507173
  VOLUME: 8433.23999503962
------------------------------
Entry 6:
  TIMESTAMP: 1736215200
  OPEN: 101968

In [None]:
# 3.2 Now we'll do some data processing concerning the TIMESTAMP using datetime library

# I'm now using a function to iterate it with each item in the list
def convert_timestamp(timestamp):
    return (datetime.fromtimestamp(timestamp, tz = timezone.utc)).strftime('%Y-%m-%d %H:%M:%S')

for item in btc_clean_data:
    item["TIMESTAMP"] = convert_timestamp(item["TIMESTAMP"])


# Printing new registries cleaned
for i, item in enumerate(btc_clean_data):
    print(f"Registry {i+1}: {item}")

Registry 1: {'TIMESTAMP': '2025-01-06 21:00:00', 'OPEN': 102196.643949755, 'HIGH': 102383.400987382, 'LOW': 101640.113923497, 'CLOSE': 101659.740266516, 'VOLUME': 11167.1830512779}
Registry 2: {'TIMESTAMP': '2025-01-06 22:00:00', 'OPEN': 101659.740266516, 'HIGH': 102112.522349383, 'LOW': 101647.492422707, 'CLOSE': 102047.45542071, 'VOLUME': 5065.65861035709}
Registry 3: {'TIMESTAMP': '2025-01-06 23:00:00', 'OPEN': 102047.45542071, 'HIGH': 102285.622020661, 'LOW': 101941.107536873, 'CLOSE': 102280.873054768, 'VOLUME': 6498.48415850902}
Registry 4: {'TIMESTAMP': '2025-01-07 00:00:00', 'OPEN': 102280.873054768, 'HIGH': 102713.731333029, 'LOW': 102006.360074212, 'CLOSE': 102094.449216145, 'VOLUME': 12311.7875268933}
Registry 5: {'TIMESTAMP': '2025-01-07 01:00:00', 'OPEN': 102094.449216145, 'HIGH': 102119.021951596, 'LOW': 101744.721925884, 'CLOSE': 101968.617507173, 'VOLUME': 8433.23999503962}
Registry 6: {'TIMESTAMP': '2025-01-07 02:00:00', 'OPEN': 101968.617507173, 'HIGH': 102056.8980792

In [None]:
# 3.3 Renaming the keys from the dictionary

# Mapping dictionary
mapped_list = { 
    "TIMESTAMP" : "date",
    "OPEN" : "open",
    "HIGH" : "high",
    "LOW" : "low",
    "CLOSE" : "close",
    "VOLUME" : "volume"
}

# Creating a new list with the key names updated
btc_data = [
    {mapped_list[key]: value for key, value in item.items()}
    for item in btc_clean_data
]

# Exibir o resultado formatado
for i, item in enumerate(btc_data):
    print(f"Registro {i+1}: {item}")


Registro 1: {'date': '2025-01-06 21:00:00', 'open': 102196.643949755, 'high': 102383.400987382, 'low': 101640.113923497, 'close': 101659.740266516, 'volume': 11167.1830512779}
Registro 2: {'date': '2025-01-06 22:00:00', 'open': 101659.740266516, 'high': 102112.522349383, 'low': 101647.492422707, 'close': 102047.45542071, 'volume': 5065.65861035709}
Registro 3: {'date': '2025-01-06 23:00:00', 'open': 102047.45542071, 'high': 102285.622020661, 'low': 101941.107536873, 'close': 102280.873054768, 'volume': 6498.48415850902}
Registro 4: {'date': '2025-01-07 00:00:00', 'open': 102280.873054768, 'high': 102713.731333029, 'low': 102006.360074212, 'close': 102094.449216145, 'volume': 12311.7875268933}
Registro 5: {'date': '2025-01-07 01:00:00', 'open': 102094.449216145, 'high': 102119.021951596, 'low': 101744.721925884, 'close': 101968.617507173, 'volume': 8433.23999503962}
Registro 6: {'date': '2025-01-07 02:00:00', 'open': 101968.617507173, 'high': 102056.898079246, 'low': 101700.468286027, '

In [11]:
connection = sqlite3.connect('../data/crypto.db') #connects Python to SQLite
cursor = connection.cursor()


cursor.execute ("""
        SELECT *
        FROM bitcoin_prices
""")

cursor.execute("PRAGMA table_info(bitcoin_prices);")

columns = cursor.fetchall()
for col in columns:
    print(col)

(0, 'date', 'DATETIME', 1, None, 1)
(1, 'open', 'REAL', 1, None, 0)
(2, 'high', 'REAL', 1, None, 0)
(3, 'low', 'REAL', 1, None, 0)
(4, 'close', 'REAL', 1, None, 0)
(5, 'volume', 'REAL', 1, None, 0)



#### 4. Inserting data into the database

In [None]:
# Inserting query
insert_query = '''
INSERT OR IGNORE INTO bitcoin_prices (date, open_price, high_price, low, close_price, volume)
VALUES (?, ?, ?, ?, ?, ?)
'''

# Converting data into a list of tuples before e insrting them
data_to_insert = [
    (item["date"], item["open"], item["high"], item["low"], item["close"], item["volume"])
    for item in btc_data
]

# Executing insertions

cursor.executemany(insert_query, data_to_insert)

# Saving alterations and closing conection
connection.commit()




#### 5. Quering the data

In [None]:
#Quering data (here I am just verifying if the table is working)

        
connection = sqlite3.connect('../data/crypto.db') #connects Python to SQLite
cursor = connection.cursor()

cursor.execute ("""
        SELECT *
        FROM bitcoin_prices
        ORDER BY date;
""")

result = cursor.fetchall()

for x in result:
    print(x)

('2025-01-06 21:00:00', 102196.643949755, 102383.400987382, 101640.113923497, 101659.740266516, 11167.1830512779)
('2025-01-06 22:00:00', 101659.740266516, 102112.522349383, 101647.492422707, 102047.45542071, 5065.65861035709)
('2025-01-06 23:00:00', 102047.45542071, 102285.622020661, 101941.107536873, 102280.873054768, 6498.48415850902)
('2025-01-07 00:00:00', 102280.873054768, 102713.731333029, 102006.360074212, 102094.449216145, 12311.7875268933)
('2025-01-07 01:00:00', 102094.449216145, 102119.021951596, 101744.721925884, 101968.617507173, 8433.23999503962)
('2025-01-07 02:00:00', 101968.617507173, 102056.898079246, 101700.468286027, 101726.966625477, 6319.05682950001)
('2025-01-07 03:00:00', 101726.966625477, 101845.514135434, 101574.526776035, 101731.903520634, 6326.98790601675)
('2025-01-07 04:00:00', 101731.903520634, 101951.901873924, 101706.803510684, 101707.120021451, 4531.64285242173)
('2025-01-07 05:00:00', 101707.120021451, 101813.717179016, 101621.196924195, 101759.58844