In [17]:
import pandas as pd
import numpy as np
import sqlite3

In [19]:
# Connect to the db and load only the `time` column of BTC_data into `df`.

# Bind `conn` before the try block: if sqlite3.connect() itself raises, the
# `finally` clause would otherwise hit an unbound name (or silently reuse a
# stale connection object left over from a previously executed cell).
conn = None
try:
    conn = sqlite3.connect('../BTC_data.db')
    query = "SELECT time FROM BTC_data;"
    df = pd.read_sql_query(query, conn)
# pandas re-raises driver failures from read_sql_query as
# pandas.errors.DatabaseError, which is not a sqlite3.Error subclass, so both
# have to be listed for query errors to reach this handler.
except (sqlite3.Error, pd.errors.DatabaseError) as e:
    # Best-effort: report and continue. Note that `df` is not (re)assigned
    # when this branch runs.
    print(f"An error occurred: {e}")
finally:
    if conn is not None:
        conn.close()


In [20]:
# Feature Engineering

# Parse the raw values once, store the parsed series back on the frame, and
# derive the calendar features from that same parsed series.
parsed_time = pd.to_datetime(df['time'])
df['time'] = parsed_time
df['hour'] = parsed_time.dt.hour
df['day_of_week'] = parsed_time.dt.dayofweek  # Monday=0, Sunday=6

# Session windows as (opening hour, closing hour); a row counts as "open" when
# opening hour <= hour < closing hour: USA 9-17, EU 8-16, ASIA 1-9.
# NOTE(review): the windows are compared against the hour exactly as stored in
# `time`; they are labelled UTC here — confirm the timestamps really are UTC.
session_hours = {
    'USA_open': (9, 17),
    'EU_open': (8, 16),
    'ASIA_open': (1, 9),
}
for flag_name, (opens_at, closes_at) in session_hours.items():
    df[flag_name] = (df['hour'] >= opens_at) & (df['hour'] < closes_at)


In [21]:
# Preview the first rows of `df` to eyeball the columns it currently holds.
df.head()

Unnamed: 0,time,hour,day_of_week,USA_open,EU_open,ASIA_open
0,2022-09-01 06:45:00,6,3,False,False,True
1,2022-09-01 07:00:00,7,3,False,False,True
2,2022-09-01 07:15:00,7,3,False,False,True
3,2022-09-01 07:30:00,7,3,False,False,True
4,2022-09-01 07:45:00,7,3,False,False,True


# Append back to the db: make sure the feature columns exist on BTC_data, then
# write the values computed in `df` onto the rows with a matching `time`.

feature_cols = ['hour', 'day_of_week', 'USA_open', 'EU_open', 'ASIA_open']

# One parameter tuple per DataFrame row, in the same order as the `?`
# placeholders below. `.tolist()` hands back native Python scalars, which is
# what sqlite3 can bind; the boolean flags are stored as 0/1; `str(ts)` yields
# the same text the timestamp would produce when formatted into a string.
update_params = list(zip(
    df['hour'].tolist(),
    df['day_of_week'].tolist(),
    df['USA_open'].astype(int).tolist(),
    df['EU_open'].astype(int).tolist(),
    df['ASIA_open'].astype(int).tolist(),
    [str(ts) for ts in df['time']],
))

# Values are bound through placeholders instead of being pasted into the SQL
# text, so quoting is handled by the driver and the statement is prepared once.
update_sql = """
    UPDATE BTC_data
    SET hour = ?,
        day_of_week = ?,
        USA_open = ?,
        EU_open = ?,
        ASIA_open = ?
    WHERE time = ?;
"""

# Bind `conn` up front so the except/finally clauses never touch an unbound
# name when sqlite3.connect() itself fails.
conn = None
try:
    conn = sqlite3.connect('../BTC_data.db')

    # Append new columns only if they don't exist. PRAGMA table_info returns
    # one row per column with the column name in position 1.
    existing_columns = {info[1] for info in conn.execute("PRAGMA table_info(BTC_data);")}
    for col in feature_cols:
        if col not in existing_columns:
            # Identifiers cannot be bound as parameters; `col` only ever comes
            # from the fixed list above.
            conn.execute(f"ALTER TABLE BTC_data ADD COLUMN {col} INTEGER;")
    conn.commit()

    # Update the new columns in a single batched call.
    conn.executemany(update_sql, update_params)
    conn.commit()

    # Verify that the update is successful
    df_check = pd.read_sql_query("SELECT * FROM BTC_data LIMIT 5;", conn)
    print(df_check)

# read_sql_query surfaces driver failures as pandas.errors.DatabaseError, which
# is not a sqlite3.Error subclass, so both are listed.
except (sqlite3.Error, pd.errors.DatabaseError) as e:
    print(f"An error occurred: {e}")
    if conn is not None:
        conn.rollback()
finally:
    if conn is not None:
        conn.close()



In [24]:
# Explore which additional features could be derived from the data itself.

# Start by listing the columns currently held in `df`.
df.columns

Index(['time', 'hour', 'day_of_week', 'USA_open', 'EU_open', 'ASIA_open'], dtype='object')

In [28]:
# Query db for the first 30 rows of every column in BTC_data.
# Set the display option to show all columns (this changes the pandas display
# setting for the rest of the session, not just for this cell).
pd.set_option('display.max_columns', None)

# Bind `conn` before the try block so the `finally` clause is safe even when
# sqlite3.connect() itself raises.
conn = None
try:
    conn = sqlite3.connect('../BTC_data.db')
    # Execute the query and store the result in a DataFrame; this rebinds `df`
    # to the full-width table sample.
    df = pd.read_sql_query("SELECT * FROM BTC_data LIMIT 30;", conn)
    print(df)
# read_sql_query surfaces driver failures as pandas.errors.DatabaseError, which
# is not a sqlite3.Error subclass, so both are listed.
except (sqlite3.Error, pd.errors.DatabaseError) as e:
    print(f"An error occurred: {e}")
finally:
    # Close the connection
    if conn is not None:
        conn.close()

    index                 time     open     high      low    close  \
0       0  2022-09-01 06:45:00  19990.5  20000.0  19945.0  19975.0   
1       1  2022-09-01 07:00:00  19975.0  19975.0  19876.0  19933.0   
2       2  2022-09-01 07:15:00  19933.0  19961.5  19878.0  19898.0   
3       3  2022-09-01 07:30:00  19898.0  19908.0  19771.0  19879.0   
4       4  2022-09-01 07:45:00  19879.0  19924.5  19862.0  19891.5   
5       5  2022-09-01 08:00:00  19891.5  19925.0  19844.0  19924.5   
6       6  2022-09-01 08:15:00  19924.5  19928.0  19871.0  19871.0   
7       7  2022-09-01 08:30:00  19871.0  19912.5  19807.0  19882.5   
8       8  2022-09-01 08:45:00  19882.5  19920.5  19863.5  19920.0   
9       9  2022-09-01 09:00:00  19920.0  19979.0  19848.0  19858.5   
10     10  2022-09-01 09:15:00  19858.5  19892.5  19827.5  19892.0   
11     11  2022-09-01 09:30:00  19892.0  19915.0  19874.0  19915.0   
12     12  2022-09-01 09:45:00  19915.0  19950.0  19896.0  19921.5   
13     13  2022-09-0

In [29]:
# Goal: bucket the magnitude data into categorical classes whose boundaries
# meaningfully separate different levels of magnitude and that fit the data
# well enough to explain the differences between them.
# Start by listing the columns available in `df`.
df.columns

Index(['index', 'time', 'open', 'high', 'low', 'close', 'vwap', 'upper_b1',
       'lower_b1', 'upper_b2', 'lower_b2', 'upper_b3', 'lower_b3', 'basis',
       'upper', 'lower', 'parabolicsar', 'twap', 'volume', 'volume_ma', 'adx',
       'efi', 'atr', 'obv', 'roc', 'cci', 'target_close', 'hour',
       'day_of_week', 'USA_open', 'EU_open', 'ASIA_open'],
      dtype='object')

In [33]:
# Hand `df` to the project helper; judging by its name and arguments it
# presumably writes the first `num_rows` rows to a CSV file — verify against
# the helper's own documentation.
# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'heads_scripts'", so the module must be
# importable from the kernel's search path before this cell can run.
from heads_scripts import export_df_head_to_csv

export_df_head_to_csv(
    df,
    num_rows=30,
    base_directory='..',
    folder_name='heads_csv',
    file_name='BTC_cats.csv',
)


ModuleNotFoundError: No module named 'heads_scripts'