# Database Access with ClickHouse Connect

Please install the following packages:
- clickhouse_connect
- python-dotenv

Before running the notebook, open an SSH tunnel to the database server:
- Open a terminal/command prompt.
- Run the following command, replacing `<user>` with your username: `ssh -L 8123:localhost:8123 <user>@ppolak5.ams.stonybrook.edu`

## Custom SQL Queries Against the ClickHouse Database

In [1]:
# Import the project's ClickHouse helper functions.
# NOTE(review): the wildcard import hides which names this notebook relies on;
# `get_trades` (used below) is presumably provided by this module — confirm.
from utils.clickhouse_query import *

# Define the query.
# Alternative without the hour filter (whole trading day):
# query = "SELECT * FROM TRADESDB.trades2017view WHERE (Symbol = 'AAPL') AND (Date = '2017-01-05')"

# Hour-of-day bounds for the filter below. SQL BETWEEN is inclusive on both
# ends, so end_hour = 11 keeps every trade stamped 11:xx (up to 11:59:59).
start_hour = 9
end_hour = 11

# AAPL trades on 2017-01-05 whose hour of day lies in [start_hour, end_hour].
# Only the two integer hour bounds are interpolated into the SQL text.
query = f"""
SELECT * 
FROM TRADESDB.trades2017view 
WHERE (Symbol = 'AAPL') 
AND (Date = '2017-01-05') 
AND (toHour(Time) BETWEEN {start_hour} AND {end_hour})
"""

# Execute the query and store the resulting dataframe
data = get_trades(query)

In [3]:
# Show the query result; for a frame this long only the first and last rows are rendered.
data

Unnamed: 0,Time,Exchange,Symbol,Sale_Condition,Trade_Volume,Trade_Price,Trade_Stop_Stock_Indicator,Trade_Correction_Indicator,Sequence_Number,Trade_Id,Source_of_Trade,Trade_Reporting_Facility,Participant_Timestamp,Trade_Reporting_Facility_TRF_Timestamp,Trade_Through_Exempt_Indicator,Date,YearMonth
0,2017-01-05 09:00:20.099632006,P,AAPL,@ TI,1,116.03,\N,0,2744,77,N,,90020098955008,\N,0,2017-01-05,201701
1,2017-01-05 09:00:20.435260797,P,AAPL,@ TI,1,116.03,\N,0,2751,78,N,,90020434563840,\N,0,2017-01-05,201701
2,2017-01-05 09:01:08.877023201,K,AAPL,@FTI,42,116.07,\N,0,2795,48,N,,90108876758000,\N,1,2017-01-05,201701
3,2017-01-05 09:01:08.877479494,P,AAPL,@FTI,42,116.07,\N,0,2796,79,N,,90108876799744,\N,1,2017-01-05,201701
4,2017-01-05 09:01:08.884202687,P,AAPL,@FTI,66,116.07,\N,0,2797,80,N,,90108883539968,\N,1,2017-01-05,201701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66957,2017-01-05 11:59:58.045349711,D,AAPL,@,142,116.2963,\N,0,825691,13265,N,Q,115958036000000,115958045265503,0,2017-01-05,201701
66958,2017-01-05 11:59:58.225886059,D,AAPL,@ I,15,116.2917,\N,0,825700,1261,N,N,115958220000000,\N,0,2017-01-05,201701
66959,2017-01-05 11:59:58.227862659,D,AAPL,@ I,15,116.29,\N,0,825701,13266,N,Q,115958219000000,115958227808353,0,2017-01-05,201701
66960,2017-01-05 11:59:59.341613774,B,AAPL,@ I,15,116.29,\N,0,825738,2570,N,,115959341589348,\N,0,2017-01-05,201701


# More Modular Prompts

In [4]:
# Summarize the result frame: row count, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66962 entries, 0 to 66961
Data columns (total 17 columns):
 #   Column                                  Non-Null Count  Dtype 
---  ------                                  --------------  ----- 
 0   Time                                    66962 non-null  object
 1   Exchange                                66962 non-null  object
 2   Symbol                                  66962 non-null  object
 3   Sale_Condition                          66962 non-null  object
 4   Trade_Volume                            66962 non-null  object
 5   Trade_Price                             66962 non-null  object
 6   Trade_Stop_Stock_Indicator              66962 non-null  object
 7   Trade_Correction_Indicator              66962 non-null  object
 8   Sequence_Number                         66962 non-null  object
 9   Trade_Id                                66962 non-null  object
 10  Source_of_Trade                         66962 non-null  object
 11  Tr

In [6]:
import pandas as pd

# Select every AAPL trade for a single trading day (2017-01-05).
# NOTE: the saved output of this cell is a server-side QUOTA_EXPIRED error
# (daily result_rows quota for the user), i.e. the request was rejected by the
# server before any rows came back.
query = "SELECT * FROM TRADESDB.trades2017view WHERE (Symbol = 'AAPL') AND (Date = '2017-01-05')"

# Fetch the full result set as a DataFrame. `client.command` is intended for
# statements that return no data or a single value; a multi-row SELECT has to
# go through one of the query methods instead.
results = client.query_df(query)

# Expected columns, in the order they should appear in the final frame.
columns = ["Time", "Exchange", "Symbol", "Sale_Condition", "Trade_Volume", "Trade_Price", "Trade_Stop_Stock_Indicator", "Trade_Correction_Indicator", "Sequence_Number", "Trade_Id", "Source_of_Trade", "Trade_Reporting_Facility", "Participant_Timestamp", "Trade_Reporting_Facility_TRF_Timestamp", "Trade_Through_Exempt_Indicator", "Date", "YearMonth"]

# Target dtype per column. The capitalized 'UInt*' names are pandas' nullable
# integer dtypes, so columns that contain missing values can still be integers.
# Time and Date are handled separately below.
column_dtypes = {
    "Exchange": "category",
    "Symbol": "category",
    "Sale_Condition": "category",
    "Trade_Volume": "UInt64",
    "Trade_Price": "float64",
    "Trade_Stop_Stock_Indicator": "category",
    "Trade_Correction_Indicator": "UInt8",
    "Sequence_Number": "UInt64",
    "Trade_Id": "UInt64",
    "Source_of_Trade": "category",
    "Trade_Reporting_Facility": "category",
    "Participant_Timestamp": "UInt64",
    "Trade_Reporting_Facility_TRF_Timestamp": "UInt64",
    "Trade_Through_Exempt_Indicator": "UInt64",
    "YearMonth": "string",
}

# Selecting by `columns` raises a KeyError if the server schema ever drifts from
# the expected one; `astype` returns a new frame, leaving `results` untouched.
df = results[columns].astype(column_dtypes)

# Attach the New York time zone to the trade timestamps.
# `tz_convert` raises on tz-naive values, so naive timestamps are localized
# instead. NOTE(review): this assumes naive Time values are already New York
# wall-clock time (the displayed Time values line up with the HHMMSS-style
# Participant_Timestamp) — confirm against the table definition.
trade_time = pd.to_datetime(df["Time"])
if trade_time.dt.tz is None:
    trade_time = trade_time.dt.tz_localize("America/New_York")
else:
    trade_time = trade_time.dt.tz_convert("America/New_York")
df["Time"] = trade_time

# Keep Date as plain `datetime.date` objects.
df["Date"] = pd.to_datetime(df["Date"]).dt.date

# Index by (Symbol, Time) and order the rows by that index.
df = df.set_index(["Symbol", "Time"]).sort_index()
df

Code: 201. DB::Exception: Quota for user `mhaggerty` for 86400s has been exceeded: result_rows = 1000042/1000000. Interval will end at 2023-05-05 00:00:00. Name of quota template: `dbuser`. (QUOTA_EXPIRED) (version 22.1.3.7 (official build))



DatabaseError: :HTTPDriver for http://ppolak5.ams.stonybrook.edu.:3306 returned response code 500)
 Code: 201. DB::Exception: Quota for user `mhaggerty` for 86400s has been exceeded: result_rows = 1000042/1000000. Interval will end at 2023-05-05 00:00:00. Name of quota template: `dbuser`. (QUOTA_EXPIRED) (version 22.1.3.7 (official build)