# Database Access with ClickHouse Connect

## Setup
Please install the following packages:
- clickhouse_connect
- python-dotenv

To access the database remotely, you need to open an SSH tunnel:
- Open a terminal/command prompt.
- Run the following command, replacing `<user>` with your username: `ssh -L 8123:localhost:8123 <user>@ppolak5.ams.stonybrook.edu`

For remote access, your `.env` file should look like this (if you are on campus, comment out the `localhost` line and uncomment the server hostname line instead):

```
#host = "ppolak5.ams.stonybrook.edu."
host = "localhost"
server_user = "<server_username>"
server_password = "<server_password>"
db_user = "<db_username>"
db_pass = "<db_password>"
```

In [None]:
# Run the command below in a terminal / command prompt, NOT in this notebook:
# it is a shell command (a Python cell would raise SyntaxError) and it keeps
# running for as long as the tunnel is open.
# Port 8123 matches the tunnel described in the Setup section; replace <user>
# with your own server username.
#
#   ssh -L 8123:localhost:8123 <user>@ppolak5.ams.stonybrook.edu

# Custom SQL Queries to the ClickHouse Database

In [2]:
# import helper functions
from utils.clickhouse_query import *

# Each user is limited to 1,000,000 returned rows per day, so while testing it
# is wise to keep queries narrow (e.g. a short time range) or to aggregate on
# the server so fewer rows come back.
# Hour-of-day window used by the query below; BETWEEN includes both bounds,
# so 9 and 10 select trades from 09:00:00 through 10:59:59.
start_hour, end_hour = 9, 10

# AAPL trades on 2017-01-05 restricted to the hour window above.
# The SQL is assembled line by line so the exact text sent to the server
# (including the leading/trailing newline) is explicit.
query = "\n".join(
    [
        "",
        "SELECT * ",
        "FROM TRADESDB.trades2017view ",
        "WHERE (Symbol = 'AAPL') ",
        "AND (Date = '2017-01-05') ",
        f"AND (toHour(Time) BETWEEN {start_hour} AND {end_hour})",
        "",
    ]
)

# Run the query through the helper and keep the returned dataframe.
data = get_trades(query)

In [2]:
# Display the query result; for long frames only the first and last rows are
# rendered, with an ellipsis row in between.
data

Unnamed: 0,Time,Exchange,Symbol,Sale_Condition,Trade_Volume,Trade_Price,Trade_Stop_Stock_Indicator,Trade_Correction_Indicator,Sequence_Number,Trade_Id,Source_of_Trade,Trade_Reporting_Facility,Participant_Timestamp,Trade_Reporting_Facility_TRF_Timestamp,Trade_Through_Exempt_Indicator,Date,YearMonth
0,2017-01-05 09:00:20.099632006,P,AAPL,@ TI,1,116.03,,0,2744,77,N,,9.002010e+13,,0,2017-01-05,201701
1,2017-01-05 09:00:20.435260797,P,AAPL,@ TI,1,116.03,,0,2751,78,N,,9.002043e+13,,0,2017-01-05,201701
2,2017-01-05 09:01:08.877023201,K,AAPL,@FTI,42,116.07,,0,2795,48,N,,9.010888e+13,,1,2017-01-05,201701
3,2017-01-05 09:01:08.877479494,P,AAPL,@FTI,42,116.07,,0,2796,79,N,,9.010888e+13,,1,2017-01-05,201701
4,2017-01-05 09:01:08.884202687,P,AAPL,@FTI,66,116.07,,0,2797,80,N,,9.010888e+13,,1,2017-01-05,201701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47488,2017-01-05 10:59:59.949257478,K,AAPL,@F,200,116.33,,0,532527,6853,N,,1.059599e+14,,1,2017-01-05,201701
47489,2017-01-05 10:59:59.950371094,Z,AAPL,@F,100,116.33,,0,532529,4840,N,,1.059599e+14,,1,2017-01-05,201701
47490,2017-01-05 10:59:59.950377156,Z,AAPL,@F,100,116.33,,0,532530,4841,N,,1.059599e+14,,1,2017-01-05,201701
47491,2017-01-05 10:59:59.950397053,Z,AAPL,@F,300,116.33,,0,532531,4842,N,,1.059599e+14,,1,2017-01-05,201701


In [3]:
# NOTE(review): duplicate of the previous display cell. The output saved under
# this cell runs through 11:59:59 (last index 66960), which does not match the
# hours 9-10 window queried above or the 47,493 rows reported by data.info();
# it looks stale from an earlier run -- re-run the notebook or remove this cell.
data

Unnamed: 0,Time,Exchange,Symbol,Sale_Condition,Trade_Volume,Trade_Price,Trade_Stop_Stock_Indicator,Trade_Correction_Indicator,Sequence_Number,Trade_Id,Source_of_Trade,Trade_Reporting_Facility,Participant_Timestamp,Trade_Reporting_Facility_TRF_Timestamp,Trade_Through_Exempt_Indicator,Date,YearMonth
0,2017-01-05 09:00:20.099632006,P,AAPL,@ TI,1,116.03,\N,0,2744,77,N,,90020098955008,\N,0,2017-01-05,201701
1,2017-01-05 09:00:20.435260797,P,AAPL,@ TI,1,116.03,\N,0,2751,78,N,,90020434563840,\N,0,2017-01-05,201701
2,2017-01-05 09:01:08.877023201,K,AAPL,@FTI,42,116.07,\N,0,2795,48,N,,90108876758000,\N,1,2017-01-05,201701
3,2017-01-05 09:01:08.877479494,P,AAPL,@FTI,42,116.07,\N,0,2796,79,N,,90108876799744,\N,1,2017-01-05,201701
4,2017-01-05 09:01:08.884202687,P,AAPL,@FTI,66,116.07,\N,0,2797,80,N,,90108883539968,\N,1,2017-01-05,201701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66957,2017-01-05 11:59:58.045349711,D,AAPL,@,142,116.2963,\N,0,825691,13265,N,Q,115958036000000,115958045265503,0,2017-01-05,201701
66958,2017-01-05 11:59:58.225886059,D,AAPL,@ I,15,116.2917,\N,0,825700,1261,N,N,115958220000000,\N,0,2017-01-05,201701
66959,2017-01-05 11:59:58.227862659,D,AAPL,@ I,15,116.29,\N,0,825701,13266,N,Q,115958219000000,115958227808353,0,2017-01-05,201701
66960,2017-01-05 11:59:59.341613774,B,AAPL,@ I,15,116.29,\N,0,825738,2570,N,,115959341589348,\N,0,2017-01-05,201701


In [6]:
# NOTE(review): presumably a back-of-the-envelope check of how many
# ~50,000-row queries fit inside the 1,000,000-row daily limit -- confirm the
# intent, or remove this scratch cell.
x = 1000000/50000
x

20.0

In [4]:
# Summarise the result: row count, column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47493 entries, 0 to 47492
Data columns (total 17 columns):
 #   Column                                  Non-Null Count  Dtype         
---  ------                                  --------------  -----         
 0   Time                                    47493 non-null  datetime64[ns]
 1   Exchange                                47493 non-null  object        
 2   Symbol                                  47493 non-null  object        
 3   Sale_Condition                          47493 non-null  object        
 4   Trade_Volume                            47493 non-null  int64         
 5   Trade_Price                             47493 non-null  float64       
 6   Trade_Stop_Stock_Indicator              47493 non-null  object        
 7   Trade_Correction_Indicator              47493 non-null  int64         
 8   Sequence_Number                         47493 non-null  int64         
 9   Trade_Id                                47493 non-