In [1]:
import os
import time

import numpy as np
import pandas as pd
import requests

def getStockData(API_KEY, symbol, function, outputsize=None, interval=None, month=None, count=0):
    """Fetch one Alpha Vantage time series and return it as a DataFrame.

    Parameters
    ----------
    API_KEY : str
        Key sent as the ``apikey`` query parameter.
    symbol : str or None
        Sent as ``symbol`` when not None.
    function : str
        Sent as the ``function`` query parameter.
    outputsize, interval, month : str or None
        Sent as query parameters of the same name when not None.
        ``interval`` is also used to build the response key that is read,
        ``"Time Series ({interval})"``.
    count : int
        Running request counter; it is passed through ``checkRequests``
        before the request is made.

    Returns
    -------
    (pandas.DataFrame, int)
        The frame built from the time-series mapping (one row per key, with
        the keys copied into a leading ``timestamp`` column) and the updated
        request counter. The frame is empty when the response does not
        contain the expected key.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    """
    count = checkRequests(count)

    # Let requests build and encode the query string; None-valued options are
    # left out, matching the original hand-built URL.
    params = {"function": function}
    optional = {
        "symbol": symbol,
        "interval": interval,
        "outputsize": outputsize,
        "month": month,
    }
    params.update({name: value for name, value in optional.items() if value is not None})
    params["apikey"] = API_KEY

    response = requests.get("https://www.alphavantage.co/query", params=params, timeout=30)
    response.raise_for_status()
    payload = response.json()

    series_key = f"Time Series ({interval})"
    # A null or non-object JSON body used to crash on .get(); treat it like a
    # response that simply lacks the series.
    series = payload.get(series_key, {}) if isinstance(payload, dict) else {}
    if not series:
        print(f"No '{series_key}' data returned for {symbol} (month={month})")

    df = pd.DataFrame.from_dict(series, orient="index")
    df.insert(0, "timestamp", df.index)
    return df, count

def getDataForPeriod(API_KEY, symbol, function, interval, yearStart, yearEnd, count):
    """Fetch month-by-month data for every year in ``yearStart..yearEnd``.

    Calls ``getStockData`` once per calendar month (``"YYYY-MM"``) and
    concatenates the monthly frames in request order with a fresh integer
    index.

    Parameters
    ----------
    API_KEY, symbol, function, interval
        Forwarded unchanged to ``getStockData``.
    yearStart, yearEnd : int
        First and last year to fetch, both inclusive.
    count : int
        Running request counter, threaded through every ``getStockData``
        call so the rate limiter sees all requests.

    Returns
    -------
    (pandas.DataFrame, int)
        The concatenated frame (empty when the year range is empty) and the
        updated request counter.
    """
    dfList = []
    for year in range(yearStart, yearEnd + 1):
        print(f"Getting {symbol} data for {year}")
        for monthNumber in range(1, 13):
            monthLabel = f"{year}-{monthNumber:02d}"
            # count must be passed along; otherwise every request starts from
            # the default of 0 and the per-minute limit is never enforced.
            df, count = getStockData(
                API_KEY=API_KEY,
                symbol=symbol,
                function=function,
                interval=interval,
                month=monthLabel,
                count=count,
            )
            dfList.append(df)

    # pd.concat raises on an empty list (yearStart > yearEnd).
    if not dfList:
        return pd.DataFrame(), count

    full_df = pd.concat(dfList, ignore_index=True)
    return full_df, count

def checkRequests(count, limit=75, wait_seconds=62):
    """Register one request against the per-minute budget.

    Parameters
    ----------
    count : int
        Number of requests already made in the current window.
    limit : int, optional
        Requests allowed per window (default 75).
        NOTE(review): presumably the API plan's per-minute quota; confirm.
    wait_seconds : float, optional
        How long to sleep once the budget is used up (default 62).

    Returns
    -------
    int
        The counter including the request about to be made; it restarts at 1
        after a wait.
    """
    # ">=" rather than "==" so a counter that is already past the limit still
    # triggers the wait instead of growing forever.
    if count >= limit:
        print("API requests reached for min... Waiting")
        time.sleep(wait_seconds)
        count = 0
    return count + 1

In [None]:
# The API key is read from the environment so it is never stored in the
# notebook. A key that was previously committed here should be revoked.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the ALPHAVANTAGE_API_KEY environment variable before running this cell")

# Gets Stock Data 1hr and 1 day, 4hr can be generated from this data. Gets weekly via CSV dataset files.
# The request counter is threaded through every call so the rate limiter sees all requests.
count = 0
aapl_data_1hr, count = getDataForPeriod(API_KEY=API_KEY, symbol="AAPL", function="TIME_SERIES_INTRADAY", interval="60min", yearStart=2018, yearEnd=2024, count=count)
tsla_data_1hr, count = getDataForPeriod(API_KEY=API_KEY, symbol="TSLA", function="TIME_SERIES_INTRADAY", interval="60min", yearStart=2018, yearEnd=2024, count=count)
amzn_data_1hr, count = getDataForPeriod(API_KEY=API_KEY, symbol="AMZN", function="TIME_SERIES_INTRADAY", interval="60min", yearStart=2018, yearEnd=2024, count=count)

# interval="Daily" selects the "Time Series (Daily)" key inside getStockData.
aapl_data_daily, count = getStockData(API_KEY=API_KEY, symbol="AAPL", function="TIME_SERIES_DAILY", outputsize="full", interval="Daily", count=count)
tsla_data_daily, count = getStockData(API_KEY=API_KEY, symbol="TSLA", function="TIME_SERIES_DAILY", outputsize="full", interval="Daily", count=count)
amzn_data_daily, count = getStockData(API_KEY=API_KEY, symbol="AMZN", function="TIME_SERIES_DAILY", outputsize="full", interval="Daily", count=count)

# NOTE(review): these are read from the working directory, while every other
# dataset in this notebook lives under "Datasets/" - confirm the paths.
aapl_data_weekly = pd.read_csv("weekly_AAPL.csv")
tsla_data_weekly = pd.read_csv("weekly_TSLA.csv")
amzn_data_weekly = pd.read_csv("weekly_AMZN.csv")

Getting AAPL data for 2018
{'2018-01-31 19:00:00': {'1. open': '39.1727', '2. high': '39.2103', '3. low': '39.1022', '4. close': '39.2103', '5. volume': '283016'}, '2018-01-31 18:00:00': {'1. open': '39.2291', '2. high': '39.2902', '3. low': '39.0857', '4. close': '39.1633', '5. volume': '253676'}, '2018-01-31 17:00:00': {'1. open': '39.1680', '2. high': '39.2878', '3. low': '39.1374', '4. close': '39.2197', '5. volume': '383444'}, '2018-01-31 16:00:00': {'1. open': '39.3536', '2. high': '39.3607', '3. low': '39.1257', '4. close': '39.1374', '5. volume': '13960772'}, '2018-01-31 15:00:00': {'1. open': '39.2173', '2. high': '39.3971', '3. low': '39.1844', '4. close': '39.3536', '5. volume': '21420456'}, '2018-01-31 14:00:00': {'1. open': '39.2502', '2. high': '39.3348', '3. low': '39.1257', '4. close': '39.2173', '5. volume': '13282256'}, '2018-01-31 13:00:00': {'1. open': '39.1750', '2. high': '39.2808', '3. low': '39.1680', '4. close': '39.2500', '5. volume': '7468856'}, '2018-01-31 1

In [4]:
# NOTE(review): getDataForPeriod as defined in this notebook takes
# (API_KEY, symbol, function, interval, yearStart, yearEnd, count); it has no
# `market` or `month` parameter and the required yearStart/yearEnd/count are
# missing here, so this call raises TypeError as written.
# btc_data_1hr is loaded from "Datasets/hourly_BTC.csv" later in the notebook.
btc_data_1hr, count = getDataForPeriod(API_KEY=API_KEY, symbol="BTC", function="CRYPTO_INTRADAY", market="USD", interval="60min", month="2018-01")

Unnamed: 0,timestamp,1. open,2. high,3. low,4. close,5. volume
0,2018-01-31 19:00:00,39.1727,39.2103,39.1022,39.2103,283016
1,2018-01-31 18:00:00,39.2291,39.2902,39.0857,39.1633,253676
2,2018-01-31 17:00:00,39.1680,39.2878,39.1374,39.2197,383444
3,2018-01-31 16:00:00,39.3536,39.3607,39.1257,39.1374,13960772
4,2018-01-31 15:00:00,39.2173,39.3971,39.1844,39.3536,21420456
...,...,...,...,...,...,...
8395,2024-12-20 17:00:00,254.6454,273.0769,212.3478,254.8002,19979411
8396,2024-12-20 16:00:00,254.3107,254.7802,218.9012,254.6454,112673002
8397,2024-12-20 15:00:00,253.3318,254.7203,252.2330,254.3207,11582497
8398,2024-12-20 14:00:00,252.5626,253.5416,252.4028,253.3246,3348461


In [26]:
# Show the dtype of every column in the hourly AAPL frame.
aapl_data_1hr.dtypes

timestamp    object
1. open      object
2. high      object
3. low       object
4. close     object
5. volume    object
dtype: object

In [28]:
# Per column: True if the hourly AAPL frame has at least one missing value.
aapl_data_1hr.isna().any()

timestamp    False
1. open      False
2. high      False
3. low       False
4. close     False
5. volume    False
dtype: bool

In [32]:
# Show the dtype of every column in the hourly TSLA frame.
tsla_data_1hr.dtypes

timestamp    object
1. open      object
2. high      object
3. low       object
4. close     object
5. volume    object
dtype: object

In [34]:
# Per column: True if the hourly TSLA frame has at least one missing value.
tsla_data_1hr.isna().any()

timestamp    False
1. open      False
2. high      False
3. low       False
4. close     False
5. volume    False
dtype: bool

In [38]:
# Show the dtype of every column in the hourly AMZN frame.
amzn_data_1hr.dtypes

timestamp    object
1. open      object
2. high      object
3. low       object
4. close     object
5. volume    object
dtype: object

In [40]:
# Per column: True if the hourly AMZN frame has at least one missing value.
amzn_data_1hr.isna().any()

timestamp    False
1. open      False
2. high      False
3. low       False
4. close     False
5. volume    False
dtype: bool

In [30]:
# Save the hourly AAPL frame; index=False keeps the row index out of the file.
aapl_data_1hr.to_csv("Datasets/hourly_AAPL.csv", index=False)

In [36]:
# Save the hourly TSLA frame; index=False keeps the row index out of the file.
tsla_data_1hr.to_csv("Datasets/hourly_TSLA.csv", index=False)

In [42]:
# Save the hourly AMZN frame; index=False keeps the row index out of the file.
amzn_data_1hr.to_csv("Datasets/hourly_AMZN.csv", index=False)

In [44]:
# Show the dtype of every column in the daily AAPL frame.
aapl_data_daily.dtypes

timestamp    object
1. open      object
2. high      object
3. low       object
4. close     object
5. volume    object
dtype: object

In [46]:
# Per column: True if the daily AAPL frame has at least one missing value.
aapl_data_daily.isna().any()

timestamp    False
1. open      False
2. high      False
3. low       False
4. close     False
5. volume    False
dtype: bool

In [50]:
# Show the dtype of every column in the daily TSLA frame.
tsla_data_daily.dtypes

timestamp    object
1. open      object
2. high      object
3. low       object
4. close     object
5. volume    object
dtype: object

In [52]:
# Per column: True if the daily TSLA frame has at least one missing value.
tsla_data_daily.isna().any()

timestamp    False
1. open      False
2. high      False
3. low       False
4. close     False
5. volume    False
dtype: bool

In [56]:
# Show the dtype of every column in the daily AMZN frame.
amzn_data_daily.dtypes

timestamp    object
1. open      object
2. high      object
3. low       object
4. close     object
5. volume    object
dtype: object

In [58]:
# Per column: True if the daily AMZN frame has at least one missing value.
amzn_data_daily.isna().any()

timestamp    False
1. open      False
2. high      False
3. low       False
4. close     False
5. volume    False
dtype: bool

In [48]:
# Save the daily AAPL frame; index=False keeps the row index out of the file.
aapl_data_daily.to_csv("Datasets/daily_AAPL.csv", index=False)

In [54]:
# Save the daily TSLA frame; index=False keeps the row index out of the file.
tsla_data_daily.to_csv("Datasets/daily_TSLA.csv", index=False)

In [60]:
# Save the daily AMZN frame; index=False keeps the row index out of the file.
amzn_data_daily.to_csv("Datasets/daily_AMZN.csv", index=False)

In [62]:
# Display the hourly AAPL frame.
aapl_data_1hr

Unnamed: 0,timestamp,1. open,2. high,3. low,4. close,5. volume
0,2018-01-31 19:00:00,39.1727,39.2103,39.1022,39.2103,283016
1,2018-01-31 18:00:00,39.2291,39.2902,39.0857,39.1633,253676
2,2018-01-31 17:00:00,39.1680,39.2878,39.1374,39.2197,383444
3,2018-01-31 16:00:00,39.3536,39.3607,39.1257,39.1374,13960772
4,2018-01-31 15:00:00,39.2173,39.3971,39.1844,39.3536,21420456
...,...,...,...,...,...,...
8395,2024-12-20 17:00:00,254.6454,273.0769,212.3478,254.8002,19979411
8396,2024-12-20 16:00:00,254.3107,254.7802,218.9012,254.6454,112673002
8397,2024-12-20 15:00:00,253.3318,254.7203,252.2330,254.3207,11582497
8398,2024-12-20 14:00:00,252.5626,253.5416,252.4028,253.3246,3348461


In [77]:
# Give the hourly AAPL frame a DatetimeIndex and numeric OHLCV columns.
# The guard makes the cell safe to re-run: once "timestamp" has become the
# index there is no such column left to convert.
if "timestamp" in aapl_data_1hr.columns:
    aapl_data_1hr = aapl_data_1hr.assign(
        timestamp=pd.to_datetime(aapl_data_1hr["timestamp"])
    ).set_index("timestamp")

cols = ["1. open", "2. high", "3. low", "4. close", "5. volume"]
aapl_data_1hr[cols] = aapl_data_1hr[cols].apply(pd.to_numeric)
aapl_data_1hr.dtypes

1. open      float64
2. high      float64
3. low       float64
4. close     float64
5. volume      int64
dtype: object

In [79]:
# Show the dtype of every column in the hourly AAPL frame again.
aapl_data_1hr.dtypes

1. open      float64
2. high      float64
3. low       float64
4. close     float64
5. volume      int64
dtype: object

In [85]:
# Build 4-hour AAPL bars from the hourly frame: first open, highest high,
# lowest low, last close and summed volume per bin. Bins with a missing
# value are dropped and the timestamp goes back to being a regular column.
aapl_data_4hr = (
    aapl_data_1hr
    .resample("4h")
    .agg(
        {
            "1. open": "first",
            "2. high": "max",
            "3. low": "min",
            "4. close": "last",
            "5. volume": "sum",
        }
    )
    .dropna()
    .reset_index()
)

In [87]:
# Display the 4-hour AAPL frame.
aapl_data_4hr

Unnamed: 0,timestamp,1. open,2. high,3. low,4. close,5. volume
0,2018-01-23 16:00:00,41.5860,41.6401,41.4756,41.6401,10440028
1,2018-01-24 04:00:00,41.6518,41.7340,41.6189,41.6401,310672
2,2018-01-24 08:00:00,41.6542,41.8233,41.0079,41.1207,89375092
3,2018-01-24 12:00:00,41.1184,41.2030,40.7001,40.9398,73016888
4,2018-01-24 16:00:00,40.9280,40.9703,40.8528,40.9257,21795676
...,...,...,...,...,...,...
2112,2024-12-30 16:00:00,251.9345,252.1231,250.9287,251.5937,16179766
2113,2024-12-31 04:00:00,251.8734,253.2019,251.3939,252.5227,83676
2114,2024-12-31 08:00:00,252.7225,253.2019,250.7346,251.0393,8263748
2115,2024-12-31 12:00:00,251.0443,251.2541,249.1564,250.1053,17835148


In [89]:
# Give the hourly TSLA frame a DatetimeIndex and numeric OHLCV columns.
# The guard makes the cell safe to re-run: once "timestamp" has become the
# index there is no such column left to convert.
if "timestamp" in tsla_data_1hr.columns:
    tsla_data_1hr = tsla_data_1hr.assign(
        timestamp=pd.to_datetime(tsla_data_1hr["timestamp"])
    ).set_index("timestamp")

cols = ["1. open", "2. high", "3. low", "4. close", "5. volume"]
tsla_data_1hr[cols] = tsla_data_1hr[cols].apply(pd.to_numeric)

# Build 4-hour bars: first open, highest high, lowest low, last close and
# summed volume per bin. Bins with a missing value are dropped.
tsla_data_4hr = (
    tsla_data_1hr
    .resample("4h")
    .agg({
        "1. open": "first",
        "2. high": "max",
        "3. low": "min",
        "4. close": "last",
        "5. volume": "sum",
    })
    .dropna()
    .reset_index()
)

In [91]:
# Display the 4-hour TSLA frame.
tsla_data_4hr

Unnamed: 0,timestamp,1. open,2. high,3. low,4. close,5. volume
0,2018-01-23 12:00:00,23.5940,23.6468,23.4940,23.5200,12129945
1,2018-01-23 16:00:00,23.5193,23.5787,23.4613,23.5787,1126290
2,2018-01-24 04:00:00,23.5600,23.7000,23.5600,23.6733,87735
3,2018-01-24 08:00:00,23.6667,23.7333,23.0200,23.1827,37447485
4,2018-01-24 12:00:00,23.1827,23.1913,22.9013,23.0613,24008400
...,...,...,...,...,...,...
2119,2024-12-30 16:00:00,417.2800,426.7017,412.0100,412.0300,8671718
2120,2024-12-31 04:00:00,414.0100,426.5000,412.8600,421.3400,1390593
2121,2024-12-31 08:00:00,424.7700,427.9300,412.1350,416.0791,33692816
2122,2024-12-31 12:00:00,415.9200,416.8300,402.5400,403.6400,34678719


In [93]:
# Give the hourly AMZN frame a DatetimeIndex and numeric OHLCV columns.
# The guard makes the cell safe to re-run: once "timestamp" has become the
# index there is no such column left to convert.
if "timestamp" in amzn_data_1hr.columns:
    amzn_data_1hr = amzn_data_1hr.assign(
        timestamp=pd.to_datetime(amzn_data_1hr["timestamp"])
    ).set_index("timestamp")

cols = ["1. open", "2. high", "3. low", "4. close", "5. volume"]
amzn_data_1hr[cols] = amzn_data_1hr[cols].apply(pd.to_numeric)

# Build 4-hour bars: first open, highest high, lowest low, last close and
# summed volume per bin. Bins with a missing value are dropped.
amzn_data_4hr = (
    amzn_data_1hr
    .resample("4h")
    .agg({
        "1. open": "first",
        "2. high": "max",
        "3. low": "min",
        "4. close": "last",
        "5. volume": "sum",
    })
    .dropna()
    .reset_index()
)

In [95]:
# Display the 4-hour AMZN frame.
amzn_data_4hr

Unnamed: 0,timestamp,1. open,2. high,3. low,4. close,5. volume
0,2018-01-23 12:00:00,67.2180,68.2450,67.1625,68.1525,25547620
1,2018-01-23 16:00:00,68.1270,68.2910,67.3755,68.0585,6055040
2,2018-01-24 04:00:00,68.2500,68.4645,68.2500,68.4300,186960
3,2018-01-24 08:00:00,68.4350,69.4080,67.5810,68.0170,47351340
4,2018-01-24 12:00:00,68.0000,68.4280,66.9000,67.8755,41679900
...,...,...,...,...,...,...
2189,2024-12-30 16:00:00,221.3000,221.4000,220.4627,220.9200,10967043
2190,2024-12-31 04:00:00,221.0500,223.2600,220.9600,222.7500,105605
2191,2024-12-31 08:00:00,221.0162,223.3500,218.7850,221.0000,7799531
2192,2024-12-31 12:00:00,221.0380,221.3400,218.9400,219.3900,10166759


In [97]:
# Save the 4-hour AAPL frame; index=False keeps the row index out of the file.
aapl_data_4hr.to_csv("Datasets/4hourly_AAPL.csv", index=False)

In [99]:
# Save the 4-hour TSLA frame. The file name follows the same
# "4hourly_<SYMBOL>.csv" pattern as the AAPL and AMZN files (it previously
# had a "." where the "_" belongs).
tsla_data_4hr.to_csv("Datasets/4hourly_TSLA.csv", index=False)

In [101]:
# Save the 4-hour AMZN frame. This previously wrote amzn_data_daily into the
# 4-hourly file.
amzn_data_4hr.to_csv("Datasets/4hourly_AMZN.csv", index=False)

In [103]:
# Load the hourly ETH dataset from disk.
eth_data_1hr = pd.read_csv("Datasets/hourly_ETH.csv")

In [105]:
# Display the hourly ETH frame.
eth_data_1hr

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume ETH,Volume USDT,tradecount
0,2023-10-19 23:00:00,ETHUSDT,1564.22,1567.31,1563.70,1566.57,3403.82350,5.327189e+06,12103
1,2023-10-19 22:00:00,ETHUSDT,1563.45,1568.00,1563.39,1564.22,4833.05790,7.566292e+06,12073
2,2023-10-19 21:00:00,ETHUSDT,1566.51,1567.72,1563.39,1563.46,5240.29830,8.200644e+06,15157
3,2023-10-19 20:00:00,ETHUSDT,1568.00,1573.15,1563.72,1566.51,8725.29650,1.369178e+07,19124
4,2023-10-19 19:00:00,ETHUSDT,1561.66,1569.92,1561.20,1568.00,6092.96190,9.540306e+06,16003
...,...,...,...,...,...,...,...,...,...
53983,2017-08-17 08:00:00,ETHUSDT,307.95,309.97,307.00,308.62,150.75029,4.655946e+04,182
53984,2017-08-17 07:00:00,ETHUSDT,302.68,307.96,302.60,307.96,754.74510,2.297251e+05,198
53985,2017-08-17 06:00:00,ETHUSDT,302.40,304.44,301.90,302.68,303.86672,9.203444e+04,182
53986,2017-08-17 05:00:00,ETHUSDT,301.61,303.28,300.00,303.10,377.67246,1.140433e+05,202


In [107]:
# Load the hourly BTC dataset from disk.
btc_data_1hr = pd.read_csv("Datasets/hourly_BTC.csv")

In [109]:
# Display the hourly BTC frame.
btc_data_1hr

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount
0,2023-10-19 23:00:00,BTCUSDT,28649.64,28717.82,28630.19,28713.71,613.284480,1.758298e+07,27099
1,2023-10-19 22:00:00,BTCUSDT,28604.09,28704.99,28598.60,28649.65,767.855180,2.200420e+07,28479
2,2023-10-19 21:00:00,BTCUSDT,28715.39,28739.73,28604.09,28604.09,1118.516970,3.205666e+07,42623
3,2023-10-19 20:00:00,BTCUSDT,28750.75,28882.07,28684.80,28715.38,1737.147110,4.997623e+07,59830
4,2023-10-19 19:00:00,BTCUSDT,28646.98,28765.76,28645.00,28750.75,1039.209350,2.984059e+07,49985
...,...,...,...,...,...,...,...,...,...
53983,2017-08-17 08:00:00,BTCUSDT,4333.32,4377.85,4333.32,4360.69,0.972807,4.239504e+03,28
53984,2017-08-17 07:00:00,BTCUSDT,4316.62,4349.99,4287.41,4349.99,4.443249,1.924106e+04,25
53985,2017-08-17 06:00:00,BTCUSDT,4330.29,4345.45,4309.37,4324.35,7.229691,3.128231e+04,36
53986,2017-08-17 05:00:00,BTCUSDT,4308.83,4328.69,4291.37,4315.32,23.234916,1.003048e+05,102


In [111]:
# Per column: True if the hourly BTC frame has at least one missing value.
btc_data_1hr.isna().any()

Date           False
Symbol         False
Open           False
High           False
Low            False
Close          False
Volume BTC     False
Volume USDT    False
tradecount     False
dtype: bool

In [113]:
# Per column: True if the hourly ETH frame has at least one missing value.
eth_data_1hr.isna().any()

Date           False
Symbol         False
Open           False
High           False
Low            False
Close          False
Volume ETH     False
Volume USDT    False
tradecount     False
dtype: bool

In [123]:
# Give the hourly BTC frame a DatetimeIndex and numeric value columns, then
# derive 4-hour, daily and weekly bars from it.
# The guard makes the cell safe to re-run: once "Date" has become the index
# there is no such column left to convert.
if "Date" in btc_data_1hr.columns:
    btc_dates = pd.to_datetime(
        btc_data_1hr["Date"].str.split(".").str[0],  # Remove milliseconds
        errors="coerce",  # unparseable dates become NaT
    )
    btc_data_1hr = btc_data_1hr.assign(Date=btc_dates).set_index("Date")

cols = ["Open", "High", "Low", "Close", "Volume BTC", "Volume USDT", "tradecount"]
btc_data_1hr[cols] = btc_data_1hr[cols].apply(pd.to_numeric)

# One aggregation rule set shared by all three target frequencies.
_btc_agg = {
    "Open": "first",
    "High": "max",
    "Low": "min",
    "Close": "last",
    "Volume BTC": "sum",
    "Volume USDT": "sum",
    "tradecount": "sum",
}

btc_data_4hr = btc_data_1hr.resample("4h").agg(_btc_agg).dropna().reset_index()
btc_data_daily = btc_data_1hr.resample("D").agg(_btc_agg).dropna().reset_index()
btc_data_weekly = btc_data_1hr.resample("W").agg(_btc_agg).dropna().reset_index()



In [125]:
# Display the 4-hour BTC frame.
btc_data_4hr

Unnamed: 0,Date,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount
0,2017-08-17 04:00:00,4261.48,4349.99,4261.32,4349.99,82.088865,3.531943e+05,334
1,2017-08-17 08:00:00,4333.32,4485.39,4333.32,4427.30,63.619882,2.825012e+05,248
2,2017-08-17 12:00:00,4436.06,4485.39,4333.42,4352.34,174.562001,7.742388e+05,858
3,2017-08-17 16:00:00,4352.33,4354.84,4200.74,4325.23,225.109716,9.652911e+05,986
4,2017-08-17 20:00:00,4307.56,4369.69,4258.56,4285.08,249.769913,1.079545e+06,1001
...,...,...,...,...,...,...,...,...
13507,2023-10-19 04:00:00,28260.00,28349.39,28230.00,28330.82,3083.943090,8.725777e+07,119868
13508,2023-10-19 08:00:00,28330.82,28514.21,28280.96,28450.69,5223.266230,1.483923e+08,197855
13509,2023-10-19 12:00:00,28450.70,28814.14,28420.00,28680.09,10201.176170,2.916293e+08,372489
13510,2023-10-19 16:00:00,28680.09,28916.89,28457.56,28750.75,8031.467960,2.304093e+08,316168


In [127]:
# Show the dtype of every column in the 4-hour BTC frame.
btc_data_4hr.dtypes

Date           datetime64[ns]
Open                  float64
High                  float64
Low                   float64
Close                 float64
Volume BTC            float64
Volume USDT           float64
tradecount              int64
dtype: object

In [131]:
# Display the daily BTC frame.
btc_data_daily

Unnamed: 0,Date,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount
0,2017-08-17,4261.48,4485.39,4200.74,4285.08,795.150377,3.454770e+06,3427
1,2017-08-18,4285.08,4371.52,3938.77,4108.37,1199.888264,5.086958e+06,5233
2,2017-08-19,4108.37,4184.69,3850.00,4139.98,381.309763,1.549484e+06,2153
3,2017-08-20,4120.98,4211.08,4032.62,4086.29,467.083022,1.930364e+06,2321
4,2017-08-21,4069.13,4119.62,3911.79,4016.00,691.743060,2.797232e+06,3972
...,...,...,...,...,...,...,...,...
2250,2023-10-15,26852.48,27293.33,26808.25,27154.15,15274.691700,4.126740e+08,682942
2251,2023-10-16,27154.14,30000.00,27112.66,28500.78,78399.224450,2.210786e+09,2289339
2252,2023-10-17,28500.77,28613.65,28069.32,28395.91,38428.445320,1.090742e+09,1345810
2253,2023-10-18,28395.91,28982.36,28142.87,28320.00,32162.475910,9.147735e+08,1215235


In [133]:
# Display the weekly BTC frame.
btc_data_weekly

Unnamed: 0,Date,Open,High,Low,Close,Volume BTC,Volume USDT,tradecount
0,2017-08-20,4261.48,4485.39,3850.00,4086.29,2843.431426,1.202158e+07,13134
1,2017-08-27,4069.13,4453.91,3400.00,4310.01,4599.396629,1.897898e+07,37826
2,2017-09-03,4310.01,4939.19,4124.54,4509.08,4753.843376,2.158168e+07,35717
3,2017-09-10,4505.00,4788.59,3603.00,4130.37,6382.787745,2.730782e+07,42372
4,2017-09-17,4153.62,4394.59,2817.00,3699.99,8106.705127,3.002680e+07,50423
...,...,...,...,...,...,...,...,...
318,2023-09-24,26527.50,27483.57,26122.08,26248.38,191220.589230,5.139868e+09,5061396
319,2023-10-01,26248.39,28065.51,25990.46,27992.57,189937.169190,5.073579e+09,5966990
320,2023-10-08,27992.58,28580.00,27160.50,27917.05,217523.396380,6.040608e+09,7856366
321,2023-10-15,27917.06,27987.93,26538.66,27154.15,164897.390850,4.470535e+09,6797459


In [135]:
# Load the hourly ETH dataset from disk, replacing any eth_data_1hr already
# in memory (the same file is also read earlier in this notebook).
eth_data_1hr = pd.read_csv("Datasets/hourly_ETH.csv")

In [153]:
# Display the hourly ETH frame.
eth_data_1hr

Unnamed: 0_level_0,Symbol,Open,High,Low,Close,Volume ETH,Volume USDT,tradecount
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-10-19 23:00:00,ETHUSDT,1564.22,1567.31,1563.70,1566.57,3403.82350,5.327189e+06,12103
2023-10-19 22:00:00,ETHUSDT,1563.45,1568.00,1563.39,1564.22,4833.05790,7.566292e+06,12073
2023-10-19 21:00:00,ETHUSDT,1566.51,1567.72,1563.39,1563.46,5240.29830,8.200644e+06,15157
2023-10-19 20:00:00,ETHUSDT,1568.00,1573.15,1563.72,1566.51,8725.29650,1.369178e+07,19124
2023-10-19 19:00:00,ETHUSDT,1561.66,1569.92,1561.20,1568.00,6092.96190,9.540306e+06,16003
...,...,...,...,...,...,...,...,...
2017-08-17 08:00:00,ETHUSDT,307.95,309.97,307.00,308.62,150.75029,4.655946e+04,182
2017-08-17 07:00:00,ETHUSDT,302.68,307.96,302.60,307.96,754.74510,2.297251e+05,198
2017-08-17 06:00:00,ETHUSDT,302.40,304.44,301.90,302.68,303.86672,9.203444e+04,182
2017-08-17 05:00:00,ETHUSDT,301.61,303.28,300.00,303.10,377.67246,1.140433e+05,202


In [155]:
# Give the hourly ETH frame a DatetimeIndex and numeric value columns, then
# derive 4-hour, daily and weekly bars from it.
# A freshly loaded frame still has "Date" as an ordinary column and a default
# integer index; move "Date" into the index first so the datetime conversion
# below acts on the dates rather than on row numbers.
if "Date" in eth_data_1hr.columns:
    eth_data_1hr = eth_data_1hr.set_index("Date")
eth_data_1hr.index = pd.to_datetime(eth_data_1hr.index)  # Convert to datetime

cols = ["Open", "High", "Low", "Close", "Volume ETH", "Volume USDT", "tradecount"]
eth_data_1hr[cols] = eth_data_1hr[cols].apply(pd.to_numeric)

# One aggregation rule set shared by all three target frequencies.
_eth_agg = {
    "Open": "first",
    "High": "max",
    "Low": "min",
    "Close": "last",
    "Volume ETH": "sum",
    "Volume USDT": "sum",
    "tradecount": "sum",
}

eth_data_4hr = eth_data_1hr.resample("4h").agg(_eth_agg).dropna().reset_index()
eth_data_daily = eth_data_1hr.resample("D").agg(_eth_agg).dropna().reset_index()
eth_data_weekly = eth_data_1hr.resample("W").agg(_eth_agg).dropna().reset_index()

In [157]:
# Display the 4-hour ETH frame.
eth_data_4hr

Unnamed: 0,Date,Open,High,Low,Close,Volume ETH,Volume USDT,tradecount
0,2017-08-17 04:00:00,301.13,307.96,298.00,307.96,1561.95305,4.734877e+05,711
1,2017-08-17 08:00:00,307.95,312.00,307.00,308.95,1177.71088,3.645453e+05,775
2,2017-08-17 12:00:00,308.95,310.51,303.56,307.06,1882.05267,5.786449e+05,1140
3,2017-08-17 16:00:00,307.74,312.18,298.21,301.60,1208.05192,3.702091e+05,957
4,2017-08-17 20:00:00,301.60,310.85,299.01,302.00,1200.94182,3.677683e+05,939
...,...,...,...,...,...,...,...,...
13507,2023-10-19 04:00:00,1549.06,1553.71,1545.80,1551.91,30291.55120,4.698518e+07,53567
13508,2023-10-19 08:00:00,1551.91,1555.36,1541.76,1553.86,29384.06650,4.554548e+07,74596
13509,2023-10-19 12:00:00,1553.86,1565.31,1551.00,1561.20,39291.64770,6.120460e+07,102674
13510,2023-10-19 16:00:00,1561.20,1574.45,1559.86,1568.00,49123.42380,7.691279e+07,102322


In [159]:
# Display the daily ETH frame.
eth_data_daily

Unnamed: 0,Date,Open,High,Low,Close,Volume ETH,Volume USDT,tradecount
0,2017-08-17,301.13,312.18,298.00,302.00,7030.71034,2.154655e+06,4522
1,2017-08-18,302.00,311.79,283.94,293.96,9537.84646,2.858947e+06,5658
2,2017-08-19,293.31,299.90,278.00,290.91,2146.19773,6.200226e+05,1795
3,2017-08-20,289.41,300.53,282.85,299.10,2510.13871,7.428479e+05,2038
4,2017-08-21,299.10,346.52,294.60,323.29,5219.44542,1.689472e+06,3925
...,...,...,...,...,...,...,...,...
2250,2023-10-15,1554.94,1567.62,1547.44,1557.77,120311.27380,1.872662e+08,316951
2251,2023-10-16,1557.78,1639.43,1555.06,1599.42,418510.86440,6.651250e+08,729569
2252,2023-10-17,1599.41,1600.53,1551.08,1565.01,234817.06990,3.700322e+08,475213
2253,2023-10-18,1565.01,1585.42,1555.88,1563.44,177750.22530,2.793579e+08,440577


In [161]:
# Display the weekly ETH frame.
eth_data_weekly

Unnamed: 0,Date,Open,High,Low,Close,Volume ETH,Volume USDT,tradecount
0,2017-08-20,301.13,312.18,278.00,299.10,2.122489e+04,6.376472e+06,14013
1,2017-08-27,299.10,348.13,144.21,348.13,4.523389e+04,1.456741e+07,42362
2,2017-09-03,348.11,394.39,320.08,341.77,3.388641e+04,1.229073e+07,35671
3,2017-09-10,339.29,344.55,235.55,293.50,3.548523e+04,1.071100e+07,33803
4,2017-09-17,293.51,316.46,192.00,257.55,5.825963e+04,1.499828e+07,44796
...,...,...,...,...,...,...,...,...
318,2023-09-24,1622.48,1669.50,1567.77,1580.71,1.263737e+06,2.045249e+09,2347428
319,2023-10-01,1580.71,1751.00,1563.01,1733.79,1.522616e+06,2.495363e+09,3100790
320,2023-10-08,1733.80,1744.00,1606.73,1632.84,1.508277e+06,2.494967e+09,3603939
321,2023-10-15,1632.85,1636.00,1521.00,1557.77,1.491108e+06,2.333403e+09,3494816


In [163]:
# Save the 4-hour ETH frame. index=False matches the stock exports and keeps
# the meaningless row counter out of the file as an unnamed first column.
eth_data_4hr.to_csv("Datasets/4hourly_ETH.csv", index=False)

In [165]:
# Save the daily ETH frame. index=False matches the stock exports and keeps
# the meaningless row counter out of the file as an unnamed first column.
eth_data_daily.to_csv("Datasets/daily_ETH.csv", index=False)

In [167]:
# Save the weekly ETH frame. index=False matches the stock exports and keeps
# the meaningless row counter out of the file as an unnamed first column.
eth_data_weekly.to_csv("Datasets/weekly_ETH.csv", index=False)

In [169]:
# Save the 4-hour BTC frame. index=False matches the stock exports and keeps
# the meaningless row counter out of the file as an unnamed first column.
btc_data_4hr.to_csv("Datasets/4hourly_BTC.csv", index=False)

In [171]:
# Save the daily BTC frame. index=False matches the stock exports and keeps
# the meaningless row counter out of the file as an unnamed first column.
btc_data_daily.to_csv("Datasets/daily_BTC.csv", index=False)

In [173]:
# Save the weekly BTC frame. index=False matches the stock exports and keeps
# the meaningless row counter out of the file as an unnamed first column.
btc_data_weekly.to_csv("Datasets/weekly_BTC.csv", index=False)