In [3]:
import requests
import pandas as pd

# Function to fetch cryptocurrency data from Binance API
def fetch_cryptocurrency_data(symbol, interval, limit):
    base_url = "https://api.binance.com/api/v3/klines"
    params = {
        "symbol": symbol + "USDT",  # Cryptocurrency symbol with trading pair
        "interval": interval,       # Desired interval: 1h (hourly), 1d (daily), 1w (weekly)
        "limit": limit              # Number of data points to retrieve
    }

    response = requests.get(base_url, params=params)
    data = response.json()
    return data

# Fetch and store data for Bitcoin (BTC)
btc_data = fetch_cryptocurrency_data("BTC", "1d", 1000)
btc_df = pd.DataFrame(btc_data, columns=["timestamp", "open", "high", "low", "close", "volume", "close_time", "quote_asset_volume", "number_of_trades", "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"])
btc_df = btc_df[["timestamp", "open", "high", "low", "close", "volume"]]  # Select relevant columns
btc_df.to_csv("btc_data.csv", index=False)

# Fetch and store data for Ethereum (ETH)
btc_data = fetch_cryptocurrency_data("ETC", "1d", 1000)
btc_df = pd.DataFrame(btc_data, columns=["timestamp", "open", "high", "low", "close", "volume", "close_time", "quote_asset_volume", "number_of_trades", "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"])
btc_df = btc_df[["timestamp", "open", "high", "low", "close", "volume"]]  # Select relevant columns
btc_df.to_csv("etc_data.csv", index=False)

# Fetch and store data for Litecoin (LTC)
btc_data = fetch_cryptocurrency_data("LTC", "1d", 1000)
btc_df = pd.DataFrame(btc_data, columns=["timestamp", "open", "high", "low", "close", "volume", "close_time", "quote_asset_volume", "number_of_trades", "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"])
btc_df = btc_df[["timestamp", "open", "high", "low", "close", "volume"]]  # Select relevant columns
btc_df.to_csv("ltc_data.csv", index=False)



In [4]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Load the collected data into dataframes
btc_data = pd.read_csv('btc_data.csv')
eth_data = pd.read_csv('etc_data.csv')
ltc_data = pd.read_csv('ltc_data.csv')

# Feature selection
features = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

# Keep only the selected features
btc_data = btc_data[features]
eth_data = eth_data[features]
ltc_data = ltc_data[features]

# Handling missing values with forward fill
btc_data.fillna(method='ffill', inplace=True)
eth_data.fillna(method='ffill', inplace=True)
ltc_data.fillna(method='ffill', inplace=True)

In [26]:
btc_data.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,1606694400000,18185.0,19863.16,18184.99,19695.87,115463.466888
1,1606780800000,19695.87,19888.0,18001.12,18764.96,127698.762652
2,1606867200000,18764.96,19342.0,18330.0,19204.09,75911.013478
3,1606953600000,19204.08,19598.0,18867.2,19421.9,66689.391279
4,1607040000000,19422.34,19527.0,18565.31,18650.52,71283.6682


In [27]:
eth_data.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,1606694400000,6.3977,6.8001,6.3032,6.7458,4835013.78
1,1606780800000,6.7428,7.0,5.9869,6.1466,6992281.84
2,1606867200000,6.142,6.3806,6.0369,6.2376,3198762.18
3,1606953600000,6.2301,6.6079,6.176,6.587,4271683.49
4,1607040000000,6.5891,6.592,5.8789,5.9515,3240602.71


In [28]:
ltc_data.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,1606694400000,78.89,88.46,76.63,87.65,2118966.0
1,1606780800000,87.66,93.01,79.56,85.08,3600461.0
2,1606867200000,85.07,90.63,82.61,88.9,2135078.0
3,1606953600000,88.89,91.5,85.46,89.0,1690248.0
4,1607040000000,89.01,89.84,77.8,79.52,1701698.0


The timestamp retrieved from the Binance API is in Unix timestamp format, which represents the number of milliseconds (or seconds) since January 1, 1970 (UTC), also known as the "epoch" time. The ffill() method is used to handle missing values, which basically replaces the NULL values with the value from the previous row (or previous column, if the axis parameter is set to 'columns' ).