 **Step 1 -Fetch Data From Binance**

In [1]:
import os
import requests
import pandas as pd

In [2]:
def fetch_binance(symbol="BTCUSDT", interval="1d", limit=1000):
    url = "https://api.binance.com/api/v3/klines"
    params = {"symbol": symbol, "interval": interval, "limit": limit}
    response = requests.get(url, params=params)
    data = response.json()
    df = pd.DataFrame(data)
    df.columns = ["open_time","open","high","low","close","volume",
                  "close_time","quote_asset_volume","num_trades",
                  "taker_base_volume","taker_quote_volume","ignore"]
    return df

In [3]:
df = fetch_binance("BTCUSDT", "1d", 1000)
print(df.head())
print(df.info())

       open_time            open            high             low  \
0  1677801600000  23465.32000000  23476.95000000  21971.13000000   
1  1677888000000  22354.34000000  22410.00000000  22157.08000000   
2  1677974400000  22346.57000000  22662.09000000  22189.22000000   
3  1678060800000  22430.24000000  22602.19000000  22258.00000000   
4  1678147200000  22409.41000000  22557.91000000  21927.00000000   

            close           volume     close_time   quote_asset_volume  \
0  22354.34000000  319954.19785000  1677887999999  7167184765.74364950   
1  22346.57000000  121257.38132000  1677974399999  2706422995.68025610   
2  22430.24000000  154841.75786000  1678060799999  3473011455.18795160   
3  22410.00000000  203751.82957000  1678147199999  4569102169.18569090   
4  22197.96000000  292519.80912000  1678233599999  6517594938.24605280   

   num_trades taker_base_volume   taker_quote_volume ignore  
0     8214639   156827.31366000  3512245357.18619130      0  
1     4169260    60043

In [4]:

def save_raw_csv(df, symbol="BTCUSDT", interval="1d"):
    os.makedirs("data/raw", exist_ok=True)
    path = f"data/raw/{symbol}_{interval}.csv"
    df.to_csv(path, index=False)
    print(f"Saved: {path}")

if __name__ == "__main__":
    df = fetch_binance(symbol="BTCUSDT", interval="1d", limit=1000)
    save_raw_csv(df)


Saved: data/raw/BTCUSDT_1d.csv


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   open_time           1000 non-null   int64 
 1   open                1000 non-null   object
 2   high                1000 non-null   object
 3   low                 1000 non-null   object
 4   close               1000 non-null   object
 5   volume              1000 non-null   object
 6   close_time          1000 non-null   int64 
 7   quote_asset_volume  1000 non-null   object
 8   num_trades          1000 non-null   int64 
 9   taker_base_volume   1000 non-null   object
 10  taker_quote_volume  1000 non-null   object
 11  ignore              1000 non-null   object
dtypes: int64(3), object(9)
memory usage: 93.9+ KB


**Step 2 -Data Cleaning & Basic Processing**

In [14]:
df["open_time"] = pd.to_datetime(df["open_time"], unit='ms')
df["close_time"]=pd.to_datetime(df["close_time"],unit='ms')
df["close"] = df["close"].astype(float)

In [11]:
df=pd.read_csv(r'C:\Users\HomePC\Crypto-BuySell-Classifier-Model\notebooks\data\raw\BTCUSDT_1d.csv')
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_base_volume,taker_quote_volume,ignore
0,1677801600000,23465.32,23476.95,21971.13,22354.34,319954.19785,1677887999999,7167185000.0,8214639,156827.31366,3512245000.0,0
1,1677888000000,22354.34,22410.0,22157.08,22346.57,121257.38132,1677974399999,2706423000.0,4169260,60043.33153,1340205000.0,0
2,1677974400000,22346.57,22662.09,22189.22,22430.24,154841.75786,1678060799999,3473011000.0,4835978,77394.35765,1735989000.0,0
3,1678060800000,22430.24,22602.19,22258.0,22410.0,203751.82957,1678147199999,4569102000.0,6471278,102110.26304,2289889000.0,0
4,1678147200000,22409.41,22557.91,21927.0,22197.96,292519.80912,1678233599999,6517595000.0,7813394,145498.64219,3242137000.0,0


In [13]:
df=df.drop("ignore",axis=1)
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_base_volume,taker_quote_volume
0,2023-03-03,23465.32,23476.95,21971.13,22354.34,319954.19785,1677887999999,7167185000.0,8214639,156827.31366,3512245000.0
1,2023-03-04,22354.34,22410.0,22157.08,22346.57,121257.38132,1677974399999,2706423000.0,4169260,60043.33153,1340205000.0
2,2023-03-05,22346.57,22662.09,22189.22,22430.24,154841.75786,1678060799999,3473011000.0,4835978,77394.35765,1735989000.0
3,2023-03-06,22430.24,22602.19,22258.0,22410.0,203751.82957,1678147199999,4569102000.0,6471278,102110.26304,2289889000.0
4,2023-03-07,22409.41,22557.91,21927.0,22197.96,292519.80912,1678233599999,6517595000.0,7813394,145498.64219,3242137000.0


In [15]:
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_base_volume,taker_quote_volume
0,2023-03-03,23465.32,23476.95,21971.13,22354.34,319954.19785,2023-03-03 23:59:59.999,7167185000.0,8214639,156827.31366,3512245000.0
1,2023-03-04,22354.34,22410.0,22157.08,22346.57,121257.38132,2023-03-04 23:59:59.999,2706423000.0,4169260,60043.33153,1340205000.0
2,2023-03-05,22346.57,22662.09,22189.22,22430.24,154841.75786,2023-03-05 23:59:59.999,3473011000.0,4835978,77394.35765,1735989000.0
3,2023-03-06,22430.24,22602.19,22258.0,22410.0,203751.82957,2023-03-06 23:59:59.999,4569102000.0,6471278,102110.26304,2289889000.0
4,2023-03-07,22409.41,22557.91,21927.0,22197.96,292519.80912,2023-03-07 23:59:59.999,6517595000.0,7813394,145498.64219,3242137000.0


In [16]:
# convert the columns to float
df["close"] = df["close"].astype(float)
df["num_trades"] = df["num_trades"].astype(float)
df["open"] = df["open"].astype(float)
df["high"] = df["high"].astype(float)
df["low"] = df["low"].astype(float)
df["volume"] = df["volume"].astype(float)
df["quote_asset_volume"] = df["quote_asset_volume"].astype(float)
df["taker_base_volume"] = df["taker_base_volume"].astype(float)
df["taker_quote_volume"] = df["taker_quote_volume"].astype(float)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   open_time           1000 non-null   datetime64[ns]
 1   open                1000 non-null   float64       
 2   high                1000 non-null   float64       
 3   low                 1000 non-null   float64       
 4   close               1000 non-null   float64       
 5   volume              1000 non-null   float64       
 6   close_time          1000 non-null   datetime64[ns]
 7   quote_asset_volume  1000 non-null   float64       
 8   num_trades          1000 non-null   float64       
 9   taker_base_volume   1000 non-null   float64       
 10  taker_quote_volume  1000 non-null   float64       
dtypes: datetime64[ns](2), float64(9)
memory usage: 86.1 KB


In [18]:
# Save processed data

def save_processed_csv(df, symbol, interval):
    os.makedirs("data/processed", exist_ok=True)
    file_path = f"data/processed/{symbol}_{interval}.csv"
    df.to_csv(file_path, index=False)
    print(f"Saved: {file_path}")

save_processed_csv(df, "BTCUSDT", "1d")

Saved: data/processed/BTCUSDT_1d.csv
