In [1]:
import pandas as pd

In [2]:
# Read the BTC/USDT CSV export (path relative to the working directory).
btcusdt_data = pd.read_csv(filepath_or_buffer='btcusdt_data.csv')

# transform data

# Remove the `ignore` and `Unnamed: 0` columns

In [7]:
# Columns to discard before any further processing.
# NOTE(review): 'Unnamed: 0' is presumably an index column written by an
# earlier to_csv call without index=False — confirm against the export step.
columns_to_remove = ['ignore', 'Unnamed: 0']

# Drop by reassignment instead of inplace=True, and tolerate columns that are
# already absent (errors='ignore'), so re-running this cell is a no-op rather
# than a KeyError.
btcusdt_data = btcusdt_data.drop(columns=columns_to_remove, errors='ignore')

# Convert 'open_time' and 'close_time' columns to datetime format

In [9]:
# Turn both timestamp columns into pandas datetimes, interpreting the stored
# values as milliseconds since the Unix epoch (unit='ms').
for time_col in ('open_time', 'close_time'):
    btcusdt_data[time_col] = pd.to_datetime(btcusdt_data[time_col], unit='ms')

# Time Aggregation: Resample data to daily intervals

In [10]:
# Time aggregation: collapse the rows into one row per calendar day.
# The day is taken from 'open_time'; this also leaves a 'date' column on
# btcusdt_data itself.
btcusdt_data['date'] = btcusdt_data['open_time'].dt.date

# 'first' and 'last' pick rows by position within each group, so the rows are
# ordered chronologically before grouping (stable sort: rows with equal
# 'open_time' keep their file order). Without this, an unsorted CSV would give
# the wrong daily open/close. The sort is applied to a copy; btcusdt_data keeps
# its original row order.
#
# NOTE(review): the first and last dates may cover only part of a day if the
# export does not start/end at midnight — confirm before comparing daily totals.
btcusdt_data_resampled = (
    btcusdt_data
    .sort_values('open_time', kind='mergesort')
    .groupby('date')
    .agg({
        'open': 'first',                  # first row of the day
        'high': 'max',                    # highest value within the day
        'low': 'min',                     # lowest value within the day
        'close': 'last',                  # last row of the day
        'volume': 'sum',
        'quote_volumn': 'sum',            # key spelled as the column is named in the data
        'number_of_trades': 'sum',
        'taker_buy_volume': 'sum',
        'taker_buy_quote_volume': 'sum',
    })
    .reset_index()                        # turn 'date' back into a regular column
)

In [11]:
# Checkpoint the daily frame to disk. index=False keeps the default integer
# index out of the file, so reloading it does not produce an extra
# 'Unnamed: 0' column.
btcusdt_data_resampled.to_csv('btcusdt_data_resampled.csv', index=False)

# Feature engineering: Calculate price differences and percentage changes

In [12]:
# Make sure the daily open/close prices are numeric; any value that cannot be
# parsed becomes NaN (errors='coerce').
for price_col in ('open', 'close'):
    btcusdt_data_resampled[price_col] = pd.to_numeric(
        btcusdt_data_resampled[price_col], errors='coerce'
    )

In [14]:
# Feature engineering on the daily frame:
#   price_diff              - close minus open for the day
#   price_change_percentage - that difference relative to the open, in percent
daily_open = btcusdt_data_resampled['open']
daily_close = btcusdt_data_resampled['close']
price_diff = daily_close - daily_open

btcusdt_data_resampled['price_diff'] = price_diff
btcusdt_data_resampled['price_change_percentage'] = (price_diff / daily_open) * 100


In [17]:
# Display the daily frame, now including price_diff and price_change_percentage.
btcusdt_data_resampled

Unnamed: 0,date,open,high,low,close,volume,quote_volumn,number_of_trades,taker_buy_volume,taker_buy_quote_volume,price_diff,price_change_percentage
0,2024-05-29,67555.18,67826.09,67151.35,67652.42,4160.14605,280818300.0,249113,2033.95134,137288500.0,97.24,0.143942
1,2024-05-30,67652.41,69500.0,67128.0,68352.17,28478.2184,1945001000.0,1272851,14074.36655,961595200.0,699.76,1.034346
2,2024-05-31,68352.17,69044.1,66670.0,67540.01,26690.32184,1812659000.0,1177860,13258.10182,900414200.0,-812.16,-1.188199
3,2024-06-01,67540.01,67900.0,67428.44,67766.85,8837.66133,598305200.0,638484,4235.27045,286752100.0,226.84,0.33586
4,2024-06-02,67766.84,68460.0,67257.47,67775.98,14515.84619,985033800.0,752674,7319.55636,496859500.0,9.14,0.013487


In [18]:
# Persist the daily frame (engineered columns included) without the index column.
btcusdt_data_resampled.to_csv(path_or_buf='btcusdt_data_resampled.csv', index=False)