In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd

processed_file_path = "/content/drive/MyDrive/btc_project/data/processed/btc_processed_data.csv"

df = pd.read_csv(processed_file_path)

df.head()

Unnamed: 0,date,close,volume,daily_return
0,2025-01-02 00:00:00,94384.176115,23275010000.0,0.009372
1,2025-01-03 00:00:00,96852.146812,45157340000.0,0.026148
2,2025-01-04 00:00:00,98084.342793,35721650000.0,0.012722
3,2025-01-05 00:00:00,98256.738768,20979040000.0,0.001758
4,2025-01-06 00:00:00,98364.589466,20060290000.0,0.001098


In [3]:
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").reset_index(drop=True)

In [4]:
df["MA7"] = df["close"].rolling(window=7).mean()
df["MA30"] = df["close"].rolling(window=30).mean()

df[["close", "MA7", "MA30"]].head(35)

Unnamed: 0,close,MA7,MA30
0,94384.176115,,
1,96852.146812,,
2,98084.342793,,
3,98256.738768,,
4,98364.589466,,
5,102229.394532,,
6,96952.098868,97874.783908,
7,95016.71441,97965.146521,
8,92376.275783,97325.736374,
9,94736.626742,96847.491224,


In [5]:
import numpy as np

window = 14

delta = df["close"].diff()

gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)

avg_gain = pd.Series(gain).rolling(window).mean()
avg_loss = pd.Series(loss).rolling(window).mean()

rs = avg_gain / avg_loss
df["RSI"] = 100 - (100 / (1 + rs))

df[["close", "RSI"]].head(20)

Unnamed: 0,close,RSI
0,94384.176115,
1,96852.146812,
2,98084.342793,
3,98256.738768,
4,98364.589466,
5,102229.394532,
6,96952.098868,
7,95016.71441,
8,92376.275783,
9,94736.626742,


In [6]:
df["lag_1"] = df["close"].shift(1)
df["lag_3"] = df["close"].shift(3)
df["lag_7"] = df["close"].shift(7)

df[["close", "lag_1", "lag_3", "lag_7"]].head(10)

Unnamed: 0,close,lag_1,lag_3,lag_7
0,94384.176115,,,
1,96852.146812,94384.176115,,
2,98084.342793,96852.146812,,
3,98256.738768,98084.342793,94384.176115,
4,98364.589466,98256.738768,96852.146812,
5,102229.394532,98364.589466,98084.342793,
6,96952.098868,102229.394532,98256.738768,
7,95016.71441,96952.098868,98364.589466,94384.176115
8,92376.275783,95016.71441,102229.394532,96852.146812
9,94736.626742,92376.275783,96952.098868,98084.342793


In [7]:
df["volatility_7"] = df["daily_return"].rolling(window=7).std()
df["volatility_14"] = df["daily_return"].rolling(window=14).std()

df[["daily_return", "volatility_7", "volatility_14"]].head(20)

Unnamed: 0,daily_return,volatility_7,volatility_14
0,0.009372,,
1,0.026148,,
2,0.012722,,
3,0.001758,,
4,0.001098,,
5,0.039291,,
6,-0.051622,0.028669,
7,-0.019962,0.030123,
8,-0.027789,0.029616,
9,0.025551,0.03134,


In [8]:
df.isnull().sum()

Unnamed: 0,0
date,0
close,0
volume,0
daily_return,0
MA7,6
MA30,29
RSI,13
lag_1,1
lag_3,3
lag_7,7


In [9]:
df = df.dropna().reset_index(drop=True)

df.shape

(336, 12)

In [10]:
df.columns

Index(['date', 'close', 'volume', 'daily_return', 'MA7', 'MA30', 'RSI',
       'lag_1', 'lag_3', 'lag_7', 'volatility_7', 'volatility_14'],
      dtype='object')

In [11]:
import os

feature_path = "/content/drive/MyDrive/btc_project/data/processed"
os.makedirs(feature_path, exist_ok=True)

feature_file = f"{feature_path}/btc_feature_data.csv"
df.to_csv(feature_file, index=False)

print("Feature-engineered data saved at:")
print(feature_file)
print("Final shape:", df.shape)

Feature-engineered data saved at:
/content/drive/MyDrive/btc_project/data/processed/btc_feature_data.csv
Final shape: (336, 12)
