In [18]:
!pip install yfinance




In [57]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split

In [46]:
ticker = "BTC-USD"
unit_hours = 6
days_begin = 10
days_end = 10

end_date = datetime.utcnow() - timedelta(days_begin)
start_date = end_date - timedelta(days=days_end)
print(end_date, start_date)


train_data = yf.download(
    tickers=ticker,
    start=start_date.strftime("%Y-%m-%d"),
    end=end_date.strftime("%Y-%m-%d"),
    interval="15m",
    progress=False,
    group_by='ticker',
    auto_adjust=False
)

train_data.columns = train_data.columns.droplevel(0)


2025-08-05 08:15:49.050673 2025-07-26 08:15:49.050673


In [47]:
ticker = "BTC-USD"
unit_hours = 6
days_begin = 0
days_end = 10

end_date = datetime.utcnow() - timedelta(days_begin)
start_date = end_date - timedelta(days=days_end)
print(end_date, start_date)


test_data = yf.download(
    tickers=ticker,
    start=start_date.strftime("%Y-%m-%d"),
    end=end_date.strftime("%Y-%m-%d"),
    interval="15m",
    progress=False,
    group_by='ticker',
    auto_adjust=False
)

test_data.columns = test_data.columns.droplevel(0)


2025-08-15 08:15:50.790006 2025-08-05 08:15:50.790006


In [85]:
train_data_resampled = train_data.resample(f'{unit_hours}h').agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'
}).dropna()

test_data_resampled = test_data.resample(f'{unit_hours}h').agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'
}).dropna()


train_data_resampled = train_data_resampled[['Open', 'High', 'Low', 'Close', 'Volume']]

test_data_resampled = test_data_resampled[['Open', 'High', 'Low', 'Close', 'Volume']]

train_csv_filename = "train_dataset.csv"
test_csv_filename = "test_dataset.csv"
train_data_resampled.to_csv(train_csv_filename, index=False)
test_data_resampled.to_csv(test_csv_filename, index=False)


In [81]:
train_data_resampled.head()

Price,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-07-26 00:00:00+00:00,117625.382812,117768.789062,117201.15625,117498.921875,7876534272
2025-07-26 06:00:00+00:00,117495.921875,118219.609375,117357.320312,117827.984375,8697057280
2025-07-26 12:00:00+00:00,117817.6875,118274.765625,117805.625,118068.5625,8017883136
2025-07-26 18:00:00+00:00,118072.210938,118324.984375,117929.15625,117946.296875,8409116672
2025-07-27 00:00:00+00:00,117991.445312,118384.382812,117872.5,118334.046875,9951830016


In [82]:
train_data.head(), test_data.head()

(Price                               Open           High            Low  \
 Datetime                                                                 
 2025-07-26 00:00:00+00:00  117625.382812  117625.382812  117388.914062   
 2025-07-26 00:15:00+00:00  117408.843750  117563.890625  117408.843750   
 2025-07-26 00:30:00+00:00  117566.320312  117623.468750  117515.257812   
 2025-07-26 00:45:00+00:00  117528.601562  117528.601562  117387.656250   
 2025-07-26 01:00:00+00:00  117515.539062  117568.289062  117483.140625   
 
 Price                              Close      Adj Close      Volume  
 Datetime                                                             
 2025-07-26 00:00:00+00:00  117388.914062  117388.914062    75292672  
 2025-07-26 00:15:00+00:00  117548.960938  117548.960938  1150148608  
 2025-07-26 00:30:00+00:00  117533.687500  117533.687500  1059348480  
 2025-07-26 00:45:00+00:00  117496.640625  117496.640625  1010819072  
 2025-07-26 01:00:00+00:00  117568.289062  1175

In [83]:
train_array = train_data_resampled.to_numpy()
train_tensor = torch.from_numpy(train_array).squeeze()
train_tensor

tensor([[1.1763e+05, 1.1777e+05, 1.1720e+05, 1.1750e+05, 7.8765e+09],
        [1.1750e+05, 1.1822e+05, 1.1736e+05, 1.1783e+05, 8.6971e+09],
        [1.1782e+05, 1.1827e+05, 1.1781e+05, 1.1807e+05, 8.0179e+09],
        [1.1807e+05, 1.1832e+05, 1.1793e+05, 1.1795e+05, 8.4091e+09],
        [1.1799e+05, 1.1838e+05, 1.1787e+05, 1.1833e+05, 9.9518e+09],
        [1.1833e+05, 1.1844e+05, 1.1791e+05, 1.1809e+05, 1.0806e+10],
        [1.1812e+05, 1.1954e+05, 1.1793e+05, 1.1922e+05, 1.7502e+10],
        [1.1927e+05, 1.1979e+05, 1.1877e+05, 1.1946e+05, 1.8477e+10],
        [1.1944e+05, 1.1979e+05, 1.1902e+05, 1.1961e+05, 2.1284e+10],
        [1.1955e+05, 1.1961e+05, 1.1853e+05, 1.1881e+05, 2.2408e+10],
        [1.1882e+05, 1.1908e+05, 1.1766e+05, 1.1768e+05, 2.1088e+10],
        [1.1767e+05, 1.1832e+05, 1.1744e+05, 1.1784e+05, 1.9176e+10],
        [1.1802e+05, 1.1884e+05, 1.1748e+05, 1.1882e+05, 2.1980e+10],
        [1.1879e+05, 1.1927e+05, 1.1820e+05, 1.1859e+05, 1.7707e+10],
        [1.1859e+05,

In [84]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

# ====== Model ======
class StockDataset(Dataset):
    def __init__(self, data_input, lookback=5):
        self.lookback = lookback

        if isinstance(data_input, pd.DataFrame):
            data = torch.tensor(data_input[['Open', 'High', 'Low', 'Close', 'Volume']].values, dtype=torch.float32).squeeze()
        elif isinstance(data_input, np.ndarray):
            data = torch.tensor(data_input, dtype=torch.float32).squeeze()
        elif torch.is_tensor(data_input):
            data = torch.tensor(data_input, dtype = torch.float32).squeeze()
        else:
            raise TypeError("Input must be DataFrame, NumPy array, or Tensor.")

        self.X, self.y = [], []
        for i in range(len(data) - lookback):
            self.X.append(data[i:i+lookback])
            self.y.append(data[i+lookback][[0, 1, 3]])  # Open, High, Close

        self.X = torch.stack(self.X)
        self.y = torch.stack(self.y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]



class StockPredictor(nn.Module):
    def __init__(self, input_size=5, hidden_size=64, num_layers=2, output_size=3):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # last time step
        return out

# ====== Training ======
def train_model(train_df, epochs=50, batch_size=16, lookback=5, lr=1e-3):
    dataset = StockDataset(train_df, lookback)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model = StockPredictor()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        for X, y in loader:
            pred = model(X)
            loss = criterion(pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

    return model

# ====== Example Run ======
model = StockPredictor()
model = train_model(train_tensor)
torch.save(model.state_dict(), "stock_predictor.pth")
print("Model saved as stock_predictor.pth")


  data = torch.tensor(data_input, dtype = torch.float32).squeeze()


Epoch 5/50, Loss: 13467709440.0000
Epoch 10/50, Loss: 13719438336.0000
Epoch 15/50, Loss: 13128549376.0000
Epoch 20/50, Loss: 13612059648.0000
Epoch 25/50, Loss: 13428817920.0000
Epoch 30/50, Loss: 14125387776.0000
Epoch 35/50, Loss: 14081390592.0000
Epoch 40/50, Loss: 13241238528.0000
Epoch 45/50, Loss: 12957336576.0000
Epoch 50/50, Loss: 13719035904.0000
Model saved as stock_predictor.pth
