In [3]:
import json
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from datetime import datetime
import torch.optim as optim
import torch.nn.functional as F

def load_data_from_json(json_file):
    """
    Read the JSON file produced by `create_train_json` and return its
    entries in chronological order.

    The file must contain a single object whose keys are ISO-format
    timestamp strings and whose values are two-element
    ``[x_window, y_val]`` pairs.

    Returns a list of (timestamp_dt, x_window, y_val) tuples sorted by
    ascending timestamp_dt.
    """
    with open(json_file, "r") as handle:
        raw = json.load(handle)

    # Parse every key into a datetime and flatten the [window, target] pair
    parsed = [
        (datetime.fromisoformat(stamp), window, target)
        for stamp, (window, target) in raw.items()
    ]

    # Stable sort on the timestamp only, so the windows are never compared
    return sorted(parsed, key=lambda entry: entry[0])

def prepare_splits(records, train_size=30000, val_size=10000, test_size=15000):
    """
    Splits the sorted records into train, val, and test sets (chronologically).

    The first ``train_size`` records become the training set, the next
    ``val_size`` the validation set and the following ``test_size`` the test
    set. Records beyond ``train_size + val_size + test_size`` are ignored.

    Args:
        records: Sliceable sequence of (timestamp, x_window, y_val) tuples,
            expected to be sorted by time already. Only x_window and y_val
            are used.
        train_size: Number of samples in the training split (>= 0).
        val_size: Number of samples in the validation split (>= 0).
        test_size: Number of samples in the test split (>= 0).

    Returns:
        (X_train, y_train, X_val, y_val, X_test, y_test) as float32 NumPy arrays.

    Raises:
        ValueError: If any requested split size is negative.

    Warns:
        UserWarning: If ``records`` holds fewer samples than requested. The
            splits are still returned, but the later ones are shorter than
            requested (possibly empty).
    """
    import warnings

    # A negative size would turn the slices below into "count from the end",
    # silently producing overlapping or nonsensical splits.
    for size_name, size in (
        ("train_size", train_size),
        ("val_size", val_size),
        ("test_size", test_size),
    ):
        if size < 0:
            raise ValueError(f"{size_name} must be non-negative, got {size}")

    val_end = train_size + val_size
    test_end = val_end + test_size

    # Slicing past the end never raises, so a short dataset would otherwise
    # yield truncated splits without any notice.
    if len(records) < test_end:
        warnings.warn(
            f"Requested {test_end} samples but only {len(records)} are available; "
            "later splits will be shorter than requested.",
            stacklevel=2,
        )

    subset = records[:test_end]  # in case you have more data

    X_all = np.array([x_win for _, x_win, _ in subset], dtype=np.float32)
    y_all = np.array([y_val for _, _, y_val in subset], dtype=np.float32)

    return (
        X_all[:train_size],
        y_all[:train_size],
        X_all[train_size:val_end],
        y_all[train_size:val_end],
        X_all[val_end:test_end],
        y_all[val_end:test_end],
    )

# Read every sample from the JSON dump and report how many were loaded
TRAIN_JSON_PATH = "BTC_train_data_new.json"
all_records = load_data_from_json(TRAIN_JSON_PATH)
print("Total samples loaded:", len(all_records))


Total samples loaded: 195000


In [12]:

# Chronological split: first 140k samples train, next 20k validate, next 20k test
X_train_np, y_train_np, X_val_np, y_val_np, X_test_np, y_test_np = prepare_splits(
    all_records, train_size=140_000, val_size=20_000, test_size=20_000
)

# Report (features, targets) shapes per split; labels are padded so the columns align
for _label, _features, _targets in (
    ("Train shape:", X_train_np, y_train_np),
    ("Val shape:  ", X_val_np, y_val_np),
    ("Test shape: ", X_test_np, y_test_np),
):
    print(_label, _features.shape, _targets.shape)


Train shape: (140000, 100) (140000,)
Val shape:   (20000, 100) (20000,)
Test shape:  (20000, 100) (20000,)


In [13]:

class TimeSeriesDataset(Dataset):
    """
    Map-style dataset that pairs each feature row with its target.

    Args:
        X: Features, one sample per entry along the first dimension. May be a
            NumPy array, a tensor, or a nested Python sequence.
        y: Targets, one per sample, in the same order as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # as_tensor wraps NumPy arrays without copying (same as from_numpy)
        # and additionally accepts existing tensors and Python sequences.
        self.X = torch.as_tensor(X)
        self.y = torch.as_tensor(y)

        # Catch misaligned inputs here rather than as an IndexError (or
        # silently unused targets) in the middle of an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the (features, target) pair stored at ``index``."""
        return self.X[index], self.y[index]

# Mini-batch size shared by all three loaders
batch_size = 256

# Wrap each NumPy split in a dataset
train_dataset = TimeSeriesDataset(X_train_np, y_train_np)
val_dataset = TimeSeriesDataset(X_val_np, y_val_np)
test_dataset = TimeSeriesDataset(X_test_np, y_test_np)

# Only the training loader shuffles; validation and test keep their stored order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [18]:
import torch.nn as nn

# One input per feature column of the training matrix
input_dim = X_train_np.shape[1]

# Single linear layer mapping each feature row to one scalar prediction
model = nn.Linear(input_dim, 1)

In [22]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

def train_model(model, train_loader, val_loader, epochs=10, lr=1e-3, device="cpu"):
    """
    Train ``model`` with Adam on an MSE loss and evaluate it after every epoch.

    For each epoch the model is updated once per batch of ``train_loader``,
    then evaluated on ``val_loader`` without gradient tracking. One line with
    the epoch's train and validation MSE is printed.

    The reported MSE values are sample-weighted: each batch's mean loss is
    weighted by its batch size, so a smaller final batch does not skew the
    epoch average. An empty loader is reported as ``nan``.

    Args:
        model: Module whose output has a size-1 second dimension (it is
            squeezed with ``squeeze(1)`` before the loss is computed).
        train_loader: Iterable yielding (features, targets) tensor batches.
        val_loader: Iterable yielding (features, targets) tensor batches.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to Adam.
        device: Device the model and every batch are moved to.

    Returns:
        None. ``model`` is updated in place.
    """
    model.to(device)  # Move model to the appropriate device
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        # ---- TRAINING PHASE ----
        model.train()
        train_loss_sum = 0.0
        train_samples = 0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            # Forward pass
            pred = model(batch_x).squeeze(1)  # shape (batch_size)
            loss = F.mse_loss(pred, batch_y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss is the batch mean; scale by the batch size so the epoch
            # figure is a true per-sample average.
            n_samples = batch_x.size(0)
            train_loss_sum += loss.item() * n_samples
            train_samples += n_samples

        train_loss_mean = train_loss_sum / train_samples if train_samples else float("nan")

        # ---- VALIDATION PHASE ----
        model.eval()
        val_loss_sum = 0.0
        val_samples = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                pred = model(batch_x).squeeze(1)
                loss = F.mse_loss(pred, batch_y)

                n_samples = batch_x.size(0)
                val_loss_sum += loss.item() * n_samples
                val_samples += n_samples

        val_loss_mean = val_loss_sum / val_samples if val_samples else float("nan")

        print(f"[Epoch {epoch}/{epochs}] "
              f"Train MSE: {train_loss_mean:.4f} | "
              f"Val MSE: {val_loss_mean:.4f}")

# Example usage: 20 epochs on CPU with a learning rate below the 1e-3 default
train_model(
    model,
    train_loader,
    val_loader,
    epochs=20,
    lr=2e-4,
)


[Epoch 1/20] Train MSE: 18597.3799 | Val MSE: 58513.2072
[Epoch 2/20] Train MSE: 17925.3028 | Val MSE: 74497.1344
[Epoch 3/20] Train MSE: 17199.0672 | Val MSE: 64591.0666
[Epoch 4/20] Train MSE: 17675.9906 | Val MSE: 56952.8802
[Epoch 5/20] Train MSE: 17457.9061 | Val MSE: 57242.8312
[Epoch 6/20] Train MSE: 17608.0719 | Val MSE: 56425.4542
[Epoch 7/20] Train MSE: 17000.9513 | Val MSE: 62292.5403
[Epoch 8/20] Train MSE: 17504.1561 | Val MSE: 58344.7456
[Epoch 9/20] Train MSE: 17054.2809 | Val MSE: 65594.9181
[Epoch 10/20] Train MSE: 17230.4352 | Val MSE: 55497.5701
[Epoch 11/20] Train MSE: 16945.9657 | Val MSE: 55192.7850
[Epoch 12/20] Train MSE: 17190.9933 | Val MSE: 54986.6254
[Epoch 13/20] Train MSE: 17447.8068 | Val MSE: 57449.9099
[Epoch 14/20] Train MSE: 16497.2236 | Val MSE: 60419.1852
[Epoch 15/20] Train MSE: 16519.0755 | Val MSE: 56327.7269
[Epoch 16/20] Train MSE: 16238.7007 | Val MSE: 74235.9477
[Epoch 17/20] Train MSE: 16435.2349 | Val MSE: 60842.4948
[Epoch 18/20] Train MSE

In [None]:
import torch
# Show which PyTorch build this kernel is running
print(torch.__version__)

2.6.0+cpu
