In [6]:
import json
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from datetime import datetime
import torch.optim as optim
import torch.nn.functional as F

def load_data_from_json(json_file):
    """
    Read a training-data JSON file (as written by `create_train_json`) and
    return its contents as a chronologically ordered list.

    The file must hold a JSON object whose keys are ISO-format timestamp
    strings and whose values are two-element sequences `[X_window, y_val]`.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A list of `(timestamp_dt, X_window, y_val)` tuples sorted by
        `timestamp_dt` in ascending order.
    """
    with open(json_file, "r") as handle:
        raw = json.load(handle)

    # Parse each key into a datetime so ordering is chronological rather
    # than lexicographic on the raw strings.
    parsed = [
        (datetime.fromisoformat(stamp), window, target)
        for stamp, (window, target) in raw.items()
    ]

    # Order on the timestamp only; windows/targets never take part in comparison.
    return sorted(parsed, key=lambda rec: rec[0])

def prepare_splits(records, train_size=30000, val_size=10000, test_size=15000):
    """
    Split ordered records into consecutive train, validation and test sets.

    The first `train_size` records form the training set, the next
    `val_size` the validation set and the following `test_size` the test
    set. Records beyond `train_size + val_size + test_size` are ignored.
    The records are used in the order given; pass them already sorted if a
    chronological split is wanted.

    Args:
        records: Sequence of `(timestamp, X_window, y_val)` triples. The
            timestamp element is not used here.
        train_size: Number of leading records for the training set.
        val_size: Number of records after the training set for validation.
        test_size: Number of records after the validation set for testing.

    Returns:
        `(X_train, y_train, X_val, y_val, X_test, y_test)` as float32 NumPy
        arrays.

    Raises:
        ValueError: If any of the requested sizes is negative.

    Warns:
        RuntimeWarning: If `records` holds fewer entries than requested, in
            which case the later splits are shorter than asked for (possibly
            empty) instead of failing.
    """
    # Local import keeps this block self-contained without touching the
    # notebook's import cell.
    import warnings

    if min(train_size, val_size, test_size) < 0:
        raise ValueError(
            "train_size, val_size and test_size must be non-negative, got "
            f"{train_size}, {val_size}, {test_size}"
        )

    # Exclusive end index of each consecutive split.
    train_end = train_size
    val_end = train_end + val_size
    test_end = val_end + test_size

    if len(records) < test_end:
        # Slicing past the end silently shortens the trailing splits; make
        # that visible rather than training against a truncated test set.
        warnings.warn(
            f"prepare_splits: requested {test_end} samples but only "
            f"{len(records)} records are available; later splits will be "
            "shorter than requested or empty.",
            RuntimeWarning,
            stacklevel=2,
        )

    subset = records[:test_end]  # drop any surplus records
    X_all = np.array([x_win for _, x_win, _ in subset], dtype=np.float32)
    y_all = np.array([y_val for _, _, y_val in subset], dtype=np.float32)

    X_train, y_train = X_all[:train_end], y_all[:train_end]
    X_val, y_val = X_all[train_end:val_end], y_all[train_end:val_end]
    X_test, y_test = X_all[val_end:test_end], y_all[val_end:test_end]

    return X_train, y_train, X_val, y_val, X_test, y_test

# Load every record from the JSON file; the loader returns them sorted by timestamp
all_records = load_data_from_json("BTC_train_data.json")
print("Total samples loaded:", len(all_records))


Total samples loaded: 55000


In [7]:

# Split into train/val/test in record order: the first 30,000 samples are used
# for training, the next 10,000 for validation and the following 15,000 for testing.
X_train_np, y_train_np, X_val_np, y_val_np, X_test_np, y_test_np = prepare_splits(
    all_records,
    train_size=30000,
    val_size=10000,
    test_size=15000
)

# Print the feature/target array shapes of each split as a sanity check
print("Train shape:", X_train_np.shape, y_train_np.shape)
print("Val shape:  ", X_val_np.shape,   y_val_np.shape)
print("Test shape: ", X_test_np.shape,  y_test_np.shape)


Train shape: (30000, 100) (30000,)
Val shape:   (10000, 100) (10000,)
Test shape:  (15000, 100) (15000,)


In [8]:

class TimeSeriesDataset(Dataset):
    """
    Map-style dataset pairing each feature row of `X` with its target in `y`.

    Args:
        X: Features, indexed along the first axis (one entry per sample).
            A NumPy array is wrapped without copying; lists and tensors are
            accepted as well.
        y: Targets, one entry per sample, same length as `X`.

    Raises:
        ValueError: If `X` and `y` do not contain the same number of samples.
    """

    def __init__(self, X, y):
        # as_tensor wraps ndarrays the same way from_numpy does (shared
        # memory, dtype preserved) and additionally accepts lists/tensors.
        features = torch.as_tensor(X)
        targets = torch.as_tensor(y)

        # Catch misaligned inputs here instead of failing mid-epoch with an
        # IndexError (or silently ignoring surplus targets).
        if len(features) != len(targets):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(features)} and {len(targets)}"
            )

        self.X = features
        self.y = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the `(features, target)` pair stored at `index`."""
        return self.X[index], self.y[index]

# Mini-batch size shared by all three loaders
batch_size = 256

# Training split: wrapped in a dataset and served in shuffled batches
train_dataset = TimeSeriesDataset(X_train_np, y_train_np)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Validation split: served in stored order (no shuffling)
val_dataset = TimeSeriesDataset(X_val_np, y_val_np)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# Test split: served in stored order (no shuffling)
test_dataset = TimeSeriesDataset(X_test_np, y_test_np)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [15]:
import torch.nn as nn

# Number of input features: the size of the second axis of the training matrix
input_dim = X_train_np.shape[1]

# Baseline model: a single linear layer mapping each input row to one scalar output
model = nn.Linear(in_features=input_dim, out_features=1)

In [18]:
import torch.optim as optim
import torch.nn.functional as F

def train_model(model, train_loader, val_loader, epochs=10, lr=1e-3):
    """
    Train `model` with Adam on an MSE objective and print per-epoch metrics.

    Every epoch performs one optimisation pass over `train_loader` followed
    by a gradient-free evaluation pass over `val_loader`. The reported MSE
    values are per-sample averages: each batch loss is weighted by its batch
    size, so a smaller final batch does not skew the epoch figure. A loader
    that yields no batches is reported as `nan`.

    Args:
        model: Module whose output for a `(batch, features)` input has shape
            `(batch, 1)`; its parameters are updated in place.
        train_loader: Iterable of `(batch_x, batch_y)` pairs used for
            optimisation, with `batch_y` of shape `(batch,)`.
        val_loader: Iterable of `(batch_x, batch_y)` pairs used only for
            evaluation.
        epochs: Number of passes over `train_loader`.
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        None. Progress is printed, one line per epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        # ---- TRAINING PHASE ----
        model.train()
        train_sq_err = 0.0  # running sum of squared errors over the epoch
        train_count = 0     # number of samples seen in the epoch

        for batch_x, batch_y in train_loader:
            # Model output is (batch, 1); drop the trailing axis so it lines
            # up with the (batch,) targets instead of broadcasting.
            pred = model(batch_x).squeeze(1)
            loss = F.mse_loss(pred, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # mse_loss is a batch mean; multiply back by the batch size so
            # the epoch average is taken over samples, not over batches.
            n_samples = batch_x.shape[0]
            train_sq_err += loss.item() * n_samples
            train_count += n_samples

        train_loss_mean = train_sq_err / train_count if train_count else float("nan")

        # ---- VALIDATION PHASE ----
        model.eval()
        val_sq_err = 0.0
        val_count = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                pred = model(batch_x).squeeze(1)
                loss = F.mse_loss(pred, batch_y)

                n_samples = batch_x.shape[0]
                val_sq_err += loss.item() * n_samples
                val_count += n_samples

        val_loss_mean = val_sq_err / val_count if val_count else float("nan")

        print(f"[Epoch {epoch}/{epochs}] "
              f"Train MSE: {train_loss_mean:.4f} | "
              f"Val MSE: {val_loss_mean:.4f}")


# Fit the model for 20 epochs with learning rate 2e-4; train/val MSE is printed after each epoch
train_model(model, train_loader, val_loader, epochs=20, lr=2e-4)

[Epoch 1/20] Train MSE: 3258.8954 | Val MSE: 2316.8016
[Epoch 2/20] Train MSE: 2377.2327 | Val MSE: 2328.3594
[Epoch 3/20] Train MSE: 2386.0943 | Val MSE: 2331.5752
[Epoch 4/20] Train MSE: 2337.9505 | Val MSE: 2264.4180
[Epoch 5/20] Train MSE: 2370.8813 | Val MSE: 2259.6350
[Epoch 6/20] Train MSE: 2397.1665 | Val MSE: 2323.6986
[Epoch 7/20] Train MSE: 2407.1159 | Val MSE: 2230.5633
[Epoch 8/20] Train MSE: 2368.6187 | Val MSE: 2732.6583
[Epoch 9/20] Train MSE: 2411.9937 | Val MSE: 2243.0132
[Epoch 10/20] Train MSE: 2328.0142 | Val MSE: 2212.5552
[Epoch 11/20] Train MSE: 2288.1631 | Val MSE: 2223.5644
[Epoch 12/20] Train MSE: 2285.3339 | Val MSE: 2194.7944
[Epoch 13/20] Train MSE: 2292.6956 | Val MSE: 2296.9722
[Epoch 14/20] Train MSE: 2295.4115 | Val MSE: 2358.8071
[Epoch 15/20] Train MSE: 2298.1767 | Val MSE: 2161.0060
[Epoch 16/20] Train MSE: 2364.6597 | Val MSE: 2457.3042
[Epoch 17/20] Train MSE: 2280.9746 | Val MSE: 2150.3453
[Epoch 18/20] Train MSE: 2213.7897 | Val MSE: 2112.2178
[