In [11]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from torch.utils.data import Dataset, DataLoader

In [12]:
# Load data
data = pd.read_parquet('../cache/encoded_99q_scaled.parquet')

In [13]:
class GDELT(Dataset):
    def __init__(self, features, target):
        if hasattr(features, 'values'):
            features = features.values
        if hasattr(target, 'values'):
            target = target.values
           
        self.features = torch.tensor(features, dtype=torch.float32)
        self.target = torch.tensor(target, dtype=torch.float32)
        
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self.target[idx]

In [14]:
class PricePredictor(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        
        # simple neural network with 3 layers (linear)
        self.network = nn.Sequential(
            # 1st layer
            nn.Linear(input_dim, 128), # input layer to hidden layer 1 with 128 neurons
            nn.BatchNorm1d(128), # batch normalization layer to normalize the output of the 1st layer before activation function is applied to it
            nn.ReLU(), # activation function to introduce non-linearity to the model output from the 1st layer
            nn.Dropout(0.3), # dropout layer to prevent overfitting by randomly setting 30% of the output from the 1st layer to 0
        
            # 2nd layer
            nn.Linear(128, 64), # hidden layer 1 to hidden layer 2 with 64 neurons
            nn.BatchNorm1d(64), # batch normalization layer
            nn.ReLU(), # activation function
            nn.Dropout(0.3), # dropout layer
        
            # 3rd layer
            nn.Linear(64, 32), # hidden layer 2 to hidden layer 3 with 32 neurons
            nn.BatchNorm1d(32), # batch normalization layer
            nn.ReLU(), # activation function
            nn.Dropout(0.3), # dropout layer
        
            # output layer
            nn.Linear(32, 1) # hidden layer 3 to output layer with 1 neuron
        )
        
    def forward(self, x):
        return self.network(x)

In [15]:
data.columns

Index(['Timestamp', 'Actor1Country', 'Actor1GeoCountry', 'Actor1Type',
       'Actor2Country', 'Actor2GeoCountry', 'Actor2Type', 'ActionCountry',
       'EventType', 'GoldsteinScale', 'NumSources', 'NumArticles', 'AvgTone',
       'Magnitude', 'Impact', 'Impact_bin', 'pct_change_15min',
       'pct_change_30min', 'pct_change_24h', 'AbsChange'],
      dtype='object')

In [16]:
def prep_data(df, target='pct_change_30min'):
    feature_cols = [
        'Actor1Country', 'Actor1GeoCountry', 'Actor1Type',
        'Actor2Country', 'Actor2GeoCountry', 'Actor2Type',
        'ActionCountry', 'EventType', 'GoldsteinScale',
        'NumSources', 'NumArticles', 'AvgTone',
        'Magnitude', 'Impact'
    ]
        
    x = df[feature_cols]
    y = df[target]
    
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)
    
    return x_scaled, y, scaler

In [17]:
def train_model(x, y, batch_size=256, epochs=50, learning_rate=0.001, patience=10):
    tscv = TimeSeriesSplit(n_splits=5)
    train_idx, test_idx = list(tscv.split(x))[-1] # use the last split as test set
    
    x_train, y_train = x[train_idx], y[train_idx]
    x_val, y_val = x[test_idx], y[test_idx]
    
    train_dataset = GDELT(x_train, y_train)
    val_dataset = GDELT(x_val, y_val)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    model = PricePredictor(x.shape[1])
    
    criterion = nn.MSELoss() # loss function (mean squared error)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    best_loss = np.inf
    patience_counter = 0
    best_model = None
    
    # training loop
    for epoch in range(epochs):
        # train
        model.train()
        train_loss = 0
        
        for features, target in train_loader:
            optimizer.zero_grad()
            output = model(features)
            loss = criterion(output, target.reshape(-1, 1))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        # val
        model.eval()
        val_loss = 0
        predictions = []
        actuals = []
        
        with torch.no_grad():
            for features, target in val_loader:
                output = model(features)
                loss = criterion(output, target.reshape(-1, 1))
                val_loss += loss.item()
                predictions.extend(output.numpy().flatten())
                actuals.extend(target.numpy().flatten())
        
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        
        print(f'Epoch {epoch+1}/{epochs}, train loss: {train_loss:.4f}, val loss: {val_loss:.4f}, accuracy: {np.mean(np.sign(predictions) == np.sign(actuals))}')
        
        if val_loss < best_loss:
            best_loss = val_loss
            best_model = model
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter == patience:
                print(f'Early stopping at epoch {epoch+1} with val loss: {val_loss:.4f}')
                break
    
    model.load_state_dict(best_model.state_dict())
    return model

In [18]:
data.head()

Unnamed: 0_level_0,Timestamp,Actor1Country,Actor1GeoCountry,Actor1Type,Actor2Country,Actor2GeoCountry,Actor2Type,ActionCountry,EventType,GoldsteinScale,NumSources,NumArticles,AvgTone,Magnitude,Impact,Impact_bin,pct_change_15min,pct_change_30min,pct_change_24h,AbsChange
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019-01-01,-1.530752,9,13,9,13,11,9,11,10,0.605751,0.889228,2.004333,1.164239,0.375252,0.714246,Positive,-0.033061,-0.226363,-2.433464,-0.199873
2019-01-01,-1.530752,9,13,9,13,11,9,11,10,0.731167,0.36707,0.597142,0.468568,-0.71682,0.425568,Slightly Positive,-0.033061,-0.226363,-2.433464,-0.199873
2019-01-01,-1.530752,9,13,9,13,11,9,11,10,0.563945,4.022178,1.300737,1.004049,0.96018,0.848379,Positive,-0.033061,-0.226363,-2.433464,-0.199873
2019-01-01,-1.530752,9,13,9,13,11,9,11,10,-0.523,0.36707,-0.036094,-0.520616,0.334805,-0.355905,Slightly Negative,-0.033061,-0.226363,-2.433464,-0.199873
2019-01-01,-1.530752,9,13,9,13,11,9,11,10,0.292209,1.933545,-0.247173,0.921836,-0.100779,0.329342,Slightly Positive,-0.033061,-0.226363,-2.433464,-0.199873


In [19]:
sample = data.sample(1000000)
sample.reset_index(drop=True, inplace=True)

In [20]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.preprocessing import RobustScaler
from torch.utils.data import DataLoader

class ResidualBlock(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.block = nn.Sequential(
            nn.Linear(in_features, in_features),
            nn.LayerNorm(in_features),
            nn.GELU(),  # GELU instead of ReLU/LeakyReLU
            nn.Linear(in_features, in_features),
            nn.LayerNorm(in_features),
        )
        self.gelu = nn.GELU()

    def forward(self, x):
        return self.gelu(x + self.block(x))

class PricePredictor(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        
        # Initial feature extraction
        self.feature_extraction = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.LayerNorm(32),
            nn.GELU()
        )
        
        # Residual blocks
        self.residual_blocks = nn.Sequential(
            ResidualBlock(32),
            ResidualBlock(32),
            ResidualBlock(32)
        )
        
        # Price movement prediction
        self.predictor = nn.Sequential(
            nn.Linear(32, 16),
            nn.LayerNorm(16),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(16, 1)
        )
        
        # Initialize weights
        self.apply(self._init_weights)
    
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight, gain=1e-2)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)
    
    def forward(self, x):
        x = self.feature_extraction(x)
        x = self.residual_blocks(x)
        return self.predictor(x)

def train_model(X, y, batch_size=128, epochs=100):
    """Train with improved monitoring"""
    
    # Split data - use last 20% for validation
    split_idx = int(len(X) * 0.8)
    X_train, X_val = X[:split_idx], X[split_idx:]
    y_train, y_val = y[:split_idx], y[split_idx:]
    
    if hasattr(X_train, 'values'):
        X_train = X_train.values
        X_val = X_val.values
    if hasattr(y_train, 'values'):
        y_train = y_train.values
        y_val = y_val.values
    
    # Create dataloaders
    train_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(X_train),
        torch.FloatTensor(y_train)
    )
    val_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(X_val),
        torch.FloatTensor(y_val)
    )
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    # Initialize model
    model = PricePredictor(input_dim=X.shape[1])
    
    # Custom loss combining MSE and directional accuracy
    class DirectionalMSELoss(nn.Module):
        def __init__(self, alpha=0.7):
            super().__init__()
            self.alpha = alpha
            self.mse = nn.MSELoss()

        def forward(self, pred, target):
            mse_loss = self.mse(pred, target)
            # Directional loss
            direction_loss = torch.mean(
                1 - torch.sign(pred) * torch.sign(target)
            )
            return self.alpha * mse_loss + (1 - self.alpha) * direction_loss

    criterion = DirectionalMSELoss()
    
    # Optimizer with cosine annealing
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=0.001,
        weight_decay=0.01,
        betas=(0.9, 0.999)
    )
    
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=10,  # Reset every 10 epochs
        T_mult=2  # Double the reset interval after each reset
    )
    
    best_val_loss = float('inf')
    best_model = None
    patience = 10
    patience_counter = 0
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        train_directions = []
        
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y.reshape(-1, 1))
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
            
            optimizer.step()
            train_losses.append(loss.item())
            
            # Calculate direction accuracy
            pred_direction = torch.sign(outputs.detach())
            true_direction = torch.sign(batch_y.reshape(-1, 1))
            train_directions.append(
                (pred_direction == true_direction).float().mean().item()
            )
        
        scheduler.step()
        
        # Validation phase
        model.eval()
        val_losses = []
        val_directions = []
        
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss = criterion(outputs, batch_y.reshape(-1, 1))
                val_losses.append(val_loss.item())
                
                pred_direction = torch.sign(outputs)
                true_direction = torch.sign(batch_y.reshape(-1, 1))
                val_directions.append(
                    (pred_direction == true_direction).float().mean().item()
                )
        
        # Calculate metrics
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        train_dir_acc = np.mean(train_directions)
        val_dir_acc = np.mean(val_directions)
        
        print(f'Epoch [{epoch+1}/{epochs}]')
        print(f'Train Loss: {train_loss:.6f}, Dir Acc: {train_dir_acc:.4f}')
        print(f'Val Loss: {val_loss:.6f}, Dir Acc: {val_dir_acc:.4f}')
        print(f'LR: {scheduler.get_last_lr()[0]:.6f}\n')
        
        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            
        if patience_counter >= patience:
            print(f'Early stopping triggered at epoch {epoch+1}')
            break
    
    model.load_state_dict(best_model)
    return model


X, y, scaler = prep_data(sample)

# Train model
# model = train_model(X, y)

Epoch [1/100]
Train Loss: 0.520534, Dir Acc: 0.5013
Val Loss: 0.512938, Dir Acc: 0.5028
LR: 0.000976

Epoch [2/100]
Train Loss: 0.520485, Dir Acc: 0.5013
Val Loss: 0.512943, Dir Acc: 0.5028
LR: 0.000905

Epoch [3/100]
Train Loss: 0.519841, Dir Acc: 0.5024
Val Loss: 0.518164, Dir Acc: 0.4941
LR: 0.000794

Epoch [4/100]
Train Loss: 0.520391, Dir Acc: 0.5015
Val Loss: 0.512932, Dir Acc: 0.5028
LR: 0.000655

Epoch [5/100]
Train Loss: 0.519914, Dir Acc: 0.5023
Val Loss: 0.518151, Dir Acc: 0.4941
LR: 0.000500

Epoch [6/100]
Train Loss: 0.519540, Dir Acc: 0.5029
Val Loss: 0.518202, Dir Acc: 0.4941
LR: 0.000345

Epoch [7/100]
Train Loss: 0.519807, Dir Acc: 0.5024
Val Loss: 0.512935, Dir Acc: 0.5028
LR: 0.000206

Epoch [8/100]
Train Loss: 0.519736, Dir Acc: 0.5026
Val Loss: 0.514279, Dir Acc: 0.5005
LR: 0.000095

Epoch [9/100]
Train Loss: 0.519352, Dir Acc: 0.5032
Val Loss: 0.514468, Dir Acc: 0.5002
LR: 0.000024

Epoch [10/100]
Train Loss: 0.519281, Dir Acc: 0.5033
Val Loss: 0.513837, Dir Acc: 