In [12]:
"""
Load all 4 trained models - FINAL WORKING VERSION
"""
import pickle
import torch
import torch.nn as nn
import xgboost as xgb
import numpy as np

# Define LSTM model architecture with 2 FC layers
class LSTMModel(nn.Module):
    """LSTM followed by a two-layer linear head.

    The attribute names ``lstm``, ``fc1`` and ``fc2`` determine the
    state-dict keys, so they must stay as they are for saved weights to load.
    """

    def __init__(self, input_size, hidden_size, num_layers, fc1_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc1 = nn.Linear(in_features=hidden_size, out_features=fc1_size)
        self.fc2 = nn.Linear(in_features=fc1_size, out_features=output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and apply both linear layers to its last step.

        ``x`` is indexed as (batch, time, feature) because the LSTM is built
        with ``batch_first=True``. No activation is applied between ``fc1``
        and ``fc2``.
        """
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc2(self.fc1(last_step))

# Define TCN model architecture
class TemporalBlock(nn.Module):
    """Residual block of two dilated 1-D convolutions with ReLU activations.

    Each convolution is padded by ``(kernel_size - 1) * dilation`` on both
    sides, which makes its raw output longer than its input by that amount.
    ``forward`` trims every convolution output back to the input length so
    the residual addition is shape-compatible and each output step depends
    only on the current and earlier input steps.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both convolutions.
        kernel_size: Convolution kernel width.
        dilation: Dilation factor used by both convolutions.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super(TemporalBlock, self).__init__()
        padding = (kernel_size - 1) * dilation

        # Bare nn.Module containers keep the state-dict keys as
        # "conv1.conv.*" / "conv2.conv.*", which the checkpoint loader
        # in this file looks up by name.
        self.conv1 = nn.Module()
        self.conv1.conv = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation)

        self.conv2 = nn.Module()
        self.conv2.conv = nn.Conv1d(out_channels, out_channels, kernel_size, padding=padding, dilation=dilation)

        # A 1x1 convolution matches channel counts for the residual path
        # only when they differ; otherwise the input is added unchanged.
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply both convolutions and add the residual connection.

        Args:
            x: Tensor whose last dimension is the time axis and whose
                second-to-last dimension has ``in_channels`` entries.

        Returns:
            Tensor with ``out_channels`` channels and the same time length
            as ``x``.
        """
        seq_len = x.size(-1)
        # Keep only the first seq_len steps: this drops the extra trailing
        # outputs created by the symmetric padding. Slicing by length (not
        # by ``-padding``) also stays correct when padding is 0.
        # NOTE(review): assumes the training-time wrapper trimmed the
        # trailing side (the usual causal-convolution layout) - confirm
        # against the training code.
        out = self.relu(self.conv1.conv(x)[..., :seq_len])
        out = self.relu(self.conv2.conv(out)[..., :seq_len])
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TCNModel(nn.Module):
    """Stack of ``TemporalBlock`` layers followed by a linear output layer.

    Block ``i`` uses dilation ``2 ** i``; its input width is ``num_inputs``
    for the first block and the previous block's width afterwards. The
    attribute names ``network`` and ``fc`` determine the state-dict keys.
    """

    def __init__(self, num_inputs, num_channels, kernel_size, output_size=3):
        super().__init__()
        widths = [num_inputs, *num_channels]
        blocks = [
            TemporalBlock(width_in, width_out, kernel_size, 2 ** level)
            for level, (width_in, width_out) in enumerate(zip(widths[:-1], widths[1:]))
        ]
        self.network = nn.Sequential(*blocks)
        self.fc = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        """Run the block stack and map the last position of its output through ``fc``."""
        features = self.network(x)
        final_step = features[:, :, -1]
        return self.fc(final_step)

# Define Transformer model architecture
class PositionalEncoding(nn.Module):
    """Learned positional offsets added to a sequence tensor.

    The offsets are a single trainable parameter ``pe`` of shape
    ``(1, max_len, d_model)``, initialised from a standard normal.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        initial_offsets = torch.randn(1, max_len, d_model)
        self.pe = nn.Parameter(initial_offsets)

    def forward(self, x):
        """Add the first ``x.size(1)`` positional offsets to ``x``."""
        steps = x.size(1)
        offsets = self.pe[:, :steps, :]
        return x + offsets

class TransformerModel(nn.Module):
    """Transformer encoder with an input projection and a two-layer linear head.

    The attribute names (``input_projection``, ``pos_encoder``,
    ``transformer_encoder``, ``fc1``, ``fc2``) determine the state-dict keys,
    so they must stay as they are for saved weights to load.
    """

    def __init__(self, input_size, d_model, nhead, num_layers, dim_feedforward, fc1_size, output_size, max_len=5000):
        super().__init__()
        self.input_projection = nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len)
        layer_template = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer_template, num_layers=num_layers)
        self.fc1 = nn.Linear(d_model, fc1_size)
        self.fc2 = nn.Linear(fc1_size, output_size)

    def forward(self, x):
        """Project, add positions, encode, then apply the head to the last step.

        ``x`` is indexed as (batch, time, feature) because the encoder layer
        is built with ``batch_first=True``. No activation is applied between
        ``fc1`` and ``fc2``.
        """
        embedded = self.pos_encoder(self.input_projection(x))
        encoded = self.transformer_encoder(embedded)
        last_step = encoded[:, -1, :]
        return self.fc2(self.fc1(last_step))

# Directory that holds the four saved model files; change it in one place
# instead of editing every load call.
MODEL_DIR = "C:/Users/wdkal/Downloads/RL_Model"

print("Loading all models...\n")

# Load the four trained models, rebuilding each PyTorch architecture from the
# tensor shapes (or stored hyperparameters) found in its checkpoint. Any
# failure is reported with a traceback instead of propagating, so the notebook
# cell always finishes.
try:
    # 1. XGBoost
    print("Loading XGBoost...")
    xgb_model = xgb.Booster()
    xgb_model.load_model(f"{MODEL_DIR}/xgb_best_model.json")
    print("✓ XGBoost loaded")

    # 2. LSTM
    print("Loading LSTM...")
    lstm_state = torch.load(f"{MODEL_DIR}/lstm_best_model.pt",
                            map_location=torch.device('cpu'))

    weight_ih = lstm_state['lstm.weight_ih_l0']
    input_size = weight_ih.shape[1]
    # weight_ih_l0 stacks the four LSTM gates along dim 0, hence the // 4.
    hidden_size = weight_ih.shape[0] // 4
    # Count stacked layers from the checkpoint keys instead of assuming 2,
    # so a checkpoint with a different depth still loads.
    num_layers = 0
    while f'lstm.weight_ih_l{num_layers}' in lstm_state:
        num_layers += 1
    fc1_size = lstm_state['fc1.weight'].shape[0]
    output_size = lstm_state['fc2.weight'].shape[0]

    lstm_model = LSTMModel(input_size, hidden_size, num_layers, fc1_size, output_size)
    lstm_model.load_state_dict(lstm_state)
    lstm_model.eval()
    # Capture the description now: input_size/output_size are reused by the
    # sections below and would otherwise report another model's values.
    lstm_summary = f"LSTM (input={input_size}, hidden={hidden_size}, output={output_size})"
    print(f"✓ LSTM loaded (input={input_size}, hidden={hidden_size})")

    # 3. TCN
    print("Loading TCN...")
    tcn_state = torch.load(f"{MODEL_DIR}/tcn_best_model.pt",
                           map_location=torch.device('cpu'))

    first_conv_weight = tcn_state['network.0.conv1.conv.weight']
    num_inputs = first_conv_weight.shape[1]
    kernel_size = first_conv_weight.shape[2]

    # One entry per temporal block: the output width of its first convolution.
    num_channels = []
    i = 0
    while f'network.{i}.conv1.conv.weight' in tcn_state:
        num_channels.append(tcn_state[f'network.{i}.conv1.conv.weight'].shape[0])
        i += 1

    output_size = tcn_state['fc.weight'].shape[0] if 'fc.weight' in tcn_state else 3

    tcn_model = TCNModel(num_inputs, num_channels, kernel_size, output_size=output_size)
    tcn_model.load_state_dict(tcn_state)
    tcn_model.eval()
    tcn_summary = f"TCN (input={num_inputs}, channels={num_channels}, output={output_size})"
    print(f"✓ TCN loaded (input={num_inputs}, channels={num_channels})")

    # 4. Transformer - Extract from checkpoint
    print("Loading Transformer...")
    # SECURITY: weights_only=False lets torch.load unpickle arbitrary objects,
    # which can execute code. Only use this with a checkpoint file you trust.
    checkpoint = torch.load(f"{MODEL_DIR}/best_transformer_model.pth",
                            map_location=torch.device('cpu'), weights_only=False)

    transformer_state = checkpoint['model_state_dict']
    hyperparams = checkpoint['hyperparameters']

    # Get all parameters
    input_size = hyperparams['input_size']
    d_model = hyperparams['d_model']
    nhead = hyperparams['nhead']
    num_layers = hyperparams['num_layers']
    dim_feedforward = hyperparams['dim_feedforward']

    # Infer fc sizes from weights
    fc1_size = transformer_state['fc1.weight'].shape[0]
    output_size = transformer_state['fc2.weight'].shape[0]

    # Infer max_len from positional encoding
    max_len = transformer_state['pos_encoder.pe'].shape[1]

    transformer_model = TransformerModel(
        input_size, d_model, nhead, num_layers,
        dim_feedforward, fc1_size, output_size, max_len
    )
    transformer_model.load_state_dict(transformer_state)
    transformer_model.eval()
    print(f"✓ Transformer loaded (d_model={d_model}, nhead={nhead}, layers={num_layers})")

    print("\n" + "="*60)
    print("✅ ALL 4 MODELS LOADED SUCCESSFULLY!")
    print("="*60)
    print("\nModels ready for inference:")
    print("1. xgb_model: XGBoost Booster")
    # Report the values actually read from the checkpoints, not fixed numbers.
    print(f"2. lstm_model: {lstm_summary}")
    print(f"3. tcn_model: {tcn_summary}")
    print(f"4. transformer_model: Transformer (d_model={d_model}, heads={nhead}, output={output_size})")
    print("\n🎉 All models are in eval() mode and ready for predictions!")

except Exception as e:
    # Top-level notebook boundary: report the failure and keep the cell alive.
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()

Loading all models...

Loading XGBoost...
✓ XGBoost loaded
Loading LSTM...
✓ LSTM loaded (input=22, hidden=32)
Loading TCN...
✓ TCN loaded (input=22, channels=[32, 64, 64])
Loading Transformer...
✓ Transformer loaded (d_model=128, nhead=8, layers=4)

✅ ALL 4 MODELS LOADED SUCCESSFULLY!

Models ready for inference:
1. xgb_model: XGBoost Booster
2. lstm_model: LSTM (input=22, hidden=32, output=3)
3. tcn_model: TCN (input=22, channels=[32,64,64], output=3)
4. transformer_model: Transformer (d_model=128, heads=8, output=3)

🎉 All models are in eval() mode and ready for predictions!
