In [None]:
import torch
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset
from matplotlib import pyplot as plt

In [None]:
class KibaDataset(Dataset):
    """Feature-only dataset: each index yields one entry of the feature tensor.

    Args:
        features: Array-like of numeric values. It is copied into a
            ``torch.float32`` tensor when the dataset is constructed.
    """

    def __init__(self, features) -> None:
        # torch.float is an alias of torch.float32.
        self.features = torch.tensor(features, dtype=torch.float)

    def __len__(self) -> int:
        """Return the number of entries along the first dimension."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the feature entry stored at position ``idx``."""
        return self.features[idx]

In [None]:
class KibaModel(torch.nn.Module):
    """Fully connected network mapping ``input_size`` features to one output.

    Layer stack, in order:
        Linear(input_size, 256) -> BatchNorm1d -> ReLU -> Dropout(0.4)
        Linear(256, 128)        -> BatchNorm1d -> ReLU -> Dropout(0.3)
        Linear(128, 64)         -> BatchNorm1d -> ReLU
        Linear(64, 1)

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        nn = torch.nn
        layers = [
            # Stage 1
            nn.Linear(input_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            # Stage 2
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            # Stage 3 (no dropout)
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            # Output head
            nn.Linear(64, 1),
        ]
        # The attribute name and the layer positions define the state_dict
        # keys ("model.<index>.<param>"), so both must stay exactly as they are
        # for previously saved weights to load.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.model(x)

In [None]:
# Load the model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = 576 # This assumes the same feature engineering has happened to the test set
model = KibaModel(input_size)

# map_location remaps the stored tensors onto `device`; without it, a
# checkpoint saved from a CUDA session cannot be loaded on a CPU-only machine.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
state_dict = torch.load('regression_model.pth', map_location=device)
model.load_state_dict(state_dict)
model.to(device)
# eval() switches Dropout and BatchNorm layers to inference behaviour.
model.eval()

In [None]:
# Load and preprocess the test data
# np.load on an .npz archive returns a lazily-read NpzFile that keeps the file
# open; the context manager closes it once both arrays have been read.
with np.load('processed_dataset.npz') as loaded_data:
    test_features = loaded_data['features']
    targets = loaded_data['targets']

assert len(test_features) == len(targets), (
    f"features/targets row mismatch: {len(test_features)} vs {len(targets)}"
)

# NOTE(review): the scaler is fitted on the evaluation data itself. If the
# model was trained with a scaler fitted on a different (training) split, that
# fitted scaler should be persisted and re-used here with .transform() instead
# -- confirm against the training notebook.
scaler_features = StandardScaler()
test_features = scaler_features.fit_transform(test_features)

# Fail early with a readable message rather than a matmul shape error later.
assert test_features.shape[1] == input_size, (
    f"expected {input_size} features per row, got {test_features.shape[1]}"
)

# Create test DataLoader (shuffle=False keeps predictions aligned with targets)
test_dataset = KibaDataset(test_features)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Perform inference
# Collect one array per batch and concatenate once at the end, instead of
# extending a Python list with one tiny array per sample.
batch_predictions = []
with torch.no_grad():  # no gradients needed for inference
    for inputs in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        batch_predictions.append(outputs.cpu().numpy())

# The final Linear(64, 1) layer yields shape (batch, 1); flatten to 1-D so that
# element-wise comparisons with a 1-D target vector do not broadcast to (N, N).
if batch_predictions:
    predictions = np.concatenate(batch_predictions, axis=0).ravel()
else:
    # Empty loader: keep a well-formed empty result instead of failing.
    predictions = np.empty(0, dtype=np.float32)

# Assumes the model was trained on log1p-transformed targets (per the original
# comment here) -- expm1 maps predictions back to the original scale.
predictions = np.expm1(predictions)  # Inverse log1p transformation

In [None]:
# Predictions vs ground truth: scatter plot with the identity line as reference
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(targets, predictions, alpha=0.5)

# Dashed red diagonal spanning the target range (where prediction == target).
target_min, target_max = targets.min(), targets.max()
ax.plot([target_min, target_max], [target_min, target_max], 'r--', lw=2)

ax.set_title('Predictions vs Ground Truth')
ax.set_xlabel('Ground Truth')
ax.set_ylabel('Predictions')
ax.grid(True)
plt.show()