In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import roc_auc_score
import xgboost as xgb

from RunningDataset import RunningDataset
from autoencoder import Autoencoder

class XGBoostClassifier:
    """XGBoost classifier trained on features produced by an autoencoder.

    Holds an ``Autoencoder`` used to encode raw samples and an
    ``xgb.XGBClassifier`` that is fitted and evaluated on the encoded
    features.
    """

    def __init__(self):
        self.autoencoder = Autoencoder()
        self.model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')

    def encode_features(self, dataset, model_path=None) -> np.ndarray:
        """Encode every sample of *dataset* with the autoencoder.

        Args:
            dataset: Dataset whose items are ``(x, target)`` pairs; only
                ``x`` is used, the second element is ignored.
            model_path: Optional path to a saved autoencoder state dict.
                When given (and non-empty) the weights are loaded into
                ``self.autoencoder`` before encoding; otherwise the
                autoencoder's current weights are used.

        Returns:
            The per-batch outputs of ``autoencoder.encode_sample`` stacked
            vertically, in dataset order.

        Raises:
            ValueError: If *dataset* yields no batches.
        """
        if model_path:
            # map_location="cpu" lets a checkpoint saved from a GPU load on a
            # CPU-only host; load_state_dict copies the values into the
            # existing parameters, so the model's own device is unaffected.
            # NOTE(review): torch.load unpickles the file - only load
            # checkpoints from a trusted source.
            state_dict = torch.load(model_path, map_location="cpu")
            self.autoencoder.load_state_dict(state_dict)

        # shuffle=False keeps the encoded rows aligned with the dataset order
        # (and therefore with any label array held by the caller).
        loader = DataLoader(dataset, batch_size=512, shuffle=False)

        self.autoencoder.eval()
        with torch.no_grad():
            features = [
                self.autoencoder.encode_sample(x).cpu().numpy()
                for x, _ in loader
            ]

        if not features:
            # np.vstack([]) would raise an opaque ValueError; keep the same
            # exception type but say what actually went wrong.
            raise ValueError("cannot encode features from an empty dataset")

        return np.vstack(features)

    def fit(self, X_train, y_train):
        """Fit the underlying XGBoost model on features and labels."""
        self.model.fit(X_train, y_train)

    def predict(self, X):
        """Return column 1 of ``predict_proba`` for each row of *X*.

        Note that this is a probability score, not a hard class label.
        """
        return self.model.predict_proba(X)[:, 1]

    def evaluate(self, X_test, y_test):
        """Return the ROC AUC of the scores from ``predict`` against *y_test*."""
        y_pred = self.predict(X_test)
        return roc_auc_score(y_test, y_pred)

def run():
    """Train XGBoost on autoencoder-encoded features and print the test AUC.

    Loads the train/test split from ``RunningDataset.preprocess()``, encodes
    both splits with a previously saved autoencoder checkpoint, fits an
    ``XGBoostClassifier`` on the encoded training features and prints the
    ROC AUC obtained on the encoded test features.
    """
    # Checkpoint of an already-trained autoencoder; it must exist on disk.
    model_path = 'saved_autoencoder_models/autoencoder_epoch_100.pth'

    dataset = RunningDataset()
    X_train, y_train, X_test, y_test = dataset.preprocess()

    # encode_features iterates (input, target) pairs and ignores the target,
    # so the inputs are passed twice just to satisfy that shape.
    train_dataset = TensorDataset(torch.Tensor(X_train), torch.Tensor(X_train))
    test_dataset = TensorDataset(torch.Tensor(X_test), torch.Tensor(X_test))

    classifier = XGBoostClassifier()
    encoded_X_train = classifier.encode_features(train_dataset, model_path)
    # The weights were loaded into classifier.autoencoder by the call above,
    # so the checkpoint does not need to be read from disk a second time.
    encoded_X_test = classifier.encode_features(test_dataset)

    # Convert labels to numpy arrays
    y_train_np = np.array(y_train)
    y_test_np = np.array(y_test)

    classifier.fit(encoded_X_train, y_train_np)
    auc = classifier.evaluate(encoded_X_test, y_test_np)
    print(f"AUC Score: {auc:.4f}")

ModuleNotFoundError: No module named 'xgboost'