In [None]:

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_rcv1
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
import torch
import torch.nn as nn
import torch.optim as optim
import lightgbm as lgb
import optuna
import os

# Load RCV1 dataset
print("Loading RCV1 dataset...")
data = fetch_rcv1()
X, y = data.data[:50000], data.target[:50000]  # Limit dataset size for performance

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Seed PyTorch so weight initialisation and dropout are repeatable across
# Restart & Run All; this reuses the seed already given to train_test_split.
torch.manual_seed(42)

# Convert to PyTorch tensors.
# The feature matrices are cast to float32 while still sparse, so densifying
# allocates the array once at its final dtype instead of first building a
# double-precision dense copy that is then converted.
X_train_tensor = torch.tensor(X_train.astype(np.float32).toarray(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.astype(np.float32).toarray(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.toarray(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.toarray(), dtype=torch.float32)


In [None]:

# Define and train the PyTorch neural network, checkpointing the best epoch
def train_neural_network(X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor,
                         num_epochs=10, learning_rate=0.001,
                         best_model_path='best_nn_model.pth'):
    """Train a small MLP with full-batch Adam and checkpoint its best epoch.

    After every epoch the network is evaluated on the test tensors with
    micro-averaged precision and recall (predictions thresholded at 0.5).
    Whenever precision improves, the model's state dict is written to
    ``best_model_path``.

    Args:
        X_train_tensor: 2-D float tensor of training features.
        X_test_tensor: 2-D float tensor of evaluation features.
        y_train_tensor: 2-D float tensor of training targets; its second
            dimension sets the number of outputs.
        y_test_tensor: 2-D float tensor of evaluation targets.
        num_epochs: Number of full-batch optimisation steps (default 10).
        learning_rate: Adam learning rate (default 0.001).
        best_model_path: File that receives the best state dict.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1, because no
            checkpoint could be produced.

    NOTE(review): the checkpoint is selected on the same split that is later
    used for reporting, so the reported precision is optimistic. Consider a
    separate validation split.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    print("Training neural network with PyTorch...")

    class NeuralNetwork(nn.Module):
        """Two hidden layers (128 and 64 units) with a sigmoid output per label."""

        def __init__(self, input_dim, output_dim):
            super(NeuralNetwork, self).__init__()
            self.fc1 = nn.Linear(input_dim, 128)
            self.dropout = nn.Dropout(0.2)
            self.fc2 = nn.Linear(128, 64)
            self.fc3 = nn.Linear(64, output_dim)

        def forward(self, x):
            x = torch.relu(self.fc1(x))
            x = self.dropout(x)
            x = torch.relu(self.fc2(x))
            x = torch.sigmoid(self.fc3(x))
            return x

    input_dim = X_train_tensor.shape[1]
    output_dim = y_train_tensor.shape[1]

    model = NeuralNetwork(input_dim, output_dim)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Start below any attainable precision so the first epoch always writes a
    # checkpoint. Starting at 0 with a strict ">" left no file on disk when
    # precision never rose above 0.
    best_precision = float('-inf')

    # The evaluation targets never change, so convert them once.
    y_test_np = y_test_tensor.numpy()

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            y_pred_np = (model(X_test_tensor) > 0.5).float().numpy()

        # zero_division=0 keeps the score at 0 (the value scikit-learn already
        # falls back to) without a warning when nothing is predicted positive.
        precision = precision_score(y_test_np, y_pred_np, average='micro', zero_division=0)
        recall = recall_score(y_test_np, y_pred_np, average='micro', zero_division=0)

        if precision > best_precision:
            best_precision = precision
            torch.save(model.state_dict(), best_model_path)

        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    print("Best model saved with precision:", best_precision)

# Run the training loop on the tensors prepared in the first cell.
train_neural_network(
    X_train_tensor=X_train_tensor,
    X_test_tensor=X_test_tensor,
    y_train_tensor=y_train_tensor,
    y_test_tensor=y_test_tensor,
)


In [None]:

# LightGBM with Optuna
def objective(trial):
    """Optuna objective: fit one LightGBM multiclass model and score it.

    Five hyperparameters are sampled from ``trial``, the booster is trained
    on the module-level ``X_train`` and evaluated on ``X_test``.
    Each label row is collapsed to a single class id with ``argmax``.

    Returns:
        Accuracy of the booster's argmax predictions on ``X_test``.

    NOTE(review): the split used for tuning here is the same one the notebook
    reports final metrics on.
    """
    fixed_settings = {
        'objective': 'multiclass',
        'num_class': y_train.shape[1],
        'metric': 'multi_logloss',
        'boosting_type': 'gbdt',
    }
    # The suggest_* calls stay in their original order.
    sampled_settings = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
    }
    booster_config = {**fixed_settings, **sampled_settings}

    train_labels = np.argmax(y_train.toarray(), axis=1)
    holdout_labels = np.argmax(y_test.toarray(), axis=1)

    train_set = lgb.Dataset(X_train, label=train_labels)
    holdout_set = lgb.Dataset(X_test, label=holdout_labels, reference=train_set)

    booster = lgb.train(booster_config, train_set, valid_sets=[holdout_set])

    predicted_labels = np.argmax(booster.predict(X_test), axis=1)
    return accuracy_score(holdout_labels, predicted_labels)

def train_lightgbm():
    """Tune LightGBM with Optuna, retrain with the best trial and print metrics.

    Runs a 5-trial maximisation study over ``objective``, retrains a
    multiclass booster for 100 rounds on ``X_train`` with the winning
    hyperparameters, and prints accuracy, micro precision and micro recall
    on ``X_test``.

    Returns:
        The finished ``optuna`` study, so callers can reuse
        ``study.best_params`` without repeating the search.
    """
    print("Training LightGBM with Optuna...")
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=5)

    print("Best parameters:", study.best_params)

    # study.best_params contains only the values sampled through `trial`.
    # The fixed settings used in `objective` must be added back, otherwise
    # LightGBM falls back to its default objective and predict() no longer
    # returns one column per class.
    final_params = {
        'objective': 'multiclass',
        'num_class': y_train.shape[1],
        'metric': 'multi_logloss',
        'boosting_type': 'gbdt',
        **study.best_params,
    }

    train_labels = np.argmax(y_train.toarray(), axis=1)
    test_labels = np.argmax(y_test.toarray(), axis=1)

    d_train = lgb.Dataset(X_train, label=train_labels)
    best_gbm = lgb.train(final_params, d_train, num_boost_round=100, valid_sets=[d_train])

    y_pred = np.argmax(best_gbm.predict(X_test), axis=1)
    accuracy = accuracy_score(test_labels, y_pred)
    precision = precision_score(test_labels, y_pred, average='micro')
    recall = recall_score(test_labels, y_pred, average='micro')

    print(f"LightGBM Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    return study

# Keep the study at module level: compare_results() reads study.best_params.
study = train_lightgbm()


In [None]:
# Compare PyTorch and LightGBM results
class NeuralNetwork(nn.Module):
    """Module-level copy of the network that train_neural_network defines locally.

    That class is not visible outside the training function, so it is
    repeated here to load the saved checkpoint. Attribute names and layer
    sizes must stay identical or the state dict will not load.
    """

    def __init__(self, input_dim, output_dim):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x


def compare_results(best_params=None, model_path='best_nn_model.pth'):
    """Evaluate the saved network and a retrained LightGBM model side by side.

    Loads the network checkpoint from ``model_path``, retrains LightGBM for
    100 rounds on ``X_train`` and prints a table of accuracy, micro precision
    and micro recall for both models on the test split.

    Args:
        best_params: Tuned LightGBM hyperparameters. When omitted, a
            module-level ``study`` is used if one exists; otherwise a new
            5-trial Optuna search over ``objective`` is run.
        model_path: File holding the network's saved state dict.

    NOTE(review): the network is scored on the label matrix, while LightGBM
    is scored on one argmax class id per row, so the two accuracy columns
    measure different things.
    """
    print("\nComparing PyTorch and LightGBM results...")

    # Load the best PyTorch model
    input_dim = X_train_tensor.shape[1]
    output_dim = y_train_tensor.shape[1]
    model = NeuralNetwork(input_dim, output_dim)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    y_true_nn = y_test_tensor.numpy()
    with torch.no_grad():
        y_pred_nn = (model(X_test_tensor) > 0.5).float().numpy()
    accuracy_nn = accuracy_score(y_true_nn, y_pred_nn)
    precision_nn = precision_score(y_true_nn, y_pred_nn, average='micro', zero_division=0)
    recall_nn = recall_score(y_true_nn, y_pred_nn, average='micro', zero_division=0)

    # Resolve the tuned hyperparameters.
    if best_params is None:
        try:
            # Raises NameError when no module-level `study` was kept.
            best_params = study.best_params
        except NameError:
            print("No Optuna study found; running a new hyperparameter search...")
            fallback_study = optuna.create_study(direction='maximize')
            fallback_study.optimize(objective, n_trials=5)
            best_params = fallback_study.best_params

    # Tuned values alone do not describe the task: add the fixed multiclass
    # settings back so predict() yields one probability column per class.
    # verbosity=-1 replaces the `verbose_eval` keyword, which lgb.train no
    # longer accepts.
    lgb_params = {
        'objective': 'multiclass',
        'num_class': y_train.shape[1],
        'metric': 'multi_logloss',
        'boosting_type': 'gbdt',
        'verbosity': -1,
        **best_params,
    }

    # Evaluate the best LightGBM model
    train_labels = np.argmax(y_train.toarray(), axis=1)
    test_labels = np.argmax(y_test.toarray(), axis=1)
    d_train = lgb.Dataset(X_train, label=train_labels)
    best_gbm = lgb.train(lgb_params, d_train, num_boost_round=100)

    y_pred_gbm = np.argmax(best_gbm.predict(X_test), axis=1)
    accuracy_gbm = accuracy_score(test_labels, y_pred_gbm)
    precision_gbm = precision_score(test_labels, y_pred_gbm, average='micro')
    recall_gbm = recall_score(test_labels, y_pred_gbm, average='micro')

    # Display results
    results = pd.DataFrame({
        'Model': ['PyTorch Neural Network', 'LightGBM'],
        'Accuracy': [accuracy_nn, accuracy_gbm],
        'Precision': [precision_nn, precision_gbm],
        'Recall': [recall_nn, recall_gbm]
    })

    print("\nResults Comparison:")
    print(results)

# Print the metric table for both models.
compare_results()
