In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import optuna
import numpy as np
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the Reuters dataset
max_len = 300  # Adjust based on the dataset analysis
(X_train, y_train), (X_test, y_test) = reuters.load_data(path="reuters.npz")
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

# Convert the data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create a DataLoader
batch_size = 32
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, output_dim, n_hidden, n_units, dropout_rate):
        super(NeuralNetwork, self).__init__()
        layers = [nn.Linear(input_dim, n_units), nn.ReLU(), nn.Dropout(dropout_rate)]

        for _ in range(n_hidden):
            layers += [nn.Linear(n_units, n_units), nn.ReLU(), nn.Dropout(dropout_rate)]

        layers += [nn.Linear(n_units, output_dim)]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        logits = self.network(x)
        return logits

def create_model(trial, input_dim, output_dim):
    n_hidden = trial.suggest_int('n_hidden', 2, 5)  # Increased range for hidden layers
    n_units = trial.suggest_int('n_units', 64, 256)  # Increased range for units per layer
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)  # Suggesting dropout rate
    model = NeuralNetwork(input_dim, output_dim, n_hidden, n_units, dropout_rate)
    return model


def objective(trial):
    model = create_model(trial, X_train.shape[1], len(np.unique(y_train)))
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(10):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            output = model(X_batch)
            _, predicted = torch.max(output.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    accuracy = correct / total
    return accuracy



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


In [3]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10, n_jobs=-1)  # Reduced the number of trials for brevity

print(study.best_params)

# Create a model with the best hyperparameters found
best_model = create_model(study.best_trial, X_train.shape[1], len(np.unique(y_train)))

[I 2024-04-29 23:15:27,451] A new study created in memory with name: no-name-4f16d8f4-04f2-4a12-8558-f6c82b3cea8f
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
[I 2024-04-29 23:15:56,154] Trial 0 finished with value: 0.37934105075690117 and parameters: {'n_hidden': 5, 'n_units': 179, 'dropout_rate': 0.49605091496784925, 'learning_rate': 5.356339767565198e-05}. Best is trial 0 with value: 0.37934105075690117.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
[I 2024-04-29 23:16:04,448] Trial 1 finished with value: 0.3619768477292965 and parameters: {'n_hidden': 4, 'n_units': 167, 'dropout_rate': 0.21292832304818307, 'learning_rate': 0.0050759796550880615}. Best is trial 0 with value: 0.37934105075690117.
[I 2024-04-29 23:16:17,281] Trial 2 finished with value: 0.2773820124666073 and parameters: {'n_hidden': 2, 'n_units': 249, 'dropout_rate': 0.4957150897922328, 'learning_rate': 1.6660040028371906e-05}. Best is trial 0 with value: 0.3793410

{'n_hidden': 3, 'n_units': 171, 'dropout_rate': 0.23599819193024688, 'learning_rate': 0.000823571972729172}


In [4]:
# Evaluate the best model on test data
best_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        output = best_model(X_batch)
        _, predicted = torch.max(output.data, 1)
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()

accuracy = correct / total
print(f"Accuracy of the best model: {accuracy}")

Accuracy of the best model: 0.030276046304541407
