In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, TensorDataset
import numpy as np
import random

In [2]:
class RLAgent(nn.Module):
    """Small fully connected network that scores candidate actions for a state.

    A state vector of length ``state_size`` is mapped to ``action_size`` raw
    scores through two ReLU hidden layers of 128 and 64 units.

    ``calculate_state`` and ``calculate_reward`` do not use any instance data,
    so they are static methods: they can be called on the class
    (``RLAgent.calculate_state(...)``) or on an instance.
    """

    def __init__(self, state_size, action_size):
        super(RLAgent, self).__init__()
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, action_size)

    def forward(self, state):
        """Return one raw (unnormalised) score per action for ``state``."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    @staticmethod
    def calculate_state(loss, gradients, optimizer_state):
        """Build the 3-element state vector fed to the agent.

        Args:
            loss: Scalar tensor; its Python value is read with ``.item()``.
            gradients: Iterable of gradient tensors. ``None`` entries are skipped.
            optimizer_state: Number stored as the third state component.

        Returns:
            A ``float32`` tensor ``[loss, grad_norm, optimizer_state]`` where
            ``grad_norm`` is the sum of the individual tensors' ``norm()``
            values (not the norm of the concatenated gradient).
        """
        grad_norm = sum(g.norm().item() for g in gradients if g is not None)
        return torch.tensor([loss.item(), grad_norm, optimizer_state], dtype=torch.float32)

    @staticmethod
    def calculate_reward(previous_loss, current_loss):
        """Return the loss improvement between two consecutive steps.

        The value is positive when the loss went down and negative when it
        went up, so an agent that maximises reward prefers actions that
        reduce the loss.
        """
        return previous_loss - current_loss

In [3]:
class SimpleNet(nn.Module):
    """Feed-forward network: ``input_size`` -> 128 -> 64 -> ``output_size``."""

    def __init__(self, input_size, output_size):
        super().__init__()
        # Layers are created in forward order; the attribute names double as
        # the state-dict keys.
        self.fc1 = nn.Linear(in_features=input_size, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=output_size)

    def forward(self, x):
        """Apply both ReLU hidden layers and return the output layer's raw values."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [7]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

In [8]:
# Read the two input tables; both paths are relative to the working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')
test = pd.read_csv(filepath_or_buffer='test.csv')

In [12]:
# Bag-of-words counts over the `text` column, with the vocabulary capped at
# 1000 entries; the result is converted to a dense float32 tensor.
tokenizer = CountVectorizer(max_features=1000)
data = torch.tensor(
    tokenizer.fit_transform(train['text']).toarray(),
    dtype=torch.float32,
)

# Integer class ids taken from the category codes of the `author` column.
labels = torch.tensor(
    train['author'].astype('category').cat.codes.values,
    dtype=torch.long,
)

In [17]:
# 80/20 train/validation split of the vectorised samples.
SPLIT_SEED = 42  # fixed so the split membership is identical on every run

dataset = TensorDataset(data, labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)

# A dedicated seeded generator keeps the split reproducible without touching
# the global RNG state used elsewhere.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [18]:
# Layer sizes: one input per feature column, one output per distinct author.
input_size = data.size(1)
output_size = len(train['author'].unique())

# Network being trained and its loss function.
model = SimpleNet(input_size, output_size)
criterion = nn.CrossEntropyLoss()

# Two optimizers over the same model parameters.
# NOTE: `adopt_optimizer` is an Adam instance despite its name.
adopt_optimizer = optim.Adam(model.parameters(), lr=0.001)
sgd_optimizer = optim.SGD(model.parameters(), lr=0.01)

# Agent that chooses between the two optimizers.
#   state   : [loss, grad_norm, optimizer_state]
#   actions : [adam, SGD]
rl_agent = RLAgent(state_size=3, action_size=2)
rl_optimizer = optim.Adam(rl_agent.parameters(), lr=0.001)

In [21]:
epochs = 10
previous_loss = float('inf')  # loss of the most recent batch; inf until a batch has run


def train_model(verbose=False):
    """Train ``model`` while ``rl_agent`` picks the optimizer for every batch.

    Per batch:
      1. The loss is back-propagated once and the resulting parameter
         gradients are passed to ``RLAgent.calculate_state``.
      2. The agent scores the state; the highest score selects the optimizer
         that applies the step (0 -> ``adopt_optimizer``, otherwise
         ``sgd_optimizer``).
      3. The agent's score for the chosen action is regressed towards
         ``reward + max(action_values)``, with the reward taken from
         ``RLAgent.calculate_reward(previous_loss, current_loss)``.

    Reads the notebook-level ``model``, ``criterion``, ``train_loader``,
    ``epochs``, both optimizers, ``rl_agent`` and ``rl_optimizer``, and updates
    the notebook-level ``previous_loss``.

    Args:
        verbose: When true, print the selected optimizer for every batch.
            Off by default because one line per batch floods the cell output;
            a per-epoch usage summary is always printed.
    """
    # `previous_loss` is assigned below, so it must be declared global;
    # otherwise Python treats it as a local and the first read fails.
    global previous_loss

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        optimizer_state = 0  # index of the optimizer used on the previous batch
        usage = [0, 0]  # batches stepped by [adopt_optimizer, sgd_optimizer]

        for inputs, targets in train_loader:
            outputs = model(inputs.float())
            loss = criterion(outputs, targets)

            # One backward pass serves both the agent's state and the
            # optimizer step. Both optimizers wrap the same parameters, so
            # clearing the gradients on the model covers whichever is chosen.
            model.zero_grad()
            loss.backward()
            gradients = [param.grad for param in model.parameters()]
            state = RLAgent.calculate_state(loss, gradients, optimizer_state)

            # RL agent's decision
            action_values = rl_agent(state)
            action = torch.argmax(action_values).item()

            if action == 0:
                optimizer = adopt_optimizer
                usage[0] += 1
                if verbose:
                    print('Using ADOPT')
            else:
                optimizer = sgd_optimizer
                usage[1] += 1
                if verbose:
                    print('Using SGD')

            optimizer.step()
            # Feed the choice back so the next state records which optimizer ran last.
            optimizer_state = action

            current_loss = loss.item()

            # Update the RL agent only when a finite reward exists. The first
            # batch has no previous loss (inf), and a diverged loss would give
            # a non-finite target that turns the agent's weights into NaN.
            if np.isfinite(previous_loss) and np.isfinite(current_loss):
                reward = RLAgent.calculate_reward(previous_loss, current_loss)
                # NOTE(review): the bootstrap term uses the scores of the
                # current state, not of the next state - confirm this is the
                # intended update rule.
                target = reward + torch.max(action_values).detach()
                rl_loss = F.mse_loss(action_values[action], target)
                rl_optimizer.zero_grad()
                rl_loss.backward()
                rl_optimizer.step()

            previous_loss = current_loss
            epoch_loss += current_loss

        num_batches = max(len(train_loader), 1)  # avoid dividing by zero on an empty loader
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / num_batches:.4f}")
        print(f"  optimizer usage - ADOPT: {usage[0]}, SGD: {usage[1]}")