In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, TensorDataset
import numpy as np
import random

In [2]:
class RLAgent(nn.Module):
    def __init__(self, state_size, action_size):
        super(RLAgent, self).__init__()
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, action_size)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def calculate_state(self, loss, gradients, optimizer_state):
        grad_norm = sum(g.norm().item() for g in gradients if g is not None)
        return torch.tensor([loss.item(), grad_norm, optimizer_state], dtype=torch.float32)

    def calculate_reward(self, previous_loss, current_loss):
        return current_loss - previous_loss

In [3]:
class SimpleNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

In [5]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [6]:
tokenizer = CountVectorizer(max_features=1000)
data = tokenizer.fit_transform(train['text']).toarray()
labels = train['author'].astype('category').cat.codes
data = torch.tensor(data, dtype=torch.float32)
labels = torch.tensor(labels.values, dtype=torch.long)

In [7]:
dataset = TensorDataset(data, labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [8]:
input_size = data.shape[1]
output_size = len(train['author'].unique())
model = SimpleNet(input_size, output_size)
criterion = nn.CrossEntropyLoss()
adopt_optimizer = optim.Adam(model.parameters(), lr=0.001)
sgd_optimizer = optim.SGD(model.parameters(), lr=0.01)
rl_agent = RLAgent(state_size=3, action_size=2)  # state=  [loss, grad_norm, optimizer_state]; Actions: [adam, SGD]
rl_optimizer = optim.Adam(rl_agent.parameters(), lr=0.001)

In [None]:
def train_model(epochs, previous_loss):
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        optimizer_state = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.float(), targets
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            gradients = torch.autograd.grad(loss, model.parameters(), retain_graph=True)
            state = rl_agent.calculate_state(loss, gradients, optimizer_state)

            # RL agent's decision
            action_values = rl_agent(state)
            action = torch.argmax(action_values).item()

            if action == 0:
                optimizer = adopt_optimizer
                # print('Using ADOPT')
            else:
                optimizer = sgd_optimizer
                # print('Using SGD')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            reward = rl_agent.calculate_reward(previous_loss, loss.item())
            previous_loss = loss.item()

            # update RL agent
            target = reward + torch.max(action_values).detach()
            rl_loss = F.mse_loss(action_values[action], target)
            rl_optimizer.zero_grad()
            rl_loss.backward()
            rl_optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

In [19]:
def train_model_rl(epochs, previous_loss):
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        optimizer_state = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.float(), targets
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            gradients = torch.autograd.grad(loss, model.parameters(), retain_graph=True)
            state = rl_agent.calculate_state(loss, gradients, optimizer_state)

            # RL agent's decision
            action_values = rl_agent(state)
            action = torch.argmax(action_values).item()

            if action == 0:
                optimizer = adopt_optimizer
                # print('Using ADOPT')
            else:
                optimizer = sgd_optimizer
                # print('Using SGD')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            reward = rl_agent.calculate_reward(previous_loss, loss.item())
            previous_loss = loss.item()

            # update RL agent
            target = reward + torch.max(action_values).detach()
            rl_loss = F.mse_loss(action_values[action], target)
            rl_optimizer.zero_grad()
            rl_loss.backward()
            rl_optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

In [25]:
train_model(50, 100)

Epoch 1/50, Loss: 0.0022
Epoch 2/50, Loss: 0.0020
Epoch 3/50, Loss: 0.0211
Epoch 4/50, Loss: 0.0132
Epoch 5/50, Loss: 0.0050
Epoch 6/50, Loss: 0.0023
Epoch 7/50, Loss: 0.0021
Epoch 8/50, Loss: 0.0020
Epoch 9/50, Loss: 0.0020
Epoch 10/50, Loss: 0.0019
Epoch 11/50, Loss: 0.0020
Epoch 12/50, Loss: 0.0020
Epoch 13/50, Loss: 0.0021
Epoch 14/50, Loss: 0.0021
Epoch 15/50, Loss: 0.0020
Epoch 16/50, Loss: 0.0020
Epoch 17/50, Loss: 0.0020
Epoch 18/50, Loss: 0.0020
Epoch 19/50, Loss: 0.0020
Epoch 20/50, Loss: 0.0020
Epoch 21/50, Loss: 0.0021
Epoch 22/50, Loss: 0.0020
Epoch 23/50, Loss: 0.0072
Epoch 24/50, Loss: 0.0307
Epoch 25/50, Loss: 0.0080
Epoch 26/50, Loss: 0.0021
Epoch 27/50, Loss: 0.0019
Epoch 28/50, Loss: 0.0019
Epoch 29/50, Loss: 0.0018
Epoch 30/50, Loss: 0.0019
Epoch 31/50, Loss: 0.0019
Epoch 32/50, Loss: 0.0019
Epoch 33/50, Loss: 0.0020
Epoch 34/50, Loss: 0.0019
Epoch 35/50, Loss: 0.0019
Epoch 36/50, Loss: 0.0021
Epoch 37/50, Loss: 0.0019
Epoch 38/50, Loss: 0.0019
Epoch 39/50, Loss: 0.

In [21]:
def calculate_accuracy(model, data_loader):
    model.eval()
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.float(), targets
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == targets).sum().item()
            total_samples += targets.size(0)

    accuracy = correct_predictions / total_samples
    return accuracy

In [22]:
calculate_accuracy(model, val_loader)

0.6973953013278856