In [None]:
# Mount Google Drive at /content/drive; the dataset path and the model
# checkpoints used in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Path to the custom dataset created for this project with our `create_database.ipynb` notebook.

In [None]:
# Location of the project's dataset on the mounted Drive; the file is read
# further down one line at a time, each line being parsed as JSON.
database_path = '/content/drive/MyDrive/Discord_Manager_files/discord_commands.jsonl'

Relevant imports:

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import json
from transformers import BertModel, BertTokenizer, GPT2Model, GPT2Tokenizer
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

Implementing a BERT model for multi-label classification:
for every input we must classify the action, the username and the role.
While all of the commands the bot supports require an action and a username, some commands do not use a role; for those commands the role is classified as `<ROLE_100>`.

In [None]:
class BertForMultiLabel(nn.Module):
    """BERT encoder followed by three independent linear classification heads.

    The pooled output of ``bert-base-uncased`` is shared by all three heads,
    which produce logits for the action, the user and the role respectively.

    Args:
        num_actions: Number of output classes of the action head.
        num_users: Number of output classes of the user head.
        num_roles: Number of output classes of the role head.
    """

    def __init__(self, num_actions, num_users, num_roles):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(0.1)

        # Classification heads, all fed from the same pooled representation.
        hidden_size = self.bert.config.hidden_size
        self.action_classifier = nn.Linear(hidden_size, num_actions)
        self.user_classifier = nn.Linear(hidden_size, num_users)
        self.role_classifier = nn.Linear(hidden_size, num_roles)

    def forward(self, input_ids, attention_mask):
        """Return ``(action_logits, user_logits, role_logits)`` for a batch.

        Args:
            input_ids: Token ids passed to the BERT encoder.
            attention_mask: Attention mask passed to the BERT encoder.

        Returns:
            A tuple with the raw (un-normalised) outputs of the action, user
            and role heads, in that order.
        """
        outputs = self.bert(input_ids, attention_mask=attention_mask)

        # The dropout layer was previously created but never used. It only
        # has an effect in training mode; in eval mode it is the identity,
        # so inference results and saved state dicts are unaffected.
        pooled_output = self.dropout(outputs.pooler_output)

        action_result = self.action_classifier(pooled_output)
        user_result = self.user_classifier(pooled_output)
        role_result = self.role_classifier(pooled_output)

        return action_result, user_result, role_result

In [None]:
class GPT2ForMultiLabel(nn.Module):
    """GPT-2 backbone followed by three independent linear classification heads.

    One hidden state per sequence is selected from the backbone output and
    fed to the action, user and role heads.

    Args:
        num_actions: Number of output classes of the action head.
        num_users: Number of output classes of the user head.
        num_roles: Number of output classes of the role head.
        pool_last_real_token: When True (default) the hidden state of the last
            position whose attention-mask entry is non-zero is used. When
            False the hidden state at the final sequence position is used
            regardless of the mask, which is the earlier behaviour. Weights
            trained with one pooling mode should be evaluated with the same
            mode, so checkpoints trained before this option existed need
            ``pool_last_real_token=False`` (or retraining).
    """

    def __init__(self, num_actions, num_users, num_roles, pool_last_real_token=True):
        super().__init__()
        self.gpt2 = GPT2Model.from_pretrained("gpt2")
        self.dropout = nn.Dropout(0.1)
        self.pool_last_real_token = pool_last_real_token

        # Classification heads, all fed from the same pooled representation.
        hidden_size = self.gpt2.config.hidden_size
        self.action_classifier = nn.Linear(hidden_size, num_actions)
        self.user_classifier = nn.Linear(hidden_size, num_users)
        self.role_classifier = nn.Linear(hidden_size, num_roles)

    def _pool(self, hidden_states, attention_mask):
        """Select one hidden state per sequence from ``hidden_states``."""
        if attention_mask is None or not self.pool_last_real_token:
            return hidden_states[:, -1, :]

        # Index of the right-most attended position in every row. Multiplying
        # the mask by the position index and taking the argmax works for both
        # left- and right-padded batches; a row with an all-zero mask falls
        # back to position 0.
        positions = torch.arange(hidden_states.size(1), device=hidden_states.device)
        mask = attention_mask.to(hidden_states.device).long()
        last_real_index = (mask * positions).argmax(dim=1)
        row_index = torch.arange(hidden_states.size(0), device=hidden_states.device)
        return hidden_states[row_index, last_real_index]

    def forward(self, input_ids, attention_mask):
        """Return ``(action_logits, user_logits, role_logits)`` for a batch.

        Args:
            input_ids: Token ids passed to the GPT-2 backbone.
            attention_mask: Attention mask passed to the GPT-2 backbone; also
                used to locate the last non-padding token of every sequence.

        Returns:
            A tuple with the raw (un-normalised) outputs of the action, user
            and role heads, in that order.
        """
        outputs = self.gpt2(input_ids, attention_mask=attention_mask)

        # Inputs in this notebook are padded to a fixed length, so the final
        # position is a padding slot whenever padding is added on the right.
        # Pool at the last attended token instead of a fixed position.
        pooled_output = self._pool(outputs.last_hidden_state, attention_mask)

        # Identity in eval mode; regularises the heads during training.
        pooled_output = self.dropout(pooled_output)

        action_result = self.action_classifier(pooled_output)
        user_result = self.user_classifier(pooled_output)
        role_result = self.role_classifier(pooled_output)

        return action_result, user_result, role_result



A dataset class that tokenizes each sample's input text and converts its action, user and role labels into tensors the model can handle.

In [None]:
class DiscordDataset(Dataset):
    """Dataset turning raw command samples into tokenized inputs and label ids.

    Every sample is expected to provide ``sample["input"]`` (the text) and
    ``sample["output"]`` with the keys ``"action"``, ``"user"`` and,
    optionally, ``"role"``.

    Args:
        data: Indexable collection of samples.
        action_map: Mapping from action name to class index.
        user_map: Mapping from user placeholder to class index.
        role_map: Mapping from role placeholder to class index; must contain
            ``"<ROLE_100>"``, which is used for samples without a role.
        tokenizer: Callable tokenizer returning ``"input_ids"`` and
            ``"attention_mask"`` tensors with a leading batch dimension.
        max_length: Length every input is padded / truncated to (default 64).
    """

    def __init__(self, data, action_map, user_map, role_map, tokenizer, max_length=64):
        self.data = data
        self.tokenizer = tokenizer
        self.action_map = action_map
        self.user_map = user_map
        self.role_map = role_map
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the tokenized input and the three label tensors of sample ``idx``."""
        sample = self.data[idx]
        input_text = sample["input"]
        target = sample["output"]

        action = self.action_map[target["action"]]
        user = self.user_map[target["user"]]
        if "role" in target:  # Some actions may not have roles
            role = self.role_map[target["role"]]
        else:
            role = self.role_map["<ROLE_100>"]

        encoding = self.tokenizer(
            input_text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        return {
            # squeeze(0) removes only the batch dimension added by
            # return_tensors="pt"; a bare squeeze() would also drop the
            # sequence dimension if it happened to have size 1.
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "action": torch.tensor(action, dtype=torch.long),
            "user": torch.tensor(user, dtype=torch.long),
            "role": torch.tensor(role, dtype=torch.long)
        }

Defining user, role and action maps

In [None]:
# Placeholders are numbered from 1 while class indices start at 0,
# so "<USER_1>" maps to 0 and "<USER_100>" maps to 99 (same for roles).
user_index_map = {f"<USER_{number}>": number - 1 for number in range(1, 101)}
role_index_map = {f"<ROLE_{number}>": number - 1 for number in range(1, 101)}
# Supported bot actions, indexed in the order listed here.
action_map = {name: index for index, name in enumerate(("ban", "add_role", "remove_role"))}

Before training, we first load the dataset that we created specifically for this task.

In [None]:
# Read the dataset file: every non-empty line is parsed as one JSON sample.
raw_dataset = []
# Explicit UTF-8 so decoding does not depend on the platform's default encoding.
with open(database_path, "r", encoding="utf-8") as file:
  for line in file:
    line = line.strip()
    if not line:
      # Blank lines (e.g. an extra empty line at the end of the file)
      # are skipped; json.loads would raise on an empty string.
      continue
    raw_dataset.append(json.loads(line))

Splitting the dataset into training (90%) and validation (10%) sets.

Training the models (first BERT, then GPT-2) and printing their progress after every epoch:

In [None]:
# Set to True to (re)train the BERT model, e.g. when there is no trained
# version of it on your Drive yet.
should_run = False
if should_run:
  # Seed PyTorch's global random number generator so that runs start from the
  # same random state (the train/validation split below is already seeded
  # through random_state).
  torch.manual_seed(42)

  tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
  # Hold out 10% of the samples for validation.
  train_data, val_data = train_test_split(raw_dataset, test_size=0.1, random_state=42) # :)
  train_dataset = DiscordDataset(train_data, action_map, user_index_map, role_index_map, tokenizer)
  val_dataset = DiscordDataset(val_data, action_map, user_index_map, role_index_map, tokenizer)
  train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
  val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)


  model = BertForMultiLabel(num_actions=len(action_map), num_users=len(user_index_map), num_roles=len(role_index_map))
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model.to(device)
  # One criterion shared by the three heads; their losses are summed below.
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.AdamW(model.parameters(), lr=3e-5)
  num_epochs = 6
  for epoch in range(num_epochs):
      model.train()
      total_train_loss = 0

      for batch in train_loader:
          input_ids = batch["input_ids"].to(device)
          attention_mask = batch["attention_mask"].to(device)
          action_labels = batch["action"].to(device)
          user_labels = batch["user"].to(device)
          role_labels = batch["role"].to(device)

          optimizer.zero_grad()
          action_logits, user_logits, role_logits = model(input_ids, attention_mask)

          # Joint objective: unweighted sum of the three per-head losses.
          loss = (
              criterion(action_logits, action_labels) +
              criterion(user_logits, user_labels) +
              criterion(role_logits, role_labels)
          )

          loss.backward()
          optimizer.step()
          total_train_loss += loss.item()

      # Mean of the per-batch losses over the epoch.
      avg_train_loss = total_train_loss / len(train_loader)

      # Validation Step
      model.eval()
      total_val_loss = 0
      correct_actions, correct_users, correct_roles = 0, 0, 0
      total_samples = 0

      with torch.no_grad():
          for batch in val_loader:
              input_ids = batch["input_ids"].to(device)
              attention_mask = batch["attention_mask"].to(device)
              action_labels = batch["action"].to(device)
              user_labels = batch["user"].to(device)
              role_labels = batch["role"].to(device)

              action_logits, user_logits, role_logits = model(input_ids, attention_mask)

              loss = (
                  criterion(action_logits, action_labels) +
                  criterion(user_logits, user_labels) +
                  criterion(role_logits, role_labels)
              )
              total_val_loss += loss.item()

              # Compute Accuracy: count argmax predictions that match the labels, per head.
              correct_actions += (torch.argmax(action_logits, dim=1) == action_labels).sum().item()
              correct_users += (torch.argmax(user_logits, dim=1) == user_labels).sum().item()
              correct_roles += (torch.argmax(role_logits, dim=1) == role_labels).sum().item()
              total_samples += input_ids.size(0)

      avg_val_loss = total_val_loss / len(val_loader)
      action_acc = correct_actions / total_samples
      user_acc = correct_users / total_samples
      role_acc = correct_roles / total_samples

      print(f"Epoch {epoch+1}/{num_epochs}")
      print(f"Train Loss: {avg_train_loss:.4f}")
      print(f"Val Loss: {avg_val_loss:.4f}")
      print(f"Action Acc: {action_acc:.4f}, User Acc: {user_acc:.4f}, Role Acc: {role_acc:.4f}")

  # Save the trained model (weights only, as a state dict)
  torch.save(model.state_dict(), '/content/drive/MyDrive/Discord_Manager_files/bert_discord_model.pth')

In [None]:
# Set to True to (re)train the GPT-2 model, e.g. when there is no trained
# version of it on your Drive yet.
should_run = False
if should_run:
  # Seed PyTorch's global random number generator so that runs start from the
  # same random state (the train/validation split below is already seeded
  # through random_state).
  torch.manual_seed(42)

  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
  # DiscordDataset pads every sample to a fixed length, which needs a padding
  # token; the end-of-sequence token is reused for that purpose.
  tokenizer.pad_token = tokenizer.eos_token

  # Hold out 10% of the samples for validation.
  train_data, val_data = train_test_split(raw_dataset, test_size=0.1, random_state=42) # :)
  train_dataset = DiscordDataset(train_data, action_map, user_index_map, role_index_map, tokenizer)
  val_dataset = DiscordDataset(val_data, action_map, user_index_map, role_index_map, tokenizer)
  train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
  val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

  model = GPT2ForMultiLabel(num_actions=len(action_map), num_users=len(user_index_map), num_roles=len(role_index_map))
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model.to(device)
  # One criterion shared by the three heads; their losses are summed below.
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.AdamW(model.parameters(), lr=3e-5)
  num_epochs = 6
  for epoch in range(num_epochs):
      model.train()
      total_train_loss = 0

      for batch in train_loader:
          input_ids = batch["input_ids"].to(device)
          attention_mask = batch["attention_mask"].to(device)
          action_labels = batch["action"].to(device)
          user_labels = batch["user"].to(device)
          role_labels = batch["role"].to(device)

          optimizer.zero_grad()
          action_logits, user_logits, role_logits = model(input_ids, attention_mask)

          # Joint objective: unweighted sum of the three per-head losses.
          loss = (
              criterion(action_logits, action_labels) +
              criterion(user_logits, user_labels) +
              criterion(role_logits, role_labels)
          )

          loss.backward()
          optimizer.step()
          total_train_loss += loss.item()

      # Mean of the per-batch losses over the epoch.
      avg_train_loss = total_train_loss / len(train_loader)

      # Validation Step
      model.eval()
      total_val_loss = 0
      correct_actions, correct_users, correct_roles = 0, 0, 0
      total_samples = 0

      with torch.no_grad():
          for batch in val_loader:
              input_ids = batch["input_ids"].to(device)
              attention_mask = batch["attention_mask"].to(device)
              action_labels = batch["action"].to(device)
              user_labels = batch["user"].to(device)
              role_labels = batch["role"].to(device)

              action_logits, user_logits, role_logits = model(input_ids, attention_mask)

              loss = (
                  criterion(action_logits, action_labels) +
                  criterion(user_logits, user_labels) +
                  criterion(role_logits, role_labels)
              )
              total_val_loss += loss.item()

              # Compute Accuracy: count argmax predictions that match the labels, per head.
              correct_actions += (torch.argmax(action_logits, dim=1) == action_labels).sum().item()
              correct_users += (torch.argmax(user_logits, dim=1) == user_labels).sum().item()
              correct_roles += (torch.argmax(role_logits, dim=1) == role_labels).sum().item()
              total_samples += input_ids.size(0)

      avg_val_loss = total_val_loss / len(val_loader)
      action_acc = correct_actions / total_samples
      user_acc = correct_users / total_samples
      role_acc = correct_roles / total_samples

      print(f"Epoch {epoch+1}/{num_epochs}")
      print(f"Train Loss: {avg_train_loss:.4f}")
      print(f"Val Loss: {avg_val_loss:.4f}")
      print(f"Action Acc: {action_acc:.4f}, User Acc: {user_acc:.4f}, Role Acc: {role_acc:.4f}")

  # Save the trained model (weights only, as a state dict)
  torch.save(model.state_dict(), '/content/drive/MyDrive/Discord_Manager_files/gpt2_discord_model.pth')

Prediction phase: the model's output.
`predict` can be used to predict the output for a single input sample
(it is the main prediction function we use when generating responses).

In [None]:
def predict(model, tokenizer, text):
    """Predict the action, user and role placeholders for a single text.

    Args:
        model: Model returning ``(action_logits, user_logits, role_logits)``
            when called with ``(input_ids, attention_mask)``.
        tokenizer: Tokenizer used to encode ``text``; the text is padded /
            truncated to 64 tokens.
        text: The command text to classify.

    Returns:
        A dict with the keys ``"action"`` (a name from the module-level
        ``action_map``), ``"user"`` (``"<USER_k>"``) and ``"role"``
        (``"<ROLE_k>"``), where ``k`` is the predicted class index plus one.
    """
    model.eval()

    # Put the inputs on the device the model actually lives on. Guessing from
    # CUDA availability fails when the model was left on the CPU of a machine
    # that has a GPU. Fall back to the old guess for parameter-less models.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoding = tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=64)
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    with torch.no_grad():
        action_logits, user_logits, role_logits = model(input_ids, attention_mask)

    # Highest-scoring class of each head (the batch holds a single sample).
    action = torch.argmax(action_logits, dim=1).item()
    user = torch.argmax(user_logits, dim=1).item()
    role = torch.argmax(role_logits, dim=1).item()

    # Reverse lookup: class index -> action name.
    action_name = [k for k, v in action_map.items() if v == action][0]
    user_name = f"<USER_{user+1}>"  # Convert index to placeholder
    role_name = f"<ROLE_{role+1}>"
    return {"action": action_name, "user": user_name, "role": role_name}
