In [None]:
import pandas as pd

import torch
import torch.nn as nn
from torch import cuda
from torch.utils.data import Dataset, DataLoader

from transformers import AutoModel, AutoTokenizer

### Configuration

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

# Seed PyTorch's random number generator so that a fresh
# "Restart Kernel -> Run All" repeats the same stochastic draws.
SEED = 42
torch.manual_seed(SEED)

MAX_LEN = 150          # token length each question is padded / truncated to
BATCH_SIZE = 16        # samples per optimisation step
EPOCHS = 1             # number of passes over the training data
LEARNING_RATE = 1e-05  # Adam step size
DISTIL_BERT_CHECKPOINT = 'distilbert-base-uncased'

# Tokenizer matching the checkpoint that the model cell loads.
tokenizer = AutoTokenizer.from_pretrained(DISTIL_BERT_CHECKPOINT)

### Dataset and dataloader

In [None]:
class QuoraDataset(Dataset):
    """Map-style dataset that tokenizes one question per item from a CSV file.

    The CSV must provide a ``question_text`` column (raw text) and a ``target``
    column (integer class label); both are read in ``__getitem__``.
    """

    def __init__(self, file_path, tokenizer, max_len):
        """Load the CSV and remember how its rows should be tokenized.

        Args:
            file_path: Path of the CSV file, handed to ``pandas.read_csv``.
            tokenizer: Callable tokenizer returning a mapping that contains
                ``input_ids`` and ``attention_mask`` tensors.
            max_len: Length every encoded question is padded / truncated to.
        """
        self._dataset = pd.read_csv(file_path, low_memory=False)
        self._tokenizer = tokenizer
        self._max_len = max_len

    def __getitem__(self, index):
        """Return the encoded question and label stored at row ``index``.

        Returns:
            dict with
            ``"ids"``    - token ids, shape ``(1, max_len)``;
            ``"mask"``   - attention mask, shape ``(1, max_len)``;
            ``"target"`` - scalar ``torch.long`` label.
        """
        # Fetch the row once instead of indexing the frame twice.
        row = self._dataset.iloc[index]

        # The text stays wrapped in a list so the tensors keep their leading
        # axis of size 1, which the training loop removes with ``squeeze(1)``.
        # ``padding="max_length"`` replaces the deprecated
        # ``pad_to_max_length=True`` flag.
        inputs = self._tokenizer(
            [row["question_text"]],
            truncation=True,
            padding="max_length",
            max_length=self._max_len,
            return_tensors="pt",
        )

        return {
            "ids": inputs["input_ids"],
            "mask": inputs["attention_mask"],
            "target": torch.tensor(row["target"], dtype=torch.long),
        }

    def __len__(self):
        """Number of rows in the underlying CSV."""
        return len(self._dataset)

In [None]:
# Only the training split is wired up for now.
# TODO: add the test split and preprocessing.
train_dataset = QuoraDataset(
    file_path="../data/processed/train_resampled.csv",
    tokenizer=tokenizer,
    max_len=MAX_LEN,
)

# Reshuffle the samples on every pass and serve them in fixed-size batches.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

### DistilBert Model

In [None]:
# DistilBERT encoder with a linear classification head
class DistilBertModel(nn.Module):
    """Sequence classifier built on a pretrained DistilBERT encoder.

    The encoder's hidden state at the first token position is used as the
    sentence representation, passed through dropout and projected to
    ``num_labels`` logits, i.e. the ``(batch, num_labels)`` layout that
    ``nn.CrossEntropyLoss`` expects together with ``(batch,)`` integer targets.

    Args:
        num_labels: Number of output classes (2 for the binary target).
        dropout: Dropout probability applied before the linear head.
        hidden_size: Width of the encoder's hidden states (768 for the
            base DistilBERT checkpoint).
    """

    def __init__(self, num_labels=2, dropout=0.2, hidden_size=768):
        super().__init__()
        self.distil_bert = AutoModel.from_pretrained(DISTIL_BERT_CHECKPOINT)
        self.drop1 = nn.Dropout(dropout)
        self.linear1 = nn.Linear(hidden_size, num_labels)

    def forward(self, ids, mask):
        """Compute class logits for a batch of encoded questions.

        Args:
            ids: Token ids, shape ``(batch, seq_len)``.
            mask: Attention mask, shape ``(batch, seq_len)``.

        Returns:
            Logits of shape ``(batch, num_labels)``.
        """
        # Element 0 of the encoder output is the last hidden state,
        # shape (batch, seq_len, hidden_size).
        hidden_states = self.distil_bert(input_ids=ids, attention_mask=mask)[0]

        # Pool by taking the first ([CLS]) position. Feeding every token
        # through the head would yield one score per token, which the loss
        # would then misread as one score per class.
        pooled = hidden_states[:, 0]

        return self.linear1(self.drop1(pooled))

# Instantiate the classifier and move its parameters to `device`.
# As the last expression of the cell, `model.to(device)` also echoes the module repr.
model = DistilBertModel()
model.to(device)

### Training

In [None]:
# Creating the loss function and optimizer
# Objective and optimiser shared by the training loop.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
def train(epoch=1):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Uses the notebook-level ``model``, ``train_loader``, ``loss``,
    ``optimizer`` and ``device`` objects and prints the loss of every batch.

    Args:
        epoch: Epoch number, used only to label the progress output.
    """
    model.train()

    for idx, inputs in enumerate(train_loader):

        # Each sample is encoded as (1, seq_len), so a collated batch is
        # (batch, 1, seq_len); drop the middle axis before the forward pass.
        ids = inputs['ids'].squeeze(1).to(device)
        mask = inputs['mask'].squeeze(1).to(device)
        target = inputs['target'].to(device)

        # Remove only a trailing singleton axis. A bare squeeze() would also
        # collapse the batch axis when the final batch holds a single sample,
        # and the loss call would then fail on mismatched shapes.
        output = model(ids, mask).squeeze(-1)

        optimizer.zero_grad()

        batch_loss = loss(output, target)
        batch_loss.backward()

        optimizer.step()

        print(f'Epoch: {epoch}, {idx}/{len(train_loader)}, Loss:  {batch_loss.item()}')

In [None]:
# Honour the EPOCHS setting instead of always running one default epoch.
for epoch in range(1, EPOCHS + 1):
    train(epoch)