In [1]:
# Clone the project repository and switch into its `discrimitive` directory
# (presumably so the local `load_datasets` and `qa` modules imported in the
# next cell resolve -- confirm against the repository layout).
!git clone https://github.com/MarcusOrtiz/Questin-Answer-Rater-NLP.git
%cd Questin-Answer-Rater-NLP/discrimitive
# Disabled alternative, presumably for when the repository root is already the
# working directory (i.e. the clone step was skipped):
# %cd discrimitive/

Cloning into 'Questin-Answer-Rater-NLP'...
remote: Enumerating objects: 194, done.[K
remote: Counting objects: 100% (194/194), done.[K
remote: Compressing objects: 100% (141/141), done.[K
remote: Total 194 (delta 93), reused 139 (delta 47), pack-reused 0[K
Receiving objects: 100% (194/194), 6.24 MiB | 3.54 MiB/s, done.
Resolving deltas: 100% (93/93), done.
/content/Questin-Answer-Rater-NLP/discrimitive


In [2]:
import torch.nn as nn
import torch.optim as optim
import torch
from load_datasets import load_datasets
from torch.nn.utils.rnn import pad_sequence
from qa import create_model
from torch.utils.data import Dataset, DataLoader

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [3]:
# Move back up one level so the repository root is the working directory for
# the cells that follow (relative file paths are resolved from here).
%cd ..

/content/Questin-Answer-Rater-NLP


In [5]:
# --- Dataset files, one per split (passed to load_datasets by train()) ---
TRAIN_DATA_NAME, VAL_DATA_NAME, TEST_DATA_NAME = (
    'train_formatted_output_w_comma.json',
    'valid_formatted_output_w_comma.json',
    'test_formatted_output_w_comma.json',
)

# --- Data loading ---
BATCH_SIZE = 15  # samples per mini-batch
SHUFFLE = True   # forwarded as `shuffle=` to the DataLoaders

# --- Optimisation ---
EPOCHS = 20            # upper bound on the number of training epochs
LEARNING_RATE = 0.005  # Adam step size
PATIENCE = 10          # epochs without validation-loss improvement before early stopping

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

def collate_batch(batch):
    """Collate ``(question, answer, score)`` samples into batch tensors.

    Args:
        batch: Iterable of ``(question, answer, score)`` triples. ``question``
            and ``answer`` are tensors whose lengths may differ between
            samples (presumably 1-D token-id sequences -- confirm against the
            dataset); ``score`` is a numeric target.

    Returns:
        A tuple ``(questions_padded, answers_padded, scores)`` where the first
        two are batch-first tensors padded with 0 up to the longest sequence
        in this batch, and ``scores`` is a float tensor with one entry per
        sample.
    """
    question_seqs, answer_seqs, raw_scores = zip(*batch)

    def _pad(seqs):
        # 0 is assumed to be the vocabulary's padding index -- TODO confirm.
        return pad_sequence(seqs, batch_first=True, padding_value=0)

    return (
        _pad(question_seqs),
        _pad(answer_seqs),
        torch.tensor(raw_scores, dtype=torch.float),
    )


def train():
    """Train the question/answer scoring model with early stopping.

    Loads the splits named by ``TRAIN_DATA_NAME``, ``VAL_DATA_NAME`` and
    ``TEST_DATA_NAME``, builds a model sized to the returned vocabulary and
    optimises it with Adam against an MSE loss for at most ``EPOCHS`` epochs.
    After each epoch the model is evaluated on the validation split; the
    reported "accuracy" is the fraction of predictions that lie within 0.5 of
    the target score.

    Training stops early once the validation loss has failed to improve for
    ``PATIENCE`` consecutive epochs.

    Side effects:
        * Prints diagnostics for the first 10 validation batches of every
          epoch, followed by a one-line epoch summary.
        * Writes ``best_model.pth`` each time the validation loss improves.
        * Writes ``model_ending_1.pth`` with the weights held when training
          ends (whether by early stopping or by exhausting ``EPOCHS``).

    Returns:
        None.
    """
    # The test split is returned by the loader but is not used during training.
    train_data, val_data, _test_data, vocab = load_datasets(TRAIN_DATA_NAME, VAL_DATA_NAME, TEST_DATA_NAME)

    model = create_model(len(vocab)).to(device)

    qa_train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=SHUFFLE, collate_fn=collate_batch)
    # NOTE(review): shuffling the validation split does not change the total
    # accuracy; it is kept so the printed sample batches behave as before.
    qa_val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=SHUFFLE, collate_fn=collate_batch)

    # Define the loss function and optimizer
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_fn = nn.MSELoss()

    best_val_loss = float('inf')
    epochs_no_improve = 0
    for epoch in range(EPOCHS):
        # ---- Training pass ----
        model.train()
        train_loss = 0
        for questions, answers, scores in qa_train_loader:
            questions, answers, scores = questions.to(device), answers.to(device), scores.to(device)

            # Flatten to one prediction per sample (assumes the model emits a
            # single score per sample). A bare .squeeze() would also drop the
            # batch dimension when the final batch holds one sample, leaving a
            # 0-d tensor whose shape no longer matches `scores`.
            predictions = model(questions, answers).reshape(-1)
            loss = loss_fn(predictions, scores)

            # Backward pass and optimization
            optimizer.zero_grad()  # Clear existing gradients
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights
            train_loss += loss.item()

        # Mean of the per-batch mean losses.
        train_loss = train_loss / len(qa_train_loader)

        # ---- Validation pass ----
        model.eval()
        val_correct = 0
        val_loss = 0
        total_scores = 0
        with torch.no_grad():
            debug_batches_left = 10  # only the first 10 batches are printed per epoch
            for questions, answers, scores in qa_val_loader:
                questions, answers, scores = questions.to(device), answers.to(device), scores.to(device)
                predictions = model(questions, answers).reshape(-1)
                loss = loss_fn(predictions, scores)
                val_loss += loss.item()

                # A prediction counts as accurate when it is within 0.5 of the target.
                diff = torch.abs(predictions - scores)
                accurate = (diff < 0.5).to(diff.dtype)
                val_correct += torch.sum(accurate).item()
                total_scores += len(scores)
                if debug_batches_left > 0:
                    print(f'val predictions: {predictions}')
                    print(f'val scores: {scores}')
                    print(f'val diff: {diff}')
                    print(f'val accurate: {accurate}')
                    print(f'val accuracy: {val_correct}')
                    print(f'val total_scores: {total_scores}')
                    debug_batches_left -= 1

        val_loss = val_loss / len(qa_val_loader)
        val_accuracy = val_correct / total_scores

        # Print statistics
        print(f"Epoch {epoch},"
              f"Training Loss: {train_loss}, Validation Loss: {val_loss}, "
              f"Val Accuracy: {val_accuracy}")

        # Early Stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            # Save the model if it's the best so far
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= PATIENCE:
            print(f'Early stopping! Epoch: {epoch}')
            break

    # Always persist the final weights, which may differ from the best checkpoint.
    torch.save(model.state_dict(), 'model_ending_1.pth')

In [None]:
# Run the training loop. It prints per-epoch validation diagnostics and writes
# 'best_model.pth' and 'model_ending_1.pth' to the current working directory.
train()

val predictions: tensor([2.4866, 2.2014, 2.5249, 2.4906, 2.5248, 2.4887, 2.5249, 2.4893, 2.4900,
        2.5249, 2.5249, 2.4913, 2.4825, 2.1654, 2.2014], device='cuda:0')
val scores: tensor([3., 3., 4., 1., 4., 4., 2., 2., 2., 2., 3., 1., 4., 1., 1.],
       device='cuda:0')
val diff: tensor([0.5134, 0.7986, 1.4751, 1.4906, 1.4752, 1.5113, 0.5249, 0.4893, 0.4900,
        0.5249, 0.4751, 1.4913, 1.5175, 1.1654, 1.2014], device='cuda:0')
val accurate: tensor([0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0.],
       device='cuda:0')
val accuracy: 3.0
val total_scores: 15
val predictions: tensor([2.1675, 2.4891, 2.5249, 2.4854, 2.5249, 2.4822, 2.1663, 2.4902, 2.4880,
        2.4854, 2.5249, 2.4895, 2.5249, 2.4885, 2.4911], device='cuda:0')
val scores: tensor([4., 1., 1., 1., 2., 4., 1., 3., 4., 1., 3., 4., 2., 3., 4.],
       device='cuda:0')
val diff: tensor([1.8325, 1.4891, 1.5249, 1.4854, 0.5249, 1.5178, 1.1663, 0.5098, 1.5120,
        1.4854, 0.4751, 1.5105, 0.5249, 0.5115, 