# Inference notebook

In [1]:
import os
from pathlib import Path

import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from utils.params_parser import ModelConfig
from utils.build_model import CustomModel
from utils.inference_utils import get_grade_from_predictions

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root of the project checkout. Defaults to the original hard-coded location;
# set the FEEDBACK_ELL_ROOT environment variable to run the notebook elsewhere.
REPO_ROOT = os.environ.get("FEEDBACK_ELL_ROOT", "/root/repo/Feedback-Prize-ELL")

# Checkpoint passed to torch.load, and the CSV passed to pd.read_csv.
MODEL_PATH = f"{REPO_ROOT}/models/model.pt"
DATA_PATH = f"{REPO_ROOT}/data/processed/feedback_data.csv"

In [None]:
# Arguments are passed positionally, in the order ModelConfig expects them.
# NOTE(review): only the first is identifiable from this notebook — it appears
# to be the checkpoint later read back as model_config.model_checkpoint.
# Confirm the meaning of the remaining values in utils.params_parser.
model_config = ModelConfig(
    "bert-base-uncased",
    "bert",
    "lstm",
    2,
    768,
    6,
)

In [None]:
# Name of the DataFrame column whose value is handed to the tokenizer.
text_col: str = "full_text"

# Names of the DataFrame columns read per row as the labels that accompany
# each prediction.
label_cols: list = [
    "cohesion",
    "syntax",
    "vocabulary",
    "phraseology",
    "grammar",
    "conventions",
]

In [4]:
# Load model
model = CustomModel(model_config)

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; inference() moves the model to the selected device later.
# NOTE(review): torch.load unpickles the file — only load checkpoints that come
# from a trusted source.
model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))

In [None]:
# Load data
data = pd.read_csv(DATA_PATH)

# Fail fast, naming the problem, if the CSV lacks a column that inference()
# reads (the text column or any label column) instead of hitting a bare
# KeyError inside the prediction loop.
missing_cols = [col for col in [text_col, *label_cols] if col not in data.columns]
if missing_cols:
    raise KeyError(f"{DATA_PATH} is missing required columns: {missing_cols}")

In [None]:
# Inference function
def inference(model, data):
    """
    Run the model over every row of ``data`` and collect predictions and labels.

    Relies on the notebook-level globals ``model_config`` (its
    ``model_checkpoint`` selects the tokenizer), ``text_col`` and
    ``label_cols``.

    Args:
        model: Model to run. As a side effect it is moved to the selected
            device (CUDA when available, otherwise CPU) and switched to
            evaluation mode.
        data: DataFrame providing the ``text_col`` column and every column
            listed in ``label_cols``; rows are processed one at a time.

    Returns:
        predictions: List with one ``(prediction, labels)`` tuple per row, in
            row order. ``prediction`` is ``outputs[0]`` of the model call
            converted to plain Python lists; ``labels`` is the row's
            ``label_cols`` values as a list.
    """
    # Set up device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Evaluation mode turns off train-time behaviour such as dropout, so the
    # predictions are deterministic for a given input.
    model.eval()

    # Set up tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_config.model_checkpoint)

    # Initialize predictions
    predictions = []

    # Gradients are never needed here; disabling autograd avoids building a
    # graph for every forward pass.
    with torch.no_grad():
        # Iterate over data
        for _, row in data.iterrows():
            # Get text and the labels stored alongside it
            text = row[text_col]
            labels = row[label_cols]

            # Tokenize text: truncated / padded to a fixed length of 512 tokens
            inputs = tokenizer(
                text,
                return_tensors="pt",
                max_length=512,
                truncation=True,
                padding="max_length",
            )

            # Move the inputs to the same device as the model
            input_ids = inputs["input_ids"].to(device)
            attention_mask = inputs["attention_mask"].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Keep the first element of the output as plain Python lists
            prediction = outputs[0].detach().cpu().numpy().tolist()
            predictions.append((prediction, labels.tolist()))

    return predictions

In [None]:
# Run the model over the loaded data, then turn the collected
# (prediction, labels) pairs into grades.
predictions = inference(model=model, data=data)
predicted_grades = get_grade_from_predictions(predictions)

In [None]:
# Print the grades computed from the model predictions.
# NOTE(review): the return type of get_grade_from_predictions is not visible
# here; if it is large, consider showing only a preview instead of everything.
print(predicted_grades)