In [None]:
!pip install datasets

In [2]:
import numpy as np
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig
from datasets import load_dataset
import os
import re

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Parameters

model_name = "t5-small"
max_input_length = 32
max_target_length = 8
num_beams = 20

In [4]:
# Returns last checkpoint, given list of files in a directory

def find_checkpoint(list_of_files):
    max = 0
    for filename in list_of_files:
        if "checkpoint" in filename:
            num = int(re.findall("\d+", filename)[-1])
            if num > max:
                max = num
    return max

In [None]:
# Paths to model and test dataset

path = "/content/drive/MyDrive/Projects/cryptic-crossword-solver/"
dir_name = model_name.split("/")[-1]
list_of_checkpoints = os.listdir(path = path + f"{dir_name}-fine-tuned/")
max_checkpoint = find_checkpoint(list_of_checkpoints)
print("Loading checkpoint no. ", max_checkpoint)
model_dir = path + f"{dir_name}-fine-tuned/checkpoint-{max_checkpoint}"

In [6]:
# Load model

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)

model.config.max_new_tokens = max_target_length
gen_config = GenerationConfig.from_pretrained(model_dir)
gen_config.update(
    max_new_tokens = max_target_length,
    num_beams=num_beams,
    num_return_sequences=num_beams,
    early_stopping=True
)
gen_config.validate()

In [7]:
# Checks if answer has the same number of letters as clue demands.
# (Ignores punctuation for simplicity)

def check_enum(clue, output):
    enum_str = re.findall("\(.*?\)", clue)[-1]
    enum = sum([int(i) for i in enum_str if i.isdigit()])
    ans_enum = sum(map(str.isalpha, output))
    if enum == ans_enum:
        return True
    else:
        return False

In [8]:
# Function to run model on a single crossword clue

def inference(
        data,
        prompt = "Solve the following cryptic crossword clue: ",
        max_input_length=max_input_length,
        gen_config=gen_config):

    # Add instruction
    data["clue"] =  prompt + data["clue"]

    # Tokenize
    input_ids = tokenizer.encode(data["clue"], truncation=True, max_length=max_input_length, return_tensors='pt')

    # Generate a few(= num_beams) possible answers
    device = model.device
    beam_outputs = model.generate(input_ids=input_ids.to(device), generation_config=gen_config)

    # Decode them all
    outputs_dec = tokenizer.batch_decode(beam_outputs, skip_special_tokens=True)

    # Choose best answer that fits enum
    test = False
    for output in outputs_dec:
        if check_enum(data["clue"], output):
            test = True
            break

    # If no answer fits, admit defeat
    if not test:
        output = "-1"

    data["pred"] = output

    return data

In [None]:
# Load test datasets

test_dataset_1 = load_dataset("json", data_files=path+"data_cryptic_test.json", split="train")
test_dataset_2 = load_dataset("json", data_files=path+"data_quiptic_full.json", split="train")

In [None]:
# Run inference on test datasets and save predictions to file

test_dataset_1 = test_dataset_1.map(inference)
test_dataset_1.to_json("data_cryptic_test_results.json")

test_dataset_2 = test_dataset_2.map(inference)
test_dataset_2.to_json("data_quiptic_full_results")

In [13]:
# Check accuracy

test_results_1 = test_dataset_1.filter(lambda x: x["pred"] == x["ans"])
test_results_2 = test_dataset_2.filter(lambda x: x["pred"] == x["ans"])

print("Percent Accuracy (Cryptic)= ", 100 * len(test_results_1)/len(test_dataset_1))
print("Percent Accuracy (Quiptic)= ", 100 * len(test_results_2)/len(test_dataset_2))

Percent Accuracy (Cryptic)=  18.394117647058824
Percent Accuracy (Quiptic)=  13.71505761702597


Previous result: Percent accuracy with training only on quiptics =  6.0088202866593166