## Import modules

In [56]:
import math
import logging
import torch
import warnings
warnings.filterwarnings("ignore")

from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    GPTNeoForCausalLM,
    GPT2TokenizerFast
)

## Logging setup

In [None]:

# Set up logging configuration
logging.basicConfig(
    filename="autocomplete.log",
    level=logging.INFO,
    format="%(asctime)s | %(message)s"
)
logger = logging.getLogger()

## Load Models & Tokenizers

In [None]:
models = {
    "gpt2": {
        "model": GPT2LMHeadModel.from_pretrained("gpt2"),
        "tokenizer": GPT2Tokenizer.from_pretrained("gpt2")
    },
    "gpt_neo": {
        "model": GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M"),
        "tokenizer": GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-125M")
    }
}

# GPT models do not have a pad token, so set pad = eos
for entry in models.values():
    tokenizer = entry["tokenizer"]
    tokenizer.pad_token = tokenizer.eos_token
    entry["model"].eval()


Loading weights: 100%|██████████| 148/148 [00:00<00:00, 2302.53it/s, Materializing param=transformer.wte.weight]             
GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
Loading weights: 100%|██████████| 160/160 [00:00<00:00, 2197.20it/s, Materializing param=transformer.wte.weight]                         
GPTNeoForCausalLM LOAD REPORT from: EleutherAI/gpt-neo-125M
Key                                                   | Status     |  | 
------------------------------------------------------+------------+--+-
transformer.h.{0, 2, 4, 6, 8, 10}.attn.attention.bias | UNEXPECTED |  | 
transformer.h.{0...11}.attn.attention.masked_bias     | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect

## Stopping Criteria

In [15]:
def stop_at_sentence(text):
    """
    Stop generation at the first end-of-sentence token.
    """
    if "." in text:
        return text[: text.find(".") + 1]
    return text


## Autocomplete Function


In [None]:
def autocomplete(
    prompt,
    model_name,
    num_outputs=3,
    max_new_tokens=40,
    top_k=30,
    temperature=0.45
):
    """
    Function to generate sentence completions using a specified model and parameters.
    """
    entry = models[model_name]
    model = entry["model"]
    tokenizer = entry["tokenizer"]

    # Log the generation parameters with the model name and prompt
    logger.info("=" * 60)
    logger.info(f"MODEL: {model_name}")
    logger.info(f"PROMPT: {prompt}")
    logger.info(
        f"PARAMS | top_k={top_k}, temperature={temperature}, "
        f"max_new_tokens={max_new_tokens}"
    )

    # Encode the prompt
    input_enc = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True
    )

    # Generate outputs with the specified parameters
    outputs = model.generate(
        input_ids=input_enc.input_ids,
        attention_mask=input_enc.attention_mask,
        do_sample=True,
        top_k=top_k,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        num_return_sequences=num_outputs,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and log each generated output
    completions = []
    for i, out in enumerate(outputs, start=1):
        text = tokenizer.decode(out, skip_special_tokens=True)
        text = stop_at_sentence(text)
        completions.append(text)
        logger.info(f"OUTPUT {i}: {text}")

    return completions


## Evaluation

In [None]:
def calculate_perplexity(text, model, tokenizer):
    """
    Computes perplexity (Fluency).
    """
    enc = tokenizer(text, return_tensors="pt", padding=True)

    with torch.no_grad():
        outputs = model(
            input_ids=enc.input_ids,
            attention_mask=enc.attention_mask,
            labels=enc.input_ids
        )

    return math.exp(outputs.loss.item())

In [None]:
def distinct_2(sentences):
    """
    Computes Distinct-2 (bigram diversity).
    """
    bigrams = []

    for s in sentences:
        words = s.split()
        bigrams.extend(
            [tuple(words[i:i+2]) for i in range(len(words) - 1)]
        )
    return len(set(bigrams)) / max(1, len(bigrams))


## Model Comparison

In [42]:
prompts = [
    "The team is planning to",
    "AI can help improve",
    "She decided to invest in",
    "Machine learning is used for",
    "I am thinking of traveling to"
]


In [None]:
evaluation_results = []

for model_name, entry in models.items():
    model = entry["model"]
    tokenizer = entry["tokenizer"]

    ppl_scores = []
    diversity_scores = []

    # Generate outputs for each prompt and evaluate them
    for prompt in prompts:
        outputs = autocomplete(
            prompt=prompt,
            model_name=model_name
        )

        # Perplexity
        avg_ppl = sum(
            calculate_perplexity(o, model, tokenizer)
            for o in outputs
        ) / len(outputs)

        ppl_scores.append(avg_ppl)

        # Diversity
        diversity_scores.append(distinct_2(outputs))

    # Store average metrics for the model
    evaluation_results.append({
        "Model": model_name,
        "Avg Perplexity": round(sum(ppl_scores) / len(ppl_scores), 2),
        "Avg Distinct-2": round(sum(diversity_scores) / len(diversity_scores), 2)
    })


In [48]:
results_df = pd.DataFrame(evaluation_results)
results_df


Unnamed: 0,Model,Avg Perplexity,Avg Distinct-2
0,gpt2,13.84,0.69
1,gpt_neo,12.14,0.7


#### Lower perplexity and higher Distinct scores indicate better performance; based on this trade-off, the GPT-Neo model was selected.

In [51]:
final_model = "gpt_neo"
prompt = "AI can help improve"

results = autocomplete(prompt, final_model)

for i, r in enumerate(results, start=1):
    print(f"{i}. {r}")


1. AI can help improve the quality of the medical care it provides, but it also can help improve the efficiency of the hospital.
2. AI can help improve the quality of life of patients with diabetes by improving the insulin sensitivity and insulin secretion.
3. AI can help improve the quality of life of people with dementia.
