In [1]:
import torch
import math
from transformers import AutoModelForCausalLM, AutoTokenizer

# Check if GPU is available and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the model and tokenizer
model_name = "google/gemma-2-2b-it"  # Replace with the specific model name, e.g., "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)  # Move model to GPU


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


Downloading shards: 100%|██████████| 2/2 [02:05<00:00, 62.93s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.44s/it]


In [2]:
input_text = """Write me a poem about Machine Learning."""
inputs = tokenizer(input_text, return_tensors="pt").to(device)
generated_ids = model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
    )
print(tokenizer.decode(generated_ids[0]))

The 'max_batch_size' argument of HybridCache is deprecated and will be removed in v4.46. Use the more precisely named 'batch_size' argument instead.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


In [33]:
input_text = "Describe the sights and sounds of a bustling farmers' market."

inputs = tokenizer(input_text, return_tensors="pt").to(device)  

with torch.no_grad():
    generated_ids = model.generate(
        **inputs,
        max_length=80,
        eos_token_id=tokenizer.eos_token_id,  
        do_sample=True,
        temperature=0.5,
        top_k=30,
        top_p=0.9
    )
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(f"LLM response: {generated_text}")

input_length = inputs["input_ids"].shape[1]  
generated_tokens = generated_ids[0][input_length:]  

with torch.no_grad():
    logits = model(generated_ids).logits

generated_logits = logits[0, input_length - 1:-1, :]  
generated_probs = torch.softmax(generated_logits, dim=-1)


token_probs = [generated_probs[i, token].item() for i, token in enumerate(generated_tokens)]

log_probs = [math.log(p) for p in token_probs if p > 0]
perplexity = math.exp(-sum(log_probs) / len(log_probs))

print(f"Probabilities: {token_probs}")
print(f"Perplexity: {perplexity}")

token_probs_dict = {tokenizer.decode(token): prob for token, prob in zip(generated_tokens, token_probs)}

print(token_probs_dict)

LLM response: Describe the sights and sounds of a bustling farmers' market.

**Sights:**

* **A vibrant tapestry of colors:** Baskets overflowing with ripe tomatoes, plump peaches, and vibrant purple eggplant.
* **The bounty of the season:** Rows of fresh herbs, leafy greens, and colorful vegetables in neat stacks.
* **The aroma of fresh produce:** The sweet scent of ripe strawberries
Probabilities: [0.7119027972221375, 0.5827901363372803, 0.799126923084259, 0.988342821598053, 0.9432348012924194, 0.9016574621200562, 0.9562946557998657, 0.9221857190132141, 0.1828395277261734, 0.23922263085842133, 0.4149479568004608, 0.9890844225883484, 0.5946714878082275, 0.8827874064445496, 0.08403302729129791, 0.8383487462997437, 0.9995378255844116, 0.23847390711307526, 0.2732764184474945, 0.8776777386665344, 0.07814806699752808, 0.43571847677230835, 0.7018110156059265, 0.6783096790313721, 0.2933952510356903, 0.3008807301521301, 0.5884259343147278, 0.7055785059928894, 0.08480974286794662, 0.9994035959

In [34]:
token_probs_dict_pad = {tokenizer.decode(token): prob for token, prob in zip(generated_tokens[1:], token_probs[1:])}


{'**': 0.5827901363372803, 'S': 0.799126923084259, 'ights': 0.988342821598053, ':**': 0.9804953336715698, '\n\n': 0.9016574621200562, '*': 0.9997157454490662, ' **': 0.9994121789932251, 'A': 0.1828395277261734, ' vibrant': 0.2933952510356903, ' tapestry': 0.4149479568004608, ' of': 0.9985711574554443, ' colors': 0.5946714878082275, ' Baskets': 0.08403302729129791, ' overflowing': 0.8383487462997437, ' with': 0.9995378255844116, ' ripe': 0.34664884209632874, ' tomatoes': 0.2732764184474945, ',': 0.9008296132087708, ' plump': 0.07814806699752808, ' peaches': 0.43571847677230835, ' and': 0.934029757976532, ' purple': 0.3008807301521301, ' eggplant': 0.5884259343147278, '.': 0.6612859964370728, '\n': 0.8466033339500427, 'The': 0.17444390058517456, ' bounty': 0.03374568372964859, ' the': 0.7284719347953796, ' season': 0.2748064398765564, ' Rows': 0.2113439291715622, ' fresh': 0.3107593357563019, ' herbs': 0.581920862197876, ' leafy': 0.17683684825897217, ' greens': 0.9270228743553162, ' col

In [40]:

import time
def generate_response(prompt):
    input_text = prompt

    inputs = tokenizer(input_text, return_tensors="pt").to(device)  

    start_time = time.time()
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_length=80,
            eos_token_id=tokenizer.eos_token_id,  
            do_sample=True,
            temperature=0.5,
            top_k=30,
            top_p=0.9
        )
    end_time = time.time()
    resp_time = end_time - start_time
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    # print(f"Generated text: {generated_text}")

    input_length = inputs["input_ids"].shape[1]  
    generated_tokens = generated_ids[0][input_length:]  

    with torch.no_grad():
        logits = model(generated_ids).logits

    generated_logits = logits[0, input_length - 1:-1, :]  
    generated_probs = torch.softmax(generated_logits, dim=-1)


    token_probs = [generated_probs[i, token].item() for i, token in enumerate(generated_tokens)]

    log_probs = [math.log(p) for p in token_probs if p > 0]
    perplexity = math.exp(-sum(log_probs) / len(log_probs))

    # print(f"Token probabilities: {token_probs}")
    # print(f"Perplexity: {perplexity}")

# starting from the second token, because the first one is always an endline
    token_probs_dict = {tokenizer.decode(token): prob for token, prob in zip(generated_tokens[1:], token_probs[1:])}
    
    only_output_text = generated_text[len(input_text):]
    return {"response_time":resp_time, "generated_text": only_output_text[2:], "perplexity": perplexity, "token_probabilities": token_probs_dict}


In [41]:
import json 

with open("responses.json", "r") as file:
    data = json.load(file)

new_output = {}

for prompt in data.keys():
    print(prompt)
    resp = generate_response(prompt)
    new_output[prompt] = resp

    with open("responses_new.json", "w") as file:
        json.dump(new_output, file, indent=4)
    

Describe the sights and sounds of a bustling farmers' market.
Write about a sunrise you saw from the top of a mountain.
Imagine walking through a rainforestâ€”describe the experience.
Write about a dog chasing a frisbee in a park.
Describe the moment a child learns to ride a bicycle.
Write about cooking a favorite meal and the smells that fill the kitchen.
Describe a walk through a quiet snow-covered forest.
Write about the first time you swam in the ocean.
Describe the scene of a carnival at night.
Write about planting flowers in a garden.
Describe a picnic on a sunny day by a lake.
Write about the moment before a race starts.
Describe a train journey through scenic landscapes.
Write about feeding ducks in a pond.
Describe a library filled with old, dusty books.
Write about climbing a tree as a child.
Describe the experience of tasting a new fruit for the first time.
Write about a thunderstorm rolling in on a summer evening.
Describe the atmosphere of an outdoor concert.
Write about a