<a href="https://colab.research.google.com/github/MangoGrove/MangoGrove.github.io/blob/main/HWK04/HWK_04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/aarsri/nlp_hw4.git

Cloning into 'nlp_hw4'...
remote: Enumerating objects: 21, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 21 (delta 6), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (21/21), 15.91 KiB | 7.96 MiB/s, done.
Resolving deltas: 100% (6/6), done.


In [None]:
import sys

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
class LanguageModel:
    """Causal language model wrapper with a hand-written decoding loop.

    The next token is chosen by one of four strategies, selected by ``mode``:

    * ``'greedy'``   - always take the most probable token.
    * ``'sampling'`` - ancestral sampling from the full distribution.
    * ``'top-k'``    - sample among the ``k`` most probable tokens.
    * ``'top-p'``    - nucleus sampling: sample among the smallest set of most
      probable tokens whose cumulative probability is at least ``p``.

    ``temperature`` divides the logits before the softmax in every mode.
    All methods assume a batch size of 1.
    """

    MODES = ('greedy', 'sampling', 'top-k', 'top-p')

    def __init__(self, model_name='gpt2', device=None, mode='greedy', k=None, p=None, temperature=1.0):
        """Load tokenizer and model and store the decoding hyperparameters.

        Args:
            model_name: HuggingFace model identifier passed to ``from_pretrained``.
            device: Device to run on; defaults to CUDA when available, else CPU.
            mode: One of ``LanguageModel.MODES``.
            k: Number of candidates kept in ``'top-k'`` mode.
            p: Cumulative probability threshold in ``'top-p'`` mode.
            temperature: Positive divisor applied to the logits.

        Raises:
            ValueError: If ``mode`` is unknown or ``temperature`` is not positive.
        """
        # Validate before the (slow) model load so mistakes fail fast.
        if mode not in self.MODES:
            raise ValueError(f"Unknown decoding mode {mode!r}; expected one of {self.MODES}")
        if temperature <= 0:
            raise ValueError(f"temperature must be positive, got {temperature!r}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        self.mode = mode
        self.k = k
        self.p = p
        self.temperature = temperature

    def start(self, text):
        """Tokenize ``text`` and return the tokenizer's tensors moved to ``self.device``."""
        # Whether special tokens (BOS/EOS) are added depends on the tokenizer's
        # own defaults; nothing is forced here.
        inputs = self.tokenizer(text, return_tensors='pt')
        return {k: v.to(self.device) for k, v in inputs.items()}

    def step(self, state):
        """Run the model once, pick the next token and append it to ``state``.

        ``state`` is updated in place (``input_ids`` and ``attention_mask`` each
        grow by one column) and also returned.
        """
        with torch.no_grad():
            outputs = self.model(**state)
            next_token = self.decoding_algorithm(outputs)
            # Append new token to input
            state['input_ids'] = torch.cat([state['input_ids'], next_token.reshape(1, 1)], dim=1)
            # new_ones keeps the mask's dtype and device; a plain torch.ones is
            # float and would promote the integer mask to float on concatenation.
            mask = state['attention_mask']
            state['attention_mask'] = torch.cat([mask, mask.new_ones((1, 1))], dim=1)
        return state

    def decoding_algorithm(self, outputs):
        """Choose the next token according to ``self.mode``.

        Args:
            outputs: Model output exposing ``logits`` indexed as
                ``[batch, position, vocab]``; only the last position is used.

        Returns:
            A tensor of shape ``[1]`` holding the chosen token id.

        Raises:
            ValueError: If ``self.mode`` is unknown, or the hyperparameter the
                mode needs (``k`` or ``p``) is missing or invalid.
        """
        # Temperature scaling happens on the logits, before the softmax.
        logits = outputs.logits[:, -1, :] / self.temperature
        probs = torch.softmax(logits, dim=-1).squeeze(0)

        if self.mode == 'greedy':
            return torch.argmax(probs).unsqueeze(0)

        if self.mode == 'sampling':
            return torch.multinomial(probs, num_samples=1)

        if self.mode == 'top-k':
            if self.k is None or self.k < 1:
                raise ValueError(f"top-k decoding needs k >= 1, got {self.k!r}")
            # torch.topk rejects k larger than the vocabulary, so clamp it.
            k = min(int(self.k), probs.size(-1))
            top_k_probs, top_k_indices = torch.topk(probs, k=k)
            # multinomial treats its input as weights, so no renormalization is needed.
            choice = torch.multinomial(top_k_probs, num_samples=1)
            return top_k_indices[choice]

        if self.mode == 'top-p':
            if self.p is None:
                raise ValueError("top-p decoding needs a value for p")
            sorted_probs, sorted_indices = torch.sort(probs, descending=True)
            cumulative_probs = torch.cumsum(sorted_probs, dim=-1)
            # Keep a token while the mass accumulated BEFORE it is still below p.
            # This includes the token that crosses the threshold, so the kept
            # set is the smallest one whose cumulative probability is >= p.
            mass_before = cumulative_probs - sorted_probs
            keep = mass_before < self.p
            keep[0] = True  # always keep at least the most probable token
            nucleus = torch.where(keep, sorted_probs, torch.zeros_like(sorted_probs))
            choice = torch.multinomial(nucleus, num_samples=1)
            return sorted_indices[choice]

        raise ValueError(f"Unknown decoding mode {self.mode!r}; expected one of {self.MODES}")

    # The `generate()` method below is NOT HuggingFace's built-in `.generate()`.
    # It simply runs our custom decoding loop using your implementation of greedy search, sampling, top-k, and top-p.
    # You may NOT use `model.generate()` from the HuggingFace Transformers library.
    def generate(self, prompt, max_new_tokens=40):
        """Generate a continuation of ``prompt`` and return prompt + continuation as text.

        Exactly ``max_new_tokens`` decoding steps are run; there is no early
        stop on an end-of-sequence token.
        """
        state = self.start(prompt)
        for _ in range(max_new_tokens):
            state = self.step(state)
        # Index the single batch row instead of squeeze(), which would collapse
        # a one-token sequence into a scalar.
        output_ids = state['input_ids'][0].tolist()
        return self.tokenizer.decode(output_ids, skip_special_tokens=True)



In [None]:
if __name__ == '__main__':
    # Story-opening prompts: one per non-blank line of the cloned data file.
    with open('/content/nlp_hw4/storycloze-2018/short_context_data.txt') as f:
        contexts = [line.strip() for line in f if line.strip()]
    print(contexts)
    print(len(contexts))

    # --- Top-k decoding -----------------------------------------------------
    # Only the models that are actually used get loaded; each LanguageModel()
    # call loads the tokenizer and model again.
    print("top k decoding")
    top_k = 40
    lm = LanguageModel(mode='top-k', k=top_k)
    print(f"Top-k: {top_k}")
    for n, context in enumerate(contexts[:40], start=1):
        output = lm.generate(context, max_new_tokens=40)
        print(f"{n}.  Context: {context}\n→ {output}\n")

    # --- Top-p (nucleus) decoding ---------------------------------------------
    # Numbered from 1, like the top-k run above, so the same prompt carries the
    # same number in both listings.
    print("top p decoding")
    ps = 0.9
    lm = LanguageModel(mode='top-p', p=ps)
    print(f"Top-p: {ps}")
    for n, context in enumerate(contexts[:40], start=1):
        output = lm.generate(context, max_new_tokens=40)
        print(f"{n}.  Context: {context}\n→ {output}\n")

['Rick grew up in a troubled household.', "Laverne needs to prepare something for her friend's party.", 'Sarah had been dreaming of visiting Europe for years.', 'Gina was worried the cookie dough in the tube would be gross.', 'It was  my final performance in marching band.', 'Jim found an old disposable camera in the bottom of his junk drawer.', 'Ron started his new job as a landscaper today.', 'John and Billy became very skilled at beer pong.', 'Caroline was a student in medical school.', 'Trish hated the outdoors.', 'Tony was happy to be going on his first cruise.', 'Ignacio wants to play a sport while he is in college.', 'Danny bought a boat.', 'At school, Mary received an assignment to write an essay about pandas.', 'Ellen dreamed of winning a prize for her roses.', 'Jesse had just started fifth grade.', 'Tiffany was getting overwhelmed at work.', 'Gina misplaced her phone at her grandparents.', 'Alice was getting married in a few weeks.', 'Ted loves to go to the movies.', 'Nya had

#Discuss how the outputs from top-k decoding differ from those obtained from greedy search and ancestral sampling. Explicitly tie your explanation to how top-k decoding works in contrast to greedy search and ancestral sampling. Is top-k decoding ever the same as greedy search?



---


Okay, back to the basic needs: at least it prints something AND it doesn't repeat like greedy does! Not only that, it seems to be more cohesive with the starting sentence than ancestral sampling is. However, it doesn't do a good job of implying/sticking with a gender (though who knows, maybe there is a girl named Rick and I am just allowing my own bias to influence my judgement). In that regard, restricting sampling to the most probable tokens has helped a lot in structuring the story; however, there are odd ones mixed in. That is to say, even if it is cohesive, it does get more random and at times doesn't quite fit with the prompt, such as with the first prompt with Rick.

Such as → Laverne needs to prepare something for her friend's party. He needs to get a little light. That will probably take a lot of time, but maybe the idea works out. And there will also be some chance of him becoming pregnant. He'll need to


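Idea behind top-k decoding: keep only the k most probable tokens, then sample from them (k is a hyperparameter). With k = 1 only the single most probable token remains, so top-k decoding becomes identical to greedy search.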

#Discuss how the outputs from top-p decoding differ from those obtained from the previous methods. Explicitly tie your explanation to how top-p decoding works in contrast to the other methods, particularly top-k.



---

Okay, back to the basic needs: it also at least prints something, and it doesn't repeat like greedy! There is also a cohesive story that relates more to the given prompt (unlike top-k decoding, where it is somewhat cohesive but doesn't necessarily relate to the prompt). This probably has to do with the cumulative probability cutoff. For example, Rick's story now references his troubled past. I will say, though, that the handling of gender is still a little odd, such as when Sarah is being referenced.

Order the tokens in descending order of probability.
Select the smallest number of top tokens such that their cumulative probability is at least p (hyperparameter).
Sample from only those tokens.


* greedy sampling

In [None]:

if __name__ == '__main__':
    # Story-opening prompts: one per non-blank line of the cloned data file.
    with open('/content/nlp_hw4/storycloze-2018/short_context_data.txt') as f:
        contexts = [line.strip() for line in f if line.strip()]
    print(contexts)
    print(len(contexts))

    # Greedy search: the model is loaded once (a second LanguageModel() call
    # would only repeat the load), and no temperature / k grid is needed here.
    lm = LanguageModel(mode='greedy')
    for n, context in enumerate(contexts[:40], start=1):
        output = lm.generate(context, max_new_tokens=40)
        print(f"{n}.  Context: {context}\n→ {output}\n")


['Rick grew up in a troubled household.', "Laverne needs to prepare something for her friend's party.", 'Sarah had been dreaming of visiting Europe for years.', 'Gina was worried the cookie dough in the tube would be gross.', 'It was  my final performance in marching band.', 'Jim found an old disposable camera in the bottom of his junk drawer.', 'Ron started his new job as a landscaper today.', 'John and Billy became very skilled at beer pong.', 'Caroline was a student in medical school.', 'Trish hated the outdoors.', 'Tony was happy to be going on his first cruise.', 'Ignacio wants to play a sport while he is in college.', 'Danny bought a boat.', 'At school, Mary received an assignment to write an essay about pandas.', 'Ellen dreamed of winning a prize for her roses.', 'Jesse had just started fifth grade.', 'Tiffany was getting overwhelmed at work.', 'Gina misplaced her phone at her grandparents.', 'Alice was getting married in a few weeks.', 'Ted loves to go to the movies.', 'Nya had

#Free response: What looks good and bad about the outputs from greedy search, and why? Explicitly tie your explanation to how greedy search works.


---

I think, as just something basic, that it's a good thing that the greedy
algorithm is producing at least something! And it even makes sense for some of them. However, because greedy search always picks the single highest-probability token (with no randomness and no revisiting of earlier choices), it leads to the problem of repetition. All of these outputs had some sort of repetition in them (and not reiteration, but straight repetition).

* ancestral sampling

In [None]:
if __name__ == '__main__':
    # Ancestral sampling run: read the prompts, echo them, then print a
    # numbered continuation for each of the first 40.
    prompt_path = '/content/nlp_hw4/storycloze-2018/short_context_data.txt'
    with open(prompt_path) as f:
        contexts = []
        for raw_line in f:
            stripped = raw_line.strip()
            if stripped:  # skip blank lines
                contexts.append(stripped)
        print(contexts)

    lm = LanguageModel(mode='sampling')
    for n, begining in enumerate(contexts[:40], start=1):
        output = lm.generate(begining, max_new_tokens=40)
        print(f"{n}.  Context: {begining}\n→ {output}\n")


['Rick grew up in a troubled household.', "Laverne needs to prepare something for her friend's party.", 'Sarah had been dreaming of visiting Europe for years.', 'Gina was worried the cookie dough in the tube would be gross.', 'It was  my final performance in marching band.', 'Jim found an old disposable camera in the bottom of his junk drawer.', 'Ron started his new job as a landscaper today.', 'John and Billy became very skilled at beer pong.', 'Caroline was a student in medical school.', 'Trish hated the outdoors.', 'Tony was happy to be going on his first cruise.', 'Ignacio wants to play a sport while he is in college.', 'Danny bought a boat.', 'At school, Mary received an assignment to write an essay about pandas.', 'Ellen dreamed of winning a prize for her roses.', 'Jesse had just started fifth grade.', 'Tiffany was getting overwhelmed at work.', 'Gina misplaced her phone at her grandparents.', 'Alice was getting married in a few weeks.', 'Ted loves to go to the movies.', 'Nya had

#Discuss how the outputs from ancestral sampling differ from those obtained from greedy search. Explicitly tie your explanation to how ancestral sampling works in contrast to greedy search



---



Well... as just a baseline, at least it made something! On top of that, it's not repeating like greedy search does! Unfortunately, due to the weighted random nature of the sampling (one of the aspects that helps it avoid repetition), it ended up creating nonsensical continuations. Some of them could work, in a way (for example, the one about robbing something after getting a bike), but for the most part they jump around, with one even making a poem (#37). Ultimately its main issue is that it doesn't take context into account.

In [None]:
if __name__ == '__main__':
    # Story-opening prompts: one per non-blank line of the cloned data file.
    with open('/content/nlp_hw4/storycloze-2018/short_context_data.txt') as f:
        contexts = [line.strip() for line in f if line.strip()]
    print(contexts)
    print(len(contexts))

    # Grid for the qualitative evaluation: every temperature is paired with
    # every k, giving 9 settings.
    temperatures = [0.4, 0.6, 0.8]
    top_ks = [10, 20, 40]

    print("top k decoding")
    # Load the model once. `temperature` and `k` are read from the instance at
    # every decoding step, so updating the attributes is enough to switch
    # settings without reloading.
    lm = LanguageModel(mode='top-k', k=top_ks[0], temperature=temperatures[0])
    n = 1  # running counter across all settings
    for temperature in temperatures:
        print(f"Temperature: {temperature}")
        # The temperature must actually reach the model; otherwise every
        # "temperature" group silently decodes at the default of 1.0.
        lm.temperature = temperature
        for top_k in top_ks:
            lm.k = top_k
            print(f"Top-k: {top_k}")
            for context in contexts[:10]:
                output = lm.generate(context, max_new_tokens=40)
                print(f"{n}.  Context: {context}\n→ {output}\n")
                n += 1

['Rick grew up in a troubled household.', "Laverne needs to prepare something for her friend's party.", 'Sarah had been dreaming of visiting Europe for years.', 'Gina was worried the cookie dough in the tube would be gross.', 'It was  my final performance in marching band.', 'Jim found an old disposable camera in the bottom of his junk drawer.', 'Ron started his new job as a landscaper today.', 'John and Billy became very skilled at beer pong.', 'Caroline was a student in medical school.', 'Trish hated the outdoors.', 'Tony was happy to be going on his first cruise.', 'Ignacio wants to play a sport while he is in college.', 'Danny bought a boat.', 'At school, Mary received an assignment to write an essay about pandas.', 'Ellen dreamed of winning a prize for her roses.', 'Jesse had just started fifth grade.', 'Tiffany was getting overwhelmed at work.', 'Gina misplaced her phone at her grandparents.', 'Alice was getting married in a few weeks.', 'Ted loves to go to the movies.', 'Nya had

#Identify ~3-5 features on which to evaluate results qualitatively. Explain why you chose these.




---


It depends on what you want to do. Right now I just want it to be: cohesive with the prompt, consistent in the referenced gender, and (what I view as) a smooth sentence. This would allow me to think that it could have been a sentence someone actually said in real life.


0.4, 10 (maybe 40) : this one is more cohesive, smooth telling, and has gender consistent
0.2, 10



0.6 20 <- temp 0.6 is pretty good in general

In [None]:
import pandas as pd

temperatures = [0.4, 0.6, 0.8]
ks = [10, 20, 40]
features = ["Coherence", "Consistency", "Smoothness"]

# Manually entered marks, one row per (temperature, k) setting and one entry
# per feature. Rows follow the order temperature-major, k-minor.
results = [
    # temperature 0.4, k = 10 / 20 / 40
    ["✓", "X", "✓"],
    ["X", "X", "✓"],
    ["✓", "X", "X"],
    # temperature 0.6, k = 10 / 20 / 40
    ["X", "✓", "✓"],
    ["✓", "✓", "X"],
    ["✓", "✓", "✓"],
    # temperature 0.8, k = 10 / 20 / 40
    ["✓", "✓", "✓"],
    ["✓", "✓", "X"],
    ["X", "✓", "✓"],
]

# Every (temperature, k) pair, in the same order as the rows of `results`.
settings = [(t, k) for t in temperatures for k in ks]

# One record per setting: the two hyperparameters followed by the feature marks.
rows = [
    {"temperature": t, "k": k, **dict(zip(features, marks))}
    for (t, k), marks in zip(settings, results)
]

df = pd.DataFrame(rows)
print(df)


   temperature   k Coherence Consistency Smoothness
0          0.4  10         ✓           X          ✓
1          0.4  20         X           X          ✓
2          0.4  40         ✓           X          X
3          0.6  10         X           ✓          ✓
4          0.6  20         ✓           ✓          X
5          0.6  40         ✓           ✓          ✓
6          0.8  10         ✓           ✓          ✓
7          0.8  20         ✓           ✓          X
8          0.8  40         X           ✓          ✓
