<a href="https://colab.research.google.com/github/AnasAlhasan/large-models-course/blob/main/notebooks/large_language_model_gpt2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Installing the necessary libraries**

In [None]:
!pip -q install torch torchvision torchaudio transformers


# **Selecting the Device**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device  # last expression of the cell, so the notebook displays the chosen device

In [None]:
# Tokenizer: converts text into the numeric token IDs the model works with (and back).
token = AutoTokenizer.from_pretrained("gpt2")

# Model: load the pretrained GPT-2 checkpoint, then move it to the device that
# will generate the responses.
model = AutoModelForCausalLM.from_pretrained("gpt2")
model = model.to(device)

# **1. Testing the model**

In [None]:
prompt = "Artificial intelligence will"
inputs = token.encode(prompt, return_tensors="pt").to(device)

with torch.no_grad():  # inference only, so no gradients are tracked
  outputs = model.generate(
      inputs,
      # Single unpadded prompt: every position is a real token.
      attention_mask=torch.ones_like(inputs),
      max_length=40,   # upper bound on prompt tokens + generated tokens
      do_sample=True,  # sample the next token instead of always taking the top one
      top_k=50,        # restrict sampling to the 50 most likely tokens
      # Passed explicitly (as chat_gpt2 does) so generate() does not have to
      # fall back to it with a warning.
      pad_token_id=token.eos_token_id,
  )

print(token.decode(outputs[0], skip_special_tokens=True))

# **Benchmarking the Model**

In [None]:
import time
prompt = "The future of AI is"
inputs = token.encode(prompt, return_tensors="pt").to(device)

# Time one sampled generation per batch size.
for batch in [1, 2, 4, 8]:
  # Stack `batch` copies of the same prompt along the first dimension.
  batch_inputs = inputs.repeat(batch, 1)

  # GPU work is queued asynchronously: wait for anything pending so it is not
  # billed to this measurement.
  if device == "cuda":
    torch.cuda.synchronize()
  start = time.perf_counter()  # monotonic, high-resolution clock for intervals

  with torch.no_grad():
    _ = model.generate(
        batch_inputs,
        # All rows are identical unpadded prompts, so every position is real.
        attention_mask=torch.ones_like(batch_inputs),
        max_length=40,
        do_sample=True,
        top_k=50,
        # Rows that finish early are filled with this id up to the batch length.
        pad_token_id=token.eos_token_id,
    )

  # Make sure the GPU has actually finished before stopping the clock.
  if device == "cuda":
    torch.cuda.synchronize()
  dur = time.perf_counter() - start
  print(f"Batch size: {batch} | Time: {dur:.4f}s")

In [None]:
def chat_gpt2():
  """Run an interactive prompt/continuation loop with the loaded GPT-2 model.

  Reads a line from the user, lets the model continue it with top-k sampling
  (up to 100 new tokens) and prints only the newly generated text. The loop
  ends when the user types "quit" (case-insensitive, surrounding whitespace
  ignored). Blank prompts are skipped.

  Relies on the notebook-level names `token`, `model` and `device`.
  Returns None.
  """
  while True:
    user_prompt = input('\nwrite "quit" to exit\nYou: ')
    cleaned = user_prompt.strip()

    if cleaned.lower() == "quit":
      break

    if not cleaned:
      # Nothing for the model to continue from; ask again instead of calling generate().
      continue

    inputs = token.encode(user_prompt, return_tensors="pt").to(device)
    prompt_len = inputs.shape[-1]

    with torch.no_grad():
      outputs = model.generate(
          inputs,
          # Single unpadded prompt: every position is a real token.
          attention_mask=torch.ones_like(inputs),
          max_new_tokens=100,  # same budget as max_length = prompt_len + 100
          do_sample=True,
          top_k=50,
          pad_token_id=token.eos_token_id,
      )

    # Slice by token count, not by characters: the decoded text is not
    # guaranteed to reproduce the prompt string character-for-character.
    new_token_ids = outputs[0][prompt_len:]
    generatedPart = token.decode(new_token_ids, skip_special_tokens=True)

    print("gpt2: ", generatedPart.strip())



# **Testing the model**

In [None]:
# Start the interactive chat loop; type "quit" at the prompt to stop it.
chat_gpt2()

# **Insights**
This model predicts the next word when given a prompt; it is highly unlikely to generate answers unrelated to the topic. It just predicts the next word. The **AutoTokenizer** class is used to convert the prompt to numbers (token IDs), which are then sent to the device. **AutoModelForCausalLM** is used to set up the model and put it on the device to run it.