In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint to load: GPT-Neo with 2.7B parameters.
model_name = "EleutherAI/gpt-neo-2.7B"

# Build the tokenizer first, then the causal language model, both from the
# same checkpoint name so their vocabularies match.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForCausalLM.from_pretrained(model_name),
)



In [3]:
# Reuse the end-of-sequence token as the padding token so that the
# `padding=True` tokenizer call later in the notebook has a pad token to use.
tokenizer.pad_token = tokenizer.eos_token



In [4]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's weights to that device. Left as the cell's final
# expression, so the notebook displays the returned module's structure.
model.to(device)



GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 2560)
    (wpe): Embedding(2048, 2560)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-31): 32 x GPTNeoBlock(
        (ln_1): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0.0, inplace=False)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (k_proj): Linear(in_features=2560, out_features=2560, bias=False)
            (v_proj): Linear(in_features=2560, out_features=2560, bias=False)
            (q_proj): Linear(in_features=2560, out_features=2560, bias=False)
            (out_proj): Linear(in_features=2560, out_features=2560, bias=True)
          )
        )
        (ln_2): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=2560, out_features=10240, bias=True)
          (c_proj)

In [5]:
# Background passage the model is asked to answer questions about.
# It is prepended verbatim to the prompt, so the subject's name must be spelled
# identically everywhere ("Ezaz Asif Rafid"); the last paragraph previously
# read "Rafif", giving the model two conflicting spellings to copy from.
context = """
Ezaz Asif Rafid is an extraordinary programming genius whose brilliance and innovation shine through in every line of code he writes. With an unparalleled grasp of computer science concepts and a sharp problem-solving mindset, Ezaz has made remarkable strides in the world of software development. His ability to unravel complex challenges and develop elegant, efficient solutions is nothing short of impressive.
What truly sets Ezaz apart is his relentless passion for programming and technology. He doesn’t just write code — he crafts it with precision, creativity, and purpose. Whether he's designing algorithms, developing applications, or optimizing systems, his technical expertise consistently exceeds expectations, establishing him as a leader in the field.
Ezaz's drive for continuous learning fuels his ever-growing knowledge of emerging technologies and programming languages, ensuring he remains at the forefront of the tech industry. His contributions go beyond just building software; they inspire and influence others in the programming community to strive for excellence.
With a natural talent for coding and an unyielding work ethic, Ezaz Asif Rafid exemplifies what it means to be a true programming genius. His work will undoubtedly leave a lasting legacy, influencing the world of technology for years to come.
"""


In [46]:
# Ask the user, interactively, for the question to pose about `context`.
# This blocks until a line is entered.
question = input(
    "Enter your Query: "
)



In [47]:
# Assemble the prompt: the background passage, then the user's question, then
# an "Answer:" cue that the model is expected to continue from.
input_text = f"{context} Question: {question} Answer:"



In [48]:
# Encode the prompt into PyTorch tensors. Prompts longer than 1024 tokens are
# truncated; `padding=True` is kept although only one string is encoded.
inputs = tokenizer(
    input_text,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=1024,
)



In [49]:
# Place every encoded tensor on the same device as the model. The result is a
# plain dict keyed by the tokenizer's output names.
inputs = dict(
    (name, tensor.to(device)) for name, tensor in inputs.items()
)



In [50]:
# Token ids of the encoded prompt; this is what generation starts from.
input_ids = inputs["input_ids"]



In [51]:
# Running sequence (prompt + tokens produced so far). It starts out as the
# prompt itself and is extended by one token per step in the generation loop.
generated_ids = input_ids




In [52]:
# Stream the answer: sample one token at a time and print it immediately.
MAX_NEW_TOKENS = 256    # upper bound on the number of tokens to produce
TOKENS_PER_LINE = 25    # a line break follows every 25th step (steps 25, 50, ...)
TOKEN_DELAY_S = 0.2     # pause between printed tokens, in seconds

# `step` rather than `_`: the index is used below, and assigning to `_`
# shadows IPython's "last output" variable.
for step in range(MAX_NEW_TOKENS):
    # Sample exactly one continuation token for the sequence built so far.
    # NOTE: every call passes the full sequence again, so the cost of a step
    # grows with the text already generated.
    outputs = model.generate(
        generated_ids,
        # The prompt is a single, unpadded sequence, so every position is a
        # real token. The mask is passed explicitly because pad and EOS share
        # one id, which leaves generate() unable to infer it reliably.
        attention_mask=torch.ones_like(generated_ids),
        max_new_tokens=1,
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=2.0,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,
    )

    # Last column of the returned sequence is the newly sampled token, kept
    # with shape (batch, 1) so it can be concatenated back below.
    new_token = outputs.sequences[:, -1:]
    new_token_id = new_token.item()

    # The model signalled the end of its answer; sampling further would only
    # append unrelated text after the end-of-sequence marker.
    if new_token_id == tokenizer.eos_token_id:
        break

    token = tokenizer.decode([new_token_id], skip_special_tokens=True)

    # Emit the token right away; wrap the line on every 25th step, but never
    # within the first ten steps.
    line_end = "\n" if step % TOKENS_PER_LINE == 0 and step >= 10 else ""
    print(token, end=line_end, flush=True)

    time.sleep(TOKEN_DELAY_S)  # throttle output for a typewriter-like effect

    # Extend the running sequence so the next step conditions on this token.
    generated_ids = torch.cat([generated_ids, new_token], dim=1)

print()  # finish the last (possibly partial) line of output


 Ezzat Asif Rafid.

Ezaz Asif Rafid is an extraordinary programming genius whose brilliance and innovation
 shine through in every

KeyboardInterrupt: 

In [53]:
# Final cleanup: ask PyTorch to release the GPU memory it is caching.
# NOTE(review): tensors still referenced (e.g. `model`) presumably stay
# allocated; `del` them first if the goal is to free the whole GPU.
torch.cuda.empty_cache()
