# **AI-Driven NLP: Language Model Exploration**


## **Import Dependencies**

In [None]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import matplotlib.pyplot as plt
import seaborn as sns
import google.generativeai as genai
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer

## **Load Pre-trained Language Model**

In [None]:

def load_model(model_name="gpt2"):
    """Fetch a pre-trained causal language model together with its tokenizer.

    Args:
        model_name: Identifier handed unchanged to both ``from_pretrained``
            calls. Defaults to ``"gpt2"``.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    # Tuple elements are evaluated left to right, so the tokenizer is
    # created before the model.
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForCausalLM.from_pretrained(model_name),
    )

In [None]:
# Module-level tokenizer/model pair; generate_text() reads these globals.
tokenizer, model = load_model(model_name="gpt2")

## **Define Inference Function**

In [None]:
def generate_text(prompt, max_length=100):
    """Generate a continuation of ``prompt`` with the notebook's global model.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Text to continue.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
            Defaults to 100.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens skipped.
    """
    inputs = tokenizer(prompt, return_tensors="pt")

    # Some tokenizers (GPT-2 among them) define no padding token. Without an
    # explicit pad_token_id, generate() falls back to the EOS id and logs a
    # notice on every call; choosing the id here keeps the output clean while
    # still respecting a tokenizer that does define its own pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model.generate(
            **inputs, max_length=max_length, pad_token_id=pad_token_id
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)

## **Test Model Output**

In [None]:
# Seed text for the first smoke test; `prompt` is reused by later cells.
prompt = "Once upon a time, in a distant land"
generated_text = generate_text(prompt=prompt)
print("Generated Text:\n", generated_text)

In [None]:
def setup_gemini(api_key, model_name="gemini-1.5-pro-latest"):
    """Configure the ``genai`` client and build a generative model handle.

    Args:
        api_key: Key passed to ``genai.configure``.
        model_name: Name passed to ``genai.GenerativeModel``. Defaults to
            ``"gemini-1.5-pro-latest"``.

    Returns:
        The ``genai.GenerativeModel`` instance created for ``model_name``.
    """
    # configure() must run before the model object is constructed.
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name)

def generate_gemini_text(model, prompt):
    """Send ``prompt`` to ``model`` and return the text of its reply.

    Args:
        model: Object exposing ``generate_content(prompt)``.
        prompt: Prompt forwarded unchanged to ``generate_content``.

    Returns:
        The ``text`` attribute of the object returned by
        ``model.generate_content``.
    """
    return model.generate_content(prompt).text

import os

# The API key is read from the environment instead of being embedded in the
# notebook, so the credential never ends up in version control or in shared
# copies of this file.
# NOTE(review): a literal key used to be committed on this line; treat it as
# compromised and revoke it.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this cell."
    )

# Query Gemini with the same `prompt` used for the GPT-2 smoke test above.
gemini_model = setup_gemini(api_key, "gemini-1.5-pro-latest")
gemini_response = generate_gemini_text(gemini_model, prompt)
print("Gemini Output:\n", gemini_response)

## **Research Questions & Objectives**
* How well does GPT-2 understand contextual prompts?
* Can GPT-2 generate coherent and creative responses?
* What are the limitations of GPT-2 in handling ambiguous inputs?


## **Implementation of GPT-2**


In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Rebind the global tokenizer/model to the GPT-2-specific classes.
# The tokenizer is loaded first, then the model (left-to-right evaluation).
model_name = "gpt2"
tokenizer, model = (
    GPT2Tokenizer.from_pretrained(model_name),
    GPT2LMHeadModel.from_pretrained(model_name),
)

## **Exploration & Analysis**

In [None]:
# Prompts covering storytelling, scientific explanation and factual recall.
sample_prompts = [
    "Once upon a time in a futuristic city, there was a robot who",
    "The key difference between classical physics and quantum mechanics is",
    "A fascinating fact about black holes is that"
]

for prompt in sample_prompts:
    inputs = tokenizer(prompt, return_tensors='pt')
    # GPT-2 defines no padding token, so generate() would otherwise fall back
    # to the EOS id and log a notice for every prompt. Passing the id
    # explicitly keeps the cell output limited to the print below.
    output = model.generate(
        **inputs, max_length=50, pad_token_id=tokenizer.eos_token_id
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Print only the prompt and generated text without unnecessary messages
    print(f'Prompt: {prompt}\nGenerated: {generated_text}\n')

## **Evaluation of Results (BLEU & ROUGE)**


In [None]:
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer

# The reference sentence continues the robot prompt, so the hypothesis must be
# generated from that same prompt. Reusing whatever `output` was left over
# from an earlier cell would score an unrelated continuation against it.
eval_prompt = "Once upon a time in a futuristic city, there was a robot who"
reference = "Once upon a time in a futuristic city, there was a robot who helped humans."
generated_text = generate_text(eval_prompt, max_length=50)

# BLEU works on token lists; whitespace splitting is used for both sides.
bleu_score = sentence_bleu([reference.split()], generated_text.split())

# ROUGE-1 and ROUGE-L with stemming enabled, computed on the raw strings.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
rouge_scores = scorer.score(reference, generated_text)

print(f'BLEU Score: {bleu_score}')
print(f'ROUGE Scores: {rouge_scores}')

## **Conclusion & Insights**
* While GPT-2 is capable of generating fluent and grammatically correct text, it may face challenges with maintaining coherence over longer passages.
* Its ability to understand context is heavily influenced by how prompts are structured.
* To enhance its performance, especially in domain-specific tasks, further fine-tuning on targeted datasets is recommended.