# Exercise 1 - How LLMs Predict Tokens

In [None]:
# Install transformers if not already installed
!pip install transformers --quiet

import torch
import torch.nn.functional as F
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:
# NOTE(review): `tokenizer` is first assigned in a later cell (GPT2Tokenizer.from_pretrained),
# so on a fresh kernel with "Run All" this cell raises NameError. Run it after the loading cell.
tokenizer

GPT2Tokenizer(name_or_path='gpt2', vocab_size=50257, model_max_length=1024, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}
)

In [None]:
# Load the pretrained GPT-2 model and tokenizer
# Both objects are created from the same checkpoint name via from_pretrained().
model_name = "gpt2"  # You can try other variants like "gpt2-medium"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
# eval() is the last expression of the cell, so the notebook also displays the module structure.
model.eval()  # Set the model to evaluation mode

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:
# Define the initial prompt
# NOTE(review): the generation cell further down repeats these two assignments,
# so this cell is only needed to inspect `input_ids` on its own.
prompt = "Once upon a time"
# return_tensors="pt" makes encode() return a PyTorch tensor instead of a plain list of ids.
input_ids = tokenizer.encode(prompt, return_tensors="pt")

In [None]:
# Display the encoded prompt (the token ids returned by tokenizer.encode).
input_ids

tensor([[7454, 2402,  257,  640]])

In [None]:
# Define the initial prompt
prompt = "Once upon a time"
input_ids = tokenizer.encode(prompt, return_tensors="pt")

print("Initial Prompt:")
print(prompt)
print("=" * 60)

# Set the number of iterations (tokens to generate)
num_iterations = 5
generated_ids = input_ids.clone()

# Generate tokens one by one with nicely formatted output.
# Each iteration feeds the whole sequence generated so far back into the model,
# inspects the distribution over the next token, and greedily appends the best one.
for i in range(num_iterations):
    # Inference only: disable autograd so no computation graph is built
    # (and kept alive) for every forward pass.
    with torch.no_grad():
        outputs = model(generated_ids)
    logits = outputs.logits

    # Focus on the logits of the last token (batch index 0, last position)
    next_token_logits = logits[0, -1, :]

    # Convert logits to probabilities using softmax
    next_token_probs = F.softmax(next_token_logits, dim=-1)

    # Get the top 5 tokens and their probabilities (sorted, most likely first)
    top_probs, top_indices = torch.topk(next_token_probs, 5)

    print(f"\n{'-'*20} Iteration {i+1} {'-'*20}")
    print("Top 5 Token Predictions:")
    print("{:<12s} {:>12s}".format("Token", "Probability"))
    print("-" * 30)
    for token_id, prob in zip(top_indices, top_probs):
        # .item() turns the 0-dim tensor into a plain int before decoding;
        # .strip() removes the leading space GPT-2 tokens usually carry, for display only.
        token_str = tokenizer.decode([token_id.item()]).strip()
        print("{:<12s} {:>10.2f}%".format(token_str, prob.item()*100))

    # Choose the token with the highest probability (greedy selection)
    next_token_id = top_indices[0].unsqueeze(0)

    # Append the chosen token to the sequence (reshaped to 1 x 1 to match the batch layout)
    generated_ids = torch.cat([generated_ids, next_token_id.unsqueeze(0)], dim=1)

    # Decode the full sequence to text and display it
    generated_text = tokenizer.decode(generated_ids[0])
    print("\nGenerated Text So Far:")
    print(generated_text)


Initial Prompt:
Once upon a time

-------------------- Iteration 1 --------------------
Top 5 Token Predictions:
Token         Probability
------------------------------
,                 42.69%
the                6.46%
I                  4.06%
he                 3.69%
there              2.94%

Generated Text So Far:
Once upon a time,

-------------------- Iteration 2 --------------------
Top 5 Token Predictions:
Token         Probability
------------------------------
the               11.07%
I                  6.37%
it                 3.99%
when               3.20%
there              3.04%

Generated Text So Far:
Once upon a time, the

-------------------- Iteration 3 --------------------
Top 5 Token Predictions:
Token         Probability
------------------------------
world              1.79%
people             0.55%
two                0.52%
only               0.49%
first              0.48%

Generated Text So Far:
Once upon a time, the world

-------------------- Iteration 4 -------

# Exercise 2 - Prompt Engineering

In [None]:
# Install necessary package
!pip install requests --quiet

import os
import requests
import json

# Read the Hugging Face API token from the environment so no credential is stored
# in the notebook. Set it before running, e.g. `%env HF_API_TOKEN=<your token>`.
# The placeholder fallback keeps the cell runnable; API calls will then be rejected.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HF_API_TOKEN")

# Define the API endpoint for the instruct model
API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
# Build the auth header from HF_API_TOKEN.
# NOTE(review): a literal token used to be inlined here; treat it as leaked and revoke it.
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Function to send a request to the Hugging Face API
def query(payload, timeout=120):
    """POST `payload` as JSON to API_URL and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body (prompt plus generation parameters).
        timeout: Seconds to wait for the server before `requests` raises a timeout
            error. Generous by default because the payload may ask the API to wait
            for the model to load.

    Returns:
        The decoded JSON body. If the body is not valid JSON (e.g. an HTML error
        page), a dict with "error", "status_code" and a truncated "body" is
        returned instead of raising, so callers can report it.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    try:
        return response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        return {
            "error": "Non-JSON response from API",
            "status_code": response.status_code,
            "body": response.text[:500],
        }

# Function to generate text from a prompt
def generate_text(prompt, max_new_tokens=200):
    """Send `prompt` to the hosted model via query() and return its text.

    Args:
        prompt: Prompt text. If it does not already contain "Answer:", the prompt
            is stripped and "\\nAnswer:" is appended to cue the model to respond.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The "generated_text" of the first result when the API returns a non-empty
        list of dicts; otherwise the raw API reply serialised with json.dumps
        (typically an error object), so the caller can see what went wrong.
    """
    # Append "Answer:" to prompt if not present to signal the model to respond
    if "Answer:" not in prompt:
        prompt = prompt.strip() + "\nAnswer:"

    # Define the payload with prompt and generation parameters
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,  # Limits response length
            "do_sample": True,                # Enables sampling for varied outputs
            "temperature": 0.8,               # Sampling temperature; lower is more deterministic
            "top_k": 50,                      # Limits token sampling to top 50
            "top_p": 0.95,                    # Uses nucleus sampling
            "repetition_penalty": 1.2,        # Discourages repetition
            "no_repeat_ngram_size": 2,        # Prevents repeating 2-grams
        },
        "options": {"wait_for_model": True}   # Waits if model is loading
    }
    result = query(payload)

    # Only index into the reply once its shape is confirmed: an empty list or a
    # list of non-dicts is reported like any other unexpected reply.
    if (
        isinstance(result, list)
        and result
        and isinstance(result[0], dict)
        and "generated_text" in result[0]
    ):
        return result[0]["generated_text"]
    return json.dumps(result)

# Define different prompt types with explanations
# Maps a prompt-style label to the prompt text passed to generate_text().
# None of these texts contains "Answer:", so generate_text() appends "\nAnswer:" to each.
prompts = {
    # Zero-Shot: Direct instruction with no examples
    # The model relies solely on its training to respond
    "Zero-Shot": "Explain photosynthesis.",

    # Few-Shot: Provides examples to guide the response format/style
    # Helps the model understand the expected output through context
    "Few-Shot": (
        "Explain photosynthesis.\n"
        "Example 1: In plants, photosynthesis converts sunlight into energy.\n"
        "Example 2: Chlorophyll in leaves absorbs sunlight to produce sugars."
    ),

    # Chain-of-Thought: Breaks the task into steps for logical reasoning
    # Encourages the model to think sequentially
    "Chain-of-Thought": (
        "Explain photosynthesis step-by-step:\n"
        "1. Describe how chlorophyll absorbs sunlight.\n"
        "2. Explain how light energy is converted into chemical energy.\n"
        "3. Describe how sugars are produced from carbon dioxide and water."
    ),

    # Instruction-Based: Assigns a role or specific instructions
    # Tailors the response to a particular audience or context
    "Instruction-Based": (
        "You are a science teacher. Explain photosynthesis in simple terms for middle school students."
    )
}

# Loop through each prompt type, generate response, and display it
# Each iteration makes one remote API call through generate_text().
# NOTE: the loop variable `prompt` rebinds the notebook-level name `prompt`.
for prompt_type, prompt in prompts.items():
    print("=" * 60)
    print(f"{prompt_type} Prompt:")
    print(prompt)
    print("-" * 60)
    print("Generated Output:")
    output = generate_text(prompt)
    print(output)
    print("=" * 60 + "\n")


Zero-Shot Prompt:
Explain photosynthesis.
------------------------------------------------------------
Generated Output:
Explain photosynthesis.
Answer: Photosynthesis is a process by which plants, algae and some bacteria convert light energy from the sun into chemical energy in form of glucose or sugar using water (H2O) absorbed through their roots along with carbon dioxide taken up via small openings on leaves called stomata to produce oxygen as waste product that gets released back out for us all animals including humans who breathe it directly 🌿✨ #BiologyLesson pic.twitter.com/5NKp3bzE1n— Dr_Daisy (@DrSunshineBio!) May 16, 2024

Few-Shot Prompt:
Explain photosynthesis.
Example 1: In plants, photosynthesis converts sunlight into energy.
Example 2: Chlorophyll in leaves absorbs sunlight to produce sugars.
------------------------------------------------------------
Generated Output:
Explain photosynthesis.
Example 1: In plants, photosynthesis converts sunlight into energy.
Example 2:

# Try Yourself

In [None]:
# Ask for a free-form prompt, send it through generate_text(), and print the reply.
# Any exception (including network errors) is reported instead of stopping the notebook.
try:
    user_prompt = input("Enter your own prompt to try: ")
    user_output = generate_text(user_prompt)
    print("Your Generated Output:")
    print(user_output)
except Exception as e:
    print(f"Error: {e}")

Enter your own prompt to try: explain llms to 5 year old
Your Generated Output:
explain llms to 5 year old
Answer: Imagine you have a super smart robot friend named "llm" who can understand what words mean just like we do. It's really good at learning and remembering things, so it helps us with our homework or find information on the internet when mommy says help me! But don’t worry about anything dangerous – this special helper is only there for fun stuff that makes your day better too :)


# Exercise 3 - RAG

In [None]:
 #Install required libraries
!pip install chromadb PyMuPDF sentence-transformers transformers requests --quiet

import chromadb
import fitz  # PyMuPDF for PDF extraction
import os
import requests
from sentence_transformers import SentenceTransformer

In [None]:
# Load embedding model
# The same model is used to embed both the stored chunks and the search queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize ChromaDB client and create a collection
# NOTE(review): PersistentClient(path="./chroma_db") presumably keeps data on disk across
# kernel restarts, so re-running the ingestion cell touches already-stored records — confirm.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="syllabus")

In [None]:
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at `pdf_path`.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        One string containing each page's text followed by a newline, in page order.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)

# Upload syllabus PDFs
# Maps a human-readable label to the PDF location. The label is reused as the id prefix
# and as the "source" metadata when the chunks are stored.
# NOTE(review): the absolute /content/... paths look Colab-specific — adjust when running elsewhere.
pdf_files = {
    "First-Year Syllabus": "/content/FE syllabus for Website.pdf",
    "Second-Year Syllabus": "/content/SE Syllabus for Website.pdf"
}

In [None]:
# Extract and store syllabus data
for name, pdf_path in pdf_files.items():
    syllabus_text = extract_text_from_pdf(pdf_path)
    chunks = syllabus_text.split("\n\n")  # Split into smaller chunks

    for i, chunk in enumerate(chunks):
        # Whitespace-only chunks carry no information and would only add noise to
        # retrieval. The original index `i` is kept in the id so ids stay stable.
        if not chunk.strip():
            continue

        embedding = embedding_model.encode(chunk).tolist()
        # upsert (rather than add) makes this cell safe to re-run against the
        # persistent database: existing ids are updated instead of duplicated/ignored.
        collection.upsert(
            ids=[f"{name}_{i}"],
            metadatas=[{"source": name}],
            documents=[chunk],
            embeddings=[embedding]
        )

print("✅ Syllabus data has been stored in ChromaDB!")

✅ Syllabus data has been stored in ChromaDB!


In [None]:
# Function to retrieve relevant syllabus sections
def retrieve_documents(query, top_k=2):
    """Return the stored text chunks closest to `query`.

    Args:
        query: Natural-language question to search for.
        top_k: Number of results requested from the collection.

    Returns:
        The first (and only) result list of documents, or an empty list when the
        collection reports no documents.
    """
    # Embed the question with the same model that embedded the stored chunks.
    vector = embedding_model.encode(query).tolist()
    hits = collection.query(query_embeddings=[vector], n_results=top_k)

    if not hits["documents"]:
        return []
    # One query embedding was sent, so the matches are in the first sub-list.
    return hits["documents"][0]

In [None]:
# Hugging Face API Setup for Phi-3-mini-4k-instruct
import os

# Read the token from the environment instead of storing it in the notebook.
# Set it before running, e.g. `%env HF_API_TOKEN=<your token>`.
# NOTE(review): a literal token used to be committed here; treat it as leaked and revoke it.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HF_API_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

In [None]:
#  Function to generate AI-powered answers
def generate_response(query):
    """Answer `query` with the LLM, grounded in retrieved syllabus chunks.

    The chunks returned by retrieve_documents() are joined with spaces and placed
    in the prompt ahead of the question.

    Args:
        query: The user's question.

    Returns:
        The model's "generated_text", or "Error: No response." when the API reply
        is not a non-empty list of dicts containing that key (including replies
        that are not valid JSON).
    """
    retrieved_docs = retrieve_documents(query)
    context = " ".join(retrieved_docs)  # Combine retrieved syllabus content

    prompt = f"Use the following syllabus details to answer the question:\n{context}\n\nQuestion: {query}\nAnswer:"

    # Call Hugging Face API for LLM inference
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 80,
            "do_sample": True,
            "temperature": 0.8,
            "top_k": 50,
            "top_p": 0.95,
            "repetition_penalty": 1.2,
            "no_repeat_ngram_size": 2,
        },
        "options": {"wait_for_model": True}
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection;
    # it is generous because the request asks the API to wait for model loading.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    try:
        result = response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page).
        return "Error: No response."

    # Check the shape before indexing so an empty list cannot raise IndexError.
    if (
        isinstance(result, list)
        and result
        and isinstance(result[0], dict)
        and "generated_text" in result[0]
    ):
        return result[0]["generated_text"]
    return "Error: No response."

In [None]:
queries = [
    # "What are the topics covered in Data Structures?",
    # "Explain the course content of Operating Systems."
    "What is taught in Digital Logic and Computer Architecture?"
]

# Run RAG pipeline on queries
# The loop variable is deliberately not named `query`: at notebook level that would
# rebind the `query()` helper from Exercise 2 to a string and break generate_text()
# for the rest of the session.
for question in queries:
    print("="*60)
    print(f"Query: {question}")
    print("-"*60)
    # Shown for inspection; generate_response() performs its own retrieval.
    print("Retrieved Syllabus Content:", retrieve_documents(question))
    print("-"*60)
    print("Generated AI Response:")
    print(generate_response(question))
    print("="*60 + "\n")


Query: What is taught in Digital Logic and Computer Architecture?
------------------------------------------------------------
Retrieved Syllabus Content: [' \nShah & Anchor Kutchhi Engineering College, B.Tech. in Computer Engineering                                                                                       R0-V0-2024-\n25 \n \nProgram: Second Year B.Tech.                                                          Semester: III \nL \nP \nC \nDigital Logic & Computer Architecture \n Course Code: CMCR0304 \n3 \n0 \n3 \n \n Course Objectives: \n1 \n To develop problem-solving skills by digital circuit design. \n2 \n To explore the structure and behavior of computer hardware and software components. \n3 \n To study the hierarchical memory and principles of advanced computing. \n \nCourse Outcomes: \nAfter successful completion of this course, the students should be able to \nCO 1: \n Explore different number systems and demonstrate the arithmetic algorithms. \nCO 2: \n Design and

# Exercise 4 - Agentic AI

In [None]:
# Install required packages
!pip install langchain openai faiss-cpu google-search-results --quiet


import os
import requests


# Read the Hugging Face API key from the environment instead of storing it in the notebook.
# Set it before running, e.g. `%env HF_API_TOKEN=<your token>`.
# NOTE(review): a literal token used to be committed here; treat it as leaked and revoke it.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HF_API_TOKEN")

# Hugging Face Inference API URL for Phi-3-mini
HF_API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

In [None]:
# Function to extract stock name using Hugging Face LLM
def extract_stock_name(query):
    """Ask the LLM which company/ticker `query` mentions and return a price sentence.

    Args:
        query: The user's free-text question.

    Returns:
        - the result of get_dummy_stock_price(name) when a name was generated,
        - "No stock name detected." when the model answers "none" or nothing,
        - "Error extracting stock name." when the API reply has an unexpected shape.
    """
    prompt = f"""
    Identify the company name or stock ticker symbol in the user's query: {query}.
    Do not return any additional text.

    Stock Name:
    """

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 10,  # Limit output to prevent hallucinations
            "temperature": 0.7,  # Lower temperature for more deterministic results
            "top_p": 0.9,  # Avoid extremely rare outputs
            # Without this the API echoes the prompt in front of the completion,
            # and the whole prompt ends up being treated as the stock name.
            "return_full_text": False,
        }
    }

    response = requests.post(HF_API_URL, headers=headers, json=payload).json()

    # Extract generated text (shape is checked first so an empty list cannot raise)
    if (
        isinstance(response, list)
        and response
        and isinstance(response[0], dict)
        and "generated_text" in response[0]
    ):
        stock_name = response[0]["generated_text"].strip()
        if stock_name.lower() == "none" or stock_name == "":
            return "No stock name detected."
        return get_dummy_stock_price(stock_name)  # Return dummy stock price instead of just the name

    return "Error extracting stock name."

In [None]:
import random

# Function to return a random stock price for testing
def get_dummy_stock_price(stock_name):
    """Produce a made-up price sentence for `stock_name`.

    A falsy name, or the sentinel text "No stock name detected.", yields a
    "not recognized" message instead. No real market data is involved.
    """
    recognized = bool(stock_name) and stock_name != "No stock name detected."

    if recognized:
        # Generate a random stock price between $100 and $1500, rounded to cents
        stock_price = round(random.uniform(100, 1500), 2)
        return f"The current price of {stock_name} is ${stock_price}."

    return "Stock name not recognized. Please specify a valid company or ticker symbol."


In [None]:
# Test Cases
# Each query is passed to extract_stock_name(), which calls the remote LLM and
# returns either a dummy price sentence or an error/"not detected" message.
queries = [
    "What is the latest stock price of Tesla?",
    "Tell me the price of Apple stock.",
    "How is Microsoft performing today?",
    "Can you provide me with the yearly performance of Google?",
    "I'm interested in the earnings call transcript for Apple"
]

for q in queries:
    print(f"Query: {q}")
    print(f"Response: {extract_stock_name(q)}")
    print("="*50)


Query: What is the latest stock price of Tesla?
Response: The current price of Identify the company name or stock ticker symbol in the user's query: What is the latest stock price of Tesla?. 
    Do not return any additional text.

    Stock Name:
    
    - Tesla (Ticker: is $1385.58.
Query: Tell me the price of Apple stock.
Response: The current price of Identify the company name or stock ticker symbol in the user's query: Tell me the price of Apple stock.. 
    Do not return any additional text.

    Stock Name:
    
    Output: Apple

2. is $1422.68.
Query: How is Microsoft performing today?
Response: The current price of Identify the company name or stock ticker symbol in the user's query: How is Microsoft performing today?. 
    Do not return any additional text.

    Stock Name:
    
    **Solution 1:** is $175.93.
Query: Can you provide me with the yearly performance of Google?
Response: The current price of Identify the company name or stock ticker symbol in the user's query: 

In [None]:
import random
import requests
import time
import json

# Dummy stock price function
def get_dummy_stock_price(stock_name):
    """Simulate a price lookup for `stock_name`.

    Draws a random price in the 100-1500 range (rounded to two decimals), prints
    a progress line, pauses for one second, and returns a sentence with the price
    in bold markdown.
    """
    lowest, highest = 100, 1500
    stock_price = round(random.uniform(lowest, highest), 2)

    print(f"🔍 Agent: Fetching stock price for {stock_name}... ✅")
    # Pause briefly so the simulated lookup feels like real work
    time.sleep(1)

    return f"The current price of {stock_name} is **${stock_price}**."

# Hugging Face API Key & Model Setup
import os

# Read the key from the environment instead of storing it in the notebook.
# Set it before running, e.g. `%env HF_API_TOKEN=<your token>`.
# NOTE(review): a literal token used to be committed here; treat it as leaked and revoke it.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HF_API_TOKEN")
HF_API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Improved function to extract the stock name using Hugging Face API
def _clean_stock_name(raw_text):
    """Reduce raw model output to a bare company/ticker name (first non-empty line)."""
    for line in raw_text.splitlines():
        # Drop an echoed "Output:" label plus surrounding quotes/markdown/punctuation.
        candidate = line.replace("Output:", "").strip().strip("\"'`*.,:;# ")
        if candidate:
            return candidate
    return ""


def extract_stock_name(query):
    """Identify the stock mentioned in `query` via the LLM and return a price sentence.

    Progress messages are printed along the way and short pauses simulate work.

    Args:
        query: The user's free-text question.

    Returns:
        The result of get_dummy_stock_price(name) when a name is detected;
        otherwise a user-facing message explaining that no stock was found or
        that the request/response failed.
    """
    print(f"\n🤖 Agent: Understanding your query... \"{query}\"")
    time.sleep(1)

    # Few-shot prompt. It ends with the same "Query / Output:" pattern as the
    # examples so the model's continuation is the answer itself rather than
    # free-form text.
    prompt = f"""
You are a financial entity extraction agent. Extract ONLY the company name or stock ticker mentioned in the query.
Return ONLY the company or ticker name without any additional text, explanation, or punctuation.
If no company or stock is mentioned, respond with "None".

Examples:
Query: "What's the current price of Apple stock?"
Output: Apple

Query: "Can you tell me TSLA's stock price?"
Output: TSLA

Query: "How is the market doing today?"
Output: None

Now extract from this query.
Query: "{query}"
Output:"""

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 10,    # Increased token limit slightly
            "temperature": 0.1,      # Very low but not zero for slight flexibility
            "top_p": 0.95,
            "return_full_text": False  # Only return the generated text, not the prompt
        }
    }

    try:
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=10)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"⚠️ Agent: API request failed with status code {response.status_code}")
            print(f"Response: {response.text}")
            return f"Sorry, I encountered an error while processing your request (Status code: {response.status_code})."

        # Parse JSON here: requests' JSON error is also a RequestException, so an
        # outer handler would misreport it as a network error.
        try:
            result = response.json()
        except ValueError:
            print("⚠️ Agent: Failed to parse API response as JSON")
            return "Sorry, I received an invalid response from the language model."

        # Accept the two reply shapes that carry "generated_text"; anything else is
        # an invalid reply rather than something to stringify into a stock name.
        if isinstance(result, list) and result and isinstance(result[0], dict):
            raw_text = result[0].get("generated_text")
        elif isinstance(result, dict):
            raw_text = result.get("generated_text")
        else:
            raw_text = None

        if not isinstance(raw_text, str):
            print("⚠️ Agent: Unexpected response format from the language model")
            return "Sorry, I received an invalid response from the language model."

        # Keep only the first meaningful line; the model often keeps generating
        # unrelated text after the answer.
        stock_name = _clean_stock_name(raw_text)

        # Check if a valid stock name was detected
        if stock_name.lower() in ["none", "", "null", "n/a"]:
            print("⚠️ Agent: No stock name detected in the query.")
            return "I couldn't identify a specific stock in your query. Please mention a company name or ticker symbol."

        print(f"📌 Agent: Detected stock name → **{stock_name}** ✅")
        time.sleep(1)
        return get_dummy_stock_price(stock_name)

    except requests.exceptions.RequestException as e:
        print(f"⚠️ Agent: Request error: {str(e)}")
        return "Sorry, I encountered a network error while processing your request."
    except Exception as e:
        print(f"⚠️ Agent: Unexpected error: {str(e)}")
        return "Sorry, an unexpected error occurred while processing your request."

# Interactive loop to simulate the agent
def main():
    """Run a console chat loop around extract_stock_name().

    Reads one line per turn. "exit" or "quit" (any letter case) ends the loop,
    blank input triggers a reminder, and anything else is answered by the agent.
    """
    print("💻 Stock Price Agent (Type 'exit' or 'quit' to end)")
    while True:
        user_query = input("\n💬 User: ")  # User input

        # Leave the loop on an explicit exit command
        if user_query.lower() in ["exit", "quit"]:
            print("👋 Agent: Goodbye!")
            return

        if user_query.strip():
            response = extract_stock_name(user_query)
            print(f"🤖 Agent: {response}")
        else:
            # Nothing but whitespace was entered
            print("Please enter a query about a stock price.")

# Start the interactive agent loop when this cell/script is executed directly.
if __name__ == "__main__":
    main()

💻 Stock Price Agent (Type 'exit' or 'quit' to end)

💬 User: what is tock price of tesla

🤖 Agent: Understanding your query... "what is tock price of tesla"
📌 Agent: Detected stock name → **### Solution:** ✅
🔍 Agent: Fetching stock price for ### Solution:... ✅
🤖 Agent: The current price of ### Solution: is **$491.4**.


KeyboardInterrupt: Interrupted by user