<a href="https://colab.research.google.com/github/HussainMehdi/AGI-Assignment-Gamma/blob/main/Tutorial_5a.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required packages for local execution
# Note: bitsandbytes requires CUDA for GPU quantization. For CPU-only, it will be skipped.
%pip install -q transformers accelerate ipywidgets wikipedia matplotlib numpy
# Install bitsandbytes only if CUDA is available (optional for CPU)
import subprocess
import sys
try:
    import torch
    if torch.cuda.is_available():
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "bitsandbytes"])
        print("bitsandbytes installed for GPU support")
    else:
        print("CUDA not available. Skipping bitsandbytes installation (CPU mode will be used)")
except:
    print("Could not check CUDA availability. Install bitsandbytes manually if needed.")

Note: you may need to restart the kernel to use updated packages.
CUDA not available. Skipping bitsandbytes installation (CPU mode will be used)


In [None]:
# Initialize the pipeline and load the LLM

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import torch

# Pick the device once; the loading options below branch on it.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Choose a free model (Falcon is lighter for Colab)
# Alternative: "mistralai/Mistral-7B-Instruct-v0.1"
model_name = "tiiuae/falcon-7b-instruct"

# GPU: 8-bit quantization (saves GPU memory) with automatic device placement.
# CPU: no quantization, float32 weights.
if device == "cuda":
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=False,
    )
    model_load_kwargs = {
        "device_map": "auto",
        "quantization_config": quantization_config,
    }
else:
    quantization_config = None
    print("Warning: Running on CPU. Quantization disabled. Model loading may be slow.")
    model_load_kwargs = {
        "device_map": "cpu",
        "torch_dtype": torch.float32,
    }

tokenizer = AutoTokenizer.from_pretrained(model_name)

# One load call; the per-device differences live entirely in model_load_kwargs.
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

# Entire pipeline: wraps the loaded model and tokenizer for text generation.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


Using device: cpu


`torch_dtype` is deprecated! Use `dtype` instead!


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

In [None]:
# Direct prompting: exercise the pipeline with a raw question and no prompt template.

query = "Why does crop health decay due to the weather?."  # prompt, passed to the model as-is

# Sampled generation (temperature 0.7), limited to 200 newly generated tokens.
response = pipe(
    query,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
)

# The pipeline returns a list of dicts; the text sits under 'generated_text'.
print("\n[Direct Prompting Result]\n", response[0]["generated_text"])

In [None]:
# Direct prompting: a raw medical question, again without any template.

query = "What is reason of dengue fever?."

# Sampled generation (temperature 0.7), limited to 200 newly generated tokens.
response = pipe(
    query,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
)

print("\n[Direct Prompting Result]\n", response[0]["generated_text"])

In [None]:
# Direct prompting: a raw course-planning question without any template.

query = "What topic I should learn for agentic ai course?."

# Sampled generation (temperature 0.7), limited to 200 newly generated tokens.
response = pipe(
    query,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
)

print("\n[Direct Prompting Result]\n", response[0]["generated_text"])

In [None]:
# Direct prompting with do_sample=True, so the generated text can differ from run to run.
query = "What topic I should learn for agentic ai course?."

response = pipe(
    query,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
)

print("\n[Direct Prompting Result]\n", response[0]["generated_text"])

In [None]:

 # Tool and prompt setup: a calculator is implemented, then the prompt is designed so that the model can use the calculator as a tool


# Define a calculator tool
def calculator_tool(task):
    """Evaluate an arithmetic expression and return the result as text.

    Args:
        task: Expression text such as "4+2".

    Returns:
        str: ``str()`` of the evaluated value, or "Error" when evaluation
        fails for any reason (syntax error, division by zero, unknown name,
        non-string input, ...).
    """
    # NOTE(security): `task` is taken from model output and is still run through
    # eval(). Builtins are removed, apart from a few numeric helpers, so that
    # `__import__`, `open` and friends are not reachable by name. This is a
    # mitigation, not a sandbox; do not expose this to untrusted users.
    allowed_names = {
        "__builtins__": {},
        "abs": abs,
        "round": round,
        "min": min,
        "max": max,
        "pow": pow,
    }
    try:
        return str(eval(task, allowed_names))
    except Exception:
        # `Exception` rather than a bare `except:` so Ctrl-C still interrupts the cell.
        return "Error"


# Ask a math question
user_query = "What is 4+2?"

#user_query = "What is weather?"

prompt = f"""
You are an AI agent. You can either:
 1. Answer directly using reasoning, OR
 2. Say: 'USE_CALCULATOR: <expression>' if it's math.


Question: {user_query}
"""

# return_full_text=False keeps only the newly generated text. By default the pipeline
# echoes the prompt, and the prompt itself contains "USE_CALCULATOR:", so the check
# below would always fire and hand the instruction text to the calculator.
response = pipe(prompt, max_new_tokens=100, return_full_text=False)[0]['generated_text']
print("\n[Agent Decision]\n", response)

# Check if calculator is needed
if "USE_CALCULATOR:" in response:
    # Keep only the first line after the marker and drop surrounding quotes/backticks,
    # so trailing model chatter is not evaluated as part of the expression.
    after_marker = response.split("USE_CALCULATOR:", 1)[1].strip()
    first_line = after_marker.splitlines()[0] if after_marker else ""
    expr = first_line.strip(" '\"`")
    result = calculator_tool(expr)
    print("\n[Calculator Used]\nResult:", result)
else:
    print("\n[AI Answer]\n", response)



In [None]:
# Define a symptom checker tool
def symptom_checker_tool(query):
    """Return canned advice for the first known symptom mentioned in ``query``.

    Matching is a case-insensitive substring test; symptoms are tried in the
    order fever, headache, cough.

    Args:
        query: Free-text description of the complaint.

    Returns:
        str: The advice for the first matching symptom, or a fixed
        "no information" message when none matches.
    """
    advice_by_symptom = {
        "fever": "Fever is a sign of infection. Monitor temperature and rest well.",
        "headache": "Headaches may be caused by dehydration, stress, or vision strain.",
        "cough": "Cough could be due to allergies or infection. Stay hydrated and avoid cold drinks.",
    }
    lowered = query.lower()
    matching_advice = (
        advice for symptom, advice in advice_by_symptom.items() if symptom in lowered
    )
    return next(matching_advice, "No information found for that symptom.")

# Define a prescription tool
def prescription_tool(condition):
    """Return a canned prescription for the first known condition in ``condition``.

    Matching is a case-insensitive substring test; conditions are tried in the
    order fever, headache, cough.

    Args:
        condition: Free-text description of the condition.

    Returns:
        str: "Prescription: <text>" for the first matching condition, or a
        fixed "no prescription" message when none matches.
    """
    remedies = {
        "fever": "Paracetamol 500mg every 6 hours if needed, stay hydrated, and rest.",
        "headache": "Ibuprofen 200mg if no stomach issues, drink water, and take rest.",
        "cough": "Cough syrup (dextromethorphan) twice daily and warm fluids.",
    }
    text = condition.lower()
    matched = next((name for name in remedies if name in text), None)
    if matched is None:
        return "No prescription available for that condition."
    return f"Prescription: {remedies[matched]}"

# Example query
user_query = "I have a headache, what medicine should I take?"

prompt = f"""
You are a helpful medical AI assistant.
You have access to two tools:
1. USE_SYMPTOM_TOOL: <query>  — for general symptom info.
2. USE_PRESCRIPTION_TOOL: <condition> — for medicine suggestions.

Decide which tool to use for the following question:
Question: {user_query}
"""

# Get response from model.
# return_full_text=False keeps only the newly generated text. By default the pipeline
# echoes the prompt, which names both tools, so the first branch below would always
# be taken regardless of what the model decided.
response = pipe(prompt, max_new_tokens=100, return_full_text=False)[0]['generated_text']
print("\n[Agent Decision]\n", response)

if "USE_PRESCRIPTION_TOOL" in response:
    condition = user_query  # or extract condition
    result = prescription_tool(condition)
    print("\n[Prescription Tool Used]\nResult:", result)

elif "USE_SYMPTOM_TOOL" in response:
    query = user_query
    result = symptom_checker_tool(query)
    print("\n[Symptom Tool Used]\nResult:", result)

else:
    # The model named neither tool: show its answer as-is.
    print("\n[AI Answer]\n", response)


In [None]:
# Installs the `wikipedia` package (it is also listed in the first cell's %pip install).
%pip install wikipedia


In [None]:
import re, math, numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from IPython.display import Image, display
import wikipedia
import google.generativeai as genai
import os


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import torch

# Check if CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

model_name = "tiiuae/falcon-7b-instruct"

# The tokenizer is the same for both devices, so load it before branching.
tokenizer = AutoTokenizer.from_pretrained(model_name)

if device != "cuda":
    # CPU path: no quantization, float32 weights, everything placed on the CPU.
    print("Warning: Running on CPU. Quantization disabled. Model loading may be slow.")
    modelF = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.float32,
    )

    # Create text-generation pipeline
    pipe = pipeline(
        "text-generation",
        model=modelF,
        tokenizer=tokenizer,
        device_map="cpu",
    )
else:
    # GPU path: 8-bit quantization with fp32 CPU offload enabled.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_enable_fp32_cpu_offload=True,
    )

    # device_map="auto" lets the loader split the model across GPU/CPU.
    modelF = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=quantization_config,
    )

    # Create text-generation pipeline
    pipe = pipeline(
        "text-generation",
        model=modelF,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto",
    )



**Tools**

In [None]:

# ---- Calculator Tool ----
def calculator_tool(task):
    """Evaluate a math expression, with the ``math`` module's names available.

    ``^`` is rewritten to ``**`` before evaluation, so "2^3" means two cubed.

    Args:
        task: Expression text, e.g. "2+2" or "sqrt(16)".

    Returns:
        str: ``str()`` of the evaluated value, or "Error" when evaluation
        fails for any reason (including non-string input).
    """
    try:
        expr = task.strip().replace("^", "**")
        # Evaluate against a fresh copy of math's namespace. eval() writes
        # assignment expressions such as "(pi := 3)" into the locals mapping, so
        # passing math.__dict__ itself would let an expression overwrite math.pi
        # for the rest of the session.
        math_names = dict(vars(math))
        # NOTE(security): `task` is taken from model output and is still run
        # through eval(). Disabling builtins is a mitigation, not a sandbox.
        return str(eval(expr, {"__builtins__": None}, math_names))
    except Exception:
        return "Error"

# ---- Datetime Tool ----
def datetime_tool(task):
    """Answer a date or time request from the current local clock.

    The request is lower-cased and scanned for keywords by substring match.
    Date keywords are checked before time keywords, so a request containing
    both kinds gets the date.

    Args:
        task: Free-text request.

    Returns:
        str: "YYYY-MM-DD" for date requests, "HH:MM:SS" for time requests,
        otherwise a hint asking for 'date' or 'time'.
    """
    current = datetime.now()
    lowered = task.lower()

    # (keywords, strftime format) pairs, tried in order.
    keyword_formats = (
        (("date", "today", "day"), "%Y-%m-%d"),
        (("time", "now", "clock"), "%H:%M:%S"),
    )
    for keywords, fmt in keyword_formats:
        for keyword in keywords:
            if keyword in lowered:
                return current.strftime(fmt)
    return "Please specify 'date' or 'time'."


# ---- Weather Tool (mock) ----
def weather_tool(city):
    """Return a canned weather report for ``city`` (mock data, no network call).

    The lookup ignores letter case and surrounding whitespace, because the
    city name usually arrives as free text.

    Args:
        city: City name, e.g. "London" or " london ".

    Returns:
        str: "The weather in <City> is <report>." for a known city (using the
        table's spelling of the name), otherwise
        "The weather in <city> is unavailable." with the input echoed as given.
    """
    data = {
        "New York": "10°C, cloudy",
        "London": "12°C, light rain",
        "Delhi": "28°C, clear",
        "Tokyo": "18°C, windy"
    }
    wanted = str(city).strip().casefold()
    for known_city, report in data.items():
        if known_city.casefold() == wanted:
            return f"The weather in {known_city} is {report}."
    return f"The weather in {city} is unavailable."

# ---- Wikipedia Tool ----
def wikipedia_tool(query):
    """Fetch a two-sentence Wikipedia summary for ``query``.

    Args:
        query: Topic to look up.

    Returns:
        str: The summary text, or "No result found." if the lookup raises
        any exception.
    """
    try:
        summary = wikipedia.summary(query, sentences=2)
    except Exception:
        # Best effort: every lookup failure collapses into the same message.
        return "No result found."
    return summary

# ---- Plot Tool ----
def plot_tool(expression, start=-10, end=10):
    """Plot ``y = expression`` over ``[start, end]`` and display the image.

    The expression is evaluated with ``x`` (400 evenly spaced samples) and
    ``np`` in scope. The figure is written to "plot_result.png" in the
    working directory and then displayed from that file.

    Args:
        expression: Expression in ``x``, e.g. "x**2 + 3*x + 2" or "np.sin(x)".
        start: Left end of the x range.
        end: Right end of the x range.

    Returns:
        str: A success message, or "Error plotting: <reason>" if anything fails.
    """
    try:
        x = np.linspace(start, end, 400)
        # NOTE(security): `expression` is taken from model output and is still run
        # through eval(). Disabling builtins is a mitigation, not a sandbox.
        y = eval(expression, {"x": x, "np": np, "__builtins__": None})
        # A constant expression such as "5" evaluates to a scalar; broadcast it so
        # it plots as a horizontal line instead of failing on a shape mismatch.
        y = np.broadcast_to(y, x.shape)

        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            ax.plot(x, y, label=f"y = {expression}")
            ax.set_title(f"Plot of {expression}")
            ax.set_xlabel("x")
            ax.set_ylabel("y")
            ax.legend()
            ax.grid(True)
            fig.savefig("plot_result.png")
        finally:
            # Always release the figure, even when plotting or saving raises.
            plt.close(fig)

        display(Image("plot_result.png"))
        return "✅ Plot generated and displayed below."
    except Exception as e:
        return f"Error plotting: {e}"

# ---- Tool Router ----
def tool_router(response_text):
    """Find a ``USE_TOOL: <NAME> <argument>`` directive and run the named tool.

    Directives are scanned in order of appearance. One that names an unknown
    tool is skipped, so a later valid directive can still be honoured. The
    argument is the rest of the directive's line, stripped of whitespace.

    Args:
        response_text: Model output to scan.

    Returns:
        dict | None: ``{"tool": <name>, "result": <tool output>}`` for the
        first directive naming a known tool, or None if there is none.
    """
    directives = list(
        re.finditer(r"USE_TOOL:\s*([A-Z]+)\s*(.*)", response_text.strip())
    )
    if not directives:
        return None

    tool_patterns = {
        "CALCULATOR": calculator_tool,
        "DATETIME": datetime_tool,
        "WEATHER": weather_tool,
        "WIKIPEDIA": wikipedia_tool,
        "PLOT": plot_tool
    }

    for directive in directives:
        tool_name, argument = directive.groups()
        tool_func = tool_patterns.get(tool_name)
        if tool_func is None:
            # Unknown tool name: keep looking instead of giving up.
            continue

        argument = argument.strip()
        print(f"[🧩 Tool Invoked: {tool_name}] with argument → {argument}")
        result = tool_func(argument)
        return {"tool": tool_name, "result": result}
    return None



Using Instruct Model

In [None]:
def run_agent(pipe, user_query):
    """Ask the model a question and run a tool if it requests one.

    The prompt lists the available tools and asks the model to reply with
    ``USE_TOOL: <TOOL_NAME> <query>`` when a tool is needed. The reply is
    printed, passed to ``tool_router``, and then either the tool result or
    the direct answer is printed.

    Args:
        pipe: Callable used as ``pipe(prompt, max_new_tokens=..., do_sample=...)``
            that returns a list whose first item has a "generated_text" key.
        user_query: The user's question.

    Returns:
        None. All output is printed.
    """
    prompt = f"""
You are an AI assistant with access to these tools:
- CALCULATOR: for math (e.g., 2+2)
- DATETIME: for date/time queries
- WEATHER: for city weather info
- WIKIPEDIA: for factual summaries
- PLOT: to visualize math functions (e.g., x**2)

If the question requires using a tool, you MUST respond ONLY with:
USE_TOOL: <TOOL_NAME> <query>

Do not write anything else.

Otherwise, answer directly.

Question: {user_query}
"""

    # Generate output using the pipeline
    response = pipe(prompt, max_new_tokens=150, do_sample=True)
    generated = response[0]["generated_text"]

    # Keep only what the model added after the prompt. Splitting on "Question:"
    # alone keeps the echoed question, and it loses the real answer when the model
    # goes on to write another "Question:" of its own.
    if generated.startswith(prompt):
        response_text = generated[len(prompt):].strip()
    else:
        # The prompt was not echoed verbatim: fall back to the last "Question:" split.
        response_text = generated.split("Question:")[-1].strip()

    print("\n[🤖 Model Output]\n", response_text)

    tool_result = tool_router(response_text)
    if tool_result:
        print(f"[Tool Result]: {tool_result['result']}")
    else:
        print("[Direct Answer]", response_text)


In [None]:

# "karachi" is not in weather_tool's mock table, so a WEATHER tool call for it reports "unavailable".
run_agent(pipe, "What is weather of karachi?")


In [None]:
 # Arithmetic question; the agent prompt lists CALCULATOR as the tool for math.
 run_agent(pipe, "What is 3 * 3?")


In [None]:
# Factual question; the agent prompt lists WIKIPEDIA as the tool for factual summaries.
run_agent(pipe, "Where is Karachi?.")


In [None]:
 # Plot request; the agent prompt lists PLOT as the tool for visualizing math functions.
 run_agent(pipe, "Plot x**2 + 3*x + 2")
