In [3]:
import subprocess, time, requests, shlex, os, sys

# Install Ollama
cmd = "curl -fsSL https://ollama.com/install.sh | sh"
subprocess.run(cmd, shell=True, check=True)

# Start server in background
subprocess.Popen(shlex.split("ollama serve"), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Wait for server
for _ in range(40):
    try:
        requests.get("http://127.0.0.1:11434", timeout=2)
        print(" Ollama server is up.")
        break
    except Exception:
        time.sleep(0.5)
else:
    raise SystemExit("Ollama server not reachable.")


 Ollama server is up.


In [4]:
import subprocess

PRIMARY_MODEL = "llama3.2:1b"
FALLBACKS = ["llama3.2:3b", "mistral:7b-instruct-q4_K_M"]
MODEL = None

def pull(name):
    print(f"Pulling {name} …")
    r = subprocess.run(["ollama", "pull", name], text=True, capture_output=True)
    print(r.stdout[-400:])
    return r.returncode == 0

if pull(PRIMARY_MODEL):
    MODEL = PRIMARY_MODEL
else:
    for fb in FALLBACKS:
        if pull(fb):
            MODEL = fb
            break

if not MODEL:
    raise RuntimeError("Could not pull any model.")
print("Using model:", MODEL)


Pulling llama3.2:1b …

Using model: llama3.2:1b


In [5]:
import requests

def ollama_generate(prompt: str, model: str, stream: bool=False, options: dict=None) -> str:
    payload = {"model": model, "prompt": prompt, "stream": stream}
    if options:
        payload["options"] = options
    r = requests.post("http://127.0.0.1:11434/api/generate", json=payload, timeout=600)
    r.raise_for_status()
    return r.json().get("response","").strip()


In [6]:
# Swap CONTEXT_DOC with your own paragraph if you want
CONTEXT_DOC = (
    "Artificial intelligence is a field of computer science that focuses on creating systems capable of "
    "performing tasks that require human intelligence, such as reasoning, learning, and problem-solving."
)

TASKS = {
    "factual": {
        "context": "",
        "inputs": [
            {"q": "Who wrote Pride and Prejudice?", "gold": "Jane Austen"},
            {"q": "What is the chemical symbol for Gold?", "gold": "Au"},
        ],
    },
    "summarization": {
        "context": CONTEXT_DOC,
        "inputs": [{"q": "Summarize the context in two bullet points.", "gold": None}],
    },
    "reasoning": {
        "context": "",
        "inputs": [
            {"q": "If a train travels at 60 km/h for 2 hours, how far does it travel?", "gold": "120 km"},
            {"q": "A shop sells 3 apples for $2. How much do 9 apples cost?", "gold": "$6"},
        ],
    },
}


In [7]:
# Generic builders
def build_basic(q, context=""):
    return (f"{'Context: ' + context + '\n' if context else ''}"
            f"Question: {q}\nAnswer:")

def build_structured(q, context=""):
    return (f"{'Context: ' + context + '\n' if context else ''}"
            "Return ONLY valid JSON:\n"
            "{\n  \"answer\": \"...\",\n  \"confidence\": 0-1,\n  \"notes\": \"...\"\n}\n"
            f"Question: {q}\nJSON:")

def build_fewshot(q, context=""):
    examples = [
        ("If a car travels at 50 km/h for 3 hours, how far does it go?", "150 km"),
        ("If a person walks 5 km/h for 2 hours, how far do they walk?", "10 km"),
    ]
    ex = "\n".join([f"Q: {x}\nA: {y}" for x,y in examples])
    return (f"{'Context: ' + context + '\n' if context else ''}{ex}\nQ: {q}\nA:")

def build_cot(q, context=""):
    return (f"{'Context: ' + context + '\n' if context else ''}"
            "Think step by step. End with a line: Final Answer: <answer>\n"
            f"Question: {q}\nReasoning:")

strategies = {
    "basic": build_basic,
    "structured": build_structured,
    "fewshot": build_fewshot,
    "cot": build_cot,
}


In [8]:
import re, json, pandas as pd
from datetime import datetime, timezone

def extract_final_answer_from_cot(text: str) -> str:
    for line in text.splitlines():
        if line.strip().lower().startswith("final answer:"):
            return line.split(":", 1)[1].strip()
    return text.strip()

def try_parse_structured_answer(text: str) -> str | None:
    m = re.search(r"\{.*\}", text.strip(), flags=re.DOTALL)
    cand = m.group(0) if m else text
    try:
        obj = json.loads(cand.replace("\n","\\n"))
        if isinstance(obj, dict) and "answer" in obj:
            return str(obj["answer"]).strip()
    except Exception:
        pass
    return None

def normalize(s: str) -> str:
    return re.sub(r"\W+", "", (s or "").lower())

def numeric_value(s: str) -> float | None:
    if s is None: return None
    m = re.search(r"[-+]?\d*\.?\d+", s.replace(",",""))
    return float(m.group(0)) if m else None

def normalize_money(s: str) -> str | None:
    if s is None: return None
    s = s.replace("dollars","$").replace("USD","$").replace("usd","$")
    s = re.sub(r"\s+","", s.strip())
    s = re.sub(r"^(\d+)\$$", r"$\1", s)  # 6$ -> $6
    return s

def fuzzy_correct(pred: str, gold: str) -> bool | None:
    if gold is None: return None
    pred, gold = normalize_money(pred), normalize_money(gold)
    if normalize(pred) == normalize(gold): return True
    if re.search(rf"\b{re.escape(gold)}\b", pred or "", flags=re.IGNORECASE): return True
    gp, gg = numeric_value(pred), numeric_value(gold)
    if gp is not None and gg is not None: return abs(gp - gg) < 1e-6
    return False

rows = []
GEN_OPTS = {"temperature": 0.2}

for task, spec in TASKS.items():
    for item in spec["inputs"]:
        q, gold = item["q"], item["gold"]
        for name, builder in strategies.items():
            prompt = builder(q, spec["context"])
            raw = ollama_generate(prompt, MODEL, options=GEN_OPTS)
            if name == "structured":
                final_pred = try_parse_structured_answer(raw) or raw
            elif name == "cot":
                final_pred = extract_final_answer_from_cot(raw)
            else:
                final_pred = raw
            rows.append({
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "model": MODEL,
                "task": task,
                "strategy": name,
                "question": q,
                "gold": gold,
                "prompt": prompt,
                "raw_response": raw,
                "final_pred": final_pred,
                "correct": fuzzy_correct(final_pred, gold)
            })

df = pd.DataFrame(rows)
df.to_csv("prompt_lab_results.csv", index=False)
df.head(10)


Unnamed: 0,timestamp,model,task,strategy,question,gold,prompt,raw_response,final_pred,correct
0,2025-09-22T02:06:59.740221+00:00,llama3.2:1b,factual,basic,Who wrote Pride and Prejudice?,Jane Austen,Question: Who wrote Pride and Prejudice?\nAnswer:,Pride and Prejudice was written by the English...,Pride and Prejudice was written by the English...,False
1,2025-09-22T02:07:07.706804+00:00,llama3.2:1b,factual,structured,Who wrote Pride and Prejudice?,Jane Austen,"Return ONLY valid JSON:\n{\n ""answer"": ""..."",...","{\n ""answer"": ""Jane Austen"",\n ""confidence"":...","{\n ""answer"": ""Jane Austen"",\n ""confidence"":...",True
2,2025-09-22T02:07:13.381346+00:00,llama3.2:1b,factual,fewshot,Who wrote Pride and Prejudice?,Jane Austen,"Q: If a car travels at 50 km/h for 3 hours, ho...",A: The author of Pride and Prejudice is Jane A...,A: The author of Pride and Prejudice is Jane A...,False
3,2025-09-22T02:07:52.147733+00:00,llama3.2:1b,factual,cot,Who wrote Pride and Prejudice?,Jane Austen,Think step by step. End with a line: Final Ans...,"To answer this question, we need to identify t...","To answer this question, we need to identify t...",False
4,2025-09-22T02:07:54.971442+00:00,llama3.2:1b,factual,basic,What is the chemical symbol for Gold?,Au,Question: What is the chemical symbol for Gold...,The chemical symbol for gold is Au.,The chemical symbol for gold is Au.,False
5,2025-09-22T02:08:00.860039+00:00,llama3.2:1b,factual,structured,What is the chemical symbol for Gold?,Au,"Return ONLY valid JSON:\n{\n ""answer"": ""..."",...","{\n ""answer"": ""Au"",\n ""confidence"": 1,\n ""n...","{\n ""answer"": ""Au"",\n ""confidence"": 1,\n ""n...",True
6,2025-09-22T02:08:09.762206+00:00,llama3.2:1b,factual,fewshot,What is the chemical symbol for Gold?,Au,"Q: If a car travels at 50 km/h for 3 hours, ho...",I can help you with the chemical symbol for Go...,I can help you with the chemical symbol for Go...,False
7,2025-09-22T02:08:23.216826+00:00,llama3.2:1b,factual,cot,What is the chemical symbol for Gold?,Au,Think step by step. End with a line: Final Ans...,"To find the chemical symbol for gold, we need ...",Au.,True
8,2025-09-22T02:08:40.128609+00:00,llama3.2:1b,summarization,basic,Summarize the context in two bullet points.,,Context: Artificial intelligence is a field of...,Here are two bullet points summarizing the con...,Here are two bullet points summarizing the con...,
9,2025-09-22T02:08:52.937673+00:00,llama3.2:1b,summarization,structured,Summarize the context in two bullet points.,,Context: Artificial intelligence is a field of...,* Artificial intelligence is a field of comput...,* Artificial intelligence is a field of comput...,


In [9]:
# Correctness by task × strategy (only where gold exists)
pivot = (df[df["gold"].notna()]
         .pivot_table(index="task", columns="strategy", values="correct",
                      aggfunc=lambda s: f"{sum(bool(x) for x in s)}/{len(s)} correct"))
pivot


strategy,basic,cot,fewshot,structured
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
factual,0/2 correct,1/2 correct,0/2 correct,2/2 correct
reasoning,1/2 correct,1/2 correct,0/2 correct,1/2 correct


In [10]:
!pip -q install reportlab

from reportlab.lib.pagesizes import LETTER
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.lib import colors

styles = getSampleStyleSheet()
doc = SimpleDocTemplate("Prompt_Engineering_Lab_Report.pdf", pagesize=LETTER,
                        leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
story = []

def H(t): return Paragraph(f"<b>{t}</b>", styles["Heading2"])
def P(t): return Paragraph(t, styles["BodyText"])

story += [Paragraph("<b>Prompt Engineering Lab Report (Ollama + LLaMA)</b>", styles["Title"]), Spacer(1,0.2*inch)]
story += [H("Introduction"),
          P("This lab explores Basic, Structured Output, Few-shot, and Chain-of-Thought (CoT) prompts on factual Q&A, "
            "summarization, and reasoning tasks with a small local model served by Ollama in Google Colab."),
          Spacer(1,0.15*inch)]
story += [H("Methodology"),
          P(f"Model: {df['model'].iloc[0]}. We record raw outputs, parse JSON for structured prompts, extract CoT final lines, "
            "and grade with fuzzy rules (substring, numeric tolerance, currency normalization)."),
          Spacer(1,0.15*inch)]

# Results table
agg = (df.assign(correct_txt=df["correct"].map({True:"✔", False:"✘", None:"–"}))
         [["task","strategy","question","final_pred","gold","correct_txt"]])
tbl = [["Task","Strategy","Question","Prediction","Gold","✓"]]
for _, r in agg.iterrows():
    pred = r["final_pred"] or ""
    if len(pred) > 60: pred = pred[:60] + "…"
    tbl.append([r["task"], r["strategy"], r["question"], pred, r["gold"] or "—", r["correct_txt"]])

t = Table(tbl, colWidths=[0.95*inch,0.9*inch,2.2*inch,2.2*inch,0.7*inch,0.35*inch])
t.setStyle(TableStyle([
    ("GRID",(0,0),(-1,-1),0.5,colors.grey),
    ("BACKGROUND",(0,0),(-1,0),colors.lightgrey),
    ("VALIGN",(0,0),(-1,-1),"TOP")
]))
story += [H("Results"), t, Spacer(1,0.2*inch)]

story += [H("Analysis"),
          P("Structured prompts improved format reliability; Few-shot improved pattern fidelity; "
            "CoT improved multi-step reasoning but increased verbosity; Basic was fastest but least reliable on complex reasoning."),
          Spacer(1,0.15*inch)]
story += [H("Recommendations"),
          P("Use Structured for automation pipelines, Few-shot for repeatable patterns, CoT for reasoning, and keep prompts explicit.")]

doc.build(story)
print("Saved: Prompt_Engineering_Lab_Report.pdf")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/2.0 MB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m1.2/2.0 MB[0m [31m17.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hSaved: Prompt_Engineering_Lab_Report.pdf
