In [1]:
# Cell 1: Basic imports and device
import os
from pathlib import Path
import json
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

import torch
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', DEVICE)


Device: cpu


In [2]:
print('If you need deps, run: pip install -r requirements.txt')


If you need deps, run: pip install -r requirements.txt


In [3]:
# Cell 3: Load a small text2text model (Flan-T5 small) as an example
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
MODEL = 'google/flan-t5-small'
print('Loading', MODEL)
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL).to(DEVICE)
    gen = pipeline('text2text-generation', model=model, tokenizer=tokenizer, device=0 if DEVICE=='cuda' else -1)
    print('Model and pipeline ready')
except Exception as e:
    print('Could not load model automatically in this environment. You can still prepare prompts and run on a machine with internet/GPU.')
    print(e)


Loading google/flan-t5-small


Device set to use cpu


Model and pipeline ready


In [4]:
# Cell 4: Simple generation helper
def generate_answer(prompt, max_len=128, temperature=0.7, num_return_sequences=1):
    try:
        out = gen(prompt, max_length=max_len, do_sample=True, temperature=temperature, num_return_sequences=num_return_sequences)
        return [o['generated_text'].strip() for o in out]
    except Exception as e:
        return [f'ERROR: {e}']

# quick test (works only if model loaded)
print(generate_answer('Q: Who wrote the novel "1984"?\nA:', max_len=64, temperature=0.0))


Both `max_new_tokens` (=256) and `max_length`(=64) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


["ERROR: `temperature` (=0.0) has to be a strictly positive float, otherwise your next token scores will be invalid. If you're looking for greedy decoding strategies, set `do_sample=False`."]


In [5]:
# Cell 5: FAISS demo with sentence-transformers (tiny corpus)
try:
    from sentence_transformers import SentenceTransformer
    import faiss
    EMB_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
    embedder = SentenceTransformer(EMB_MODEL)
    corpus = [
        'George Orwell was an English novelist, essayist, and critic best known for 1984 and Animal Farm.',
        'The capital of France is Paris.',
        'Python is a programming language created by Guido van Rossum.'
    ]
    corpus_emb = embedder.encode(corpus, convert_to_numpy=True)
    index = faiss.IndexFlatL2(corpus_emb.shape[1])
    index.add(corpus_emb)
    def retrieve_top_k(query, k=3):
        q_emb = embedder.encode([query], convert_to_numpy=True)
        D, I = index.search(q_emb, k)
        return [corpus[i] for i in I[0]]
    print('FAISS demo ready — try retrieve_top_k(\'Who wrote 1984?\')')
except Exception as e:
    print('FAISS or sentence-transformers not available in this kernel. Install requirements or run on a machine with internet.')
    print(e)



FAISS demo ready — try retrieve_top_k('Who wrote 1984?')


In [6]:
# Cell 6: Simple RAG: retrieve + prompt generator
def rag_generate(question, k=3):
    try:
        contexts = retrieve_top_k(question, k=k)
    except Exception:
        contexts = []
    combined_context = '\\n'.join([f"Evidence: {c}" for c in contexts])
    prompt = f"{combined_context}\\nQ: {question}\\nA: Provide a concise, evidence-backed answer."
    ans = generate_answer(prompt, max_len=128, temperature=0.0)[0]
    return ans, contexts

# Example (works if retrieval + generation exist)
print(rag_generate('Who wrote the novel 1984?'))


Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


("ERROR: `temperature` (=0.0) has to be a strictly positive float, otherwise your next token scores will be invalid. If you're looking for greedy decoding strategies, set `do_sample=False`.", ['George Orwell was an English novelist, essayist, and critic best known for 1984 and Animal Farm.', 'Python is a programming language created by Guido van Rossum.', 'The capital of France is Paris.'])


In [7]:
# Cell 7: Self-critique helper (ask the model to critique its answer)
def self_critique(question, answer):
    critique_prompt = (
        f"Q: {question}\\nA: {answer}\\n\\nPlease list any claims in the answer that might be uncertain or require citation. For each claim, say whether you are confident and if not, ask for evidence."
    )
    critique = generate_answer(critique_prompt, max_len=200, temperature=0.0)[0]
    return critique

# Example
print(self_critique('Who wrote 1984?', 'The novel 1984 was written by George Orwell.'))


Both `max_new_tokens` (=256) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


ERROR: `temperature` (=0.0) has to be a strictly positive float, otherwise your next token scores will be invalid. If you're looking for greedy decoding strategies, set `do_sample=False`.


In [8]:
# Cell 8: Save results & export stub
import pandas as pd
results = [{'id':1, 'question':'Who wrote 1984?', 'baseline':'George Orwell', 'rag':'George Orwell'}]
df = pd.DataFrame(results)
fn = Path('results.csv')
df.to_csv(fn, index=False)
print('Saved results.csv to', fn.resolve())


Saved results.csv to C:\Users\abiav\hallucination_cuda\results.csv
