<a href="https://colab.research.google.com/github/Moulyaamohankumar/CodeOfDuty-Krivya/blob/AI-DemoPrompt/Copy_of_Krivya.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q torch torchvision torchaudio transformers datasets evaluate accelerate wikipedia-api google-api-python-client


In [None]:
import os

import torch, numpy as np
from datasets import Dataset
from transformers import (
    RobertaTokenizerFast, RobertaForSequenceClassification,
    TrainingArguments, Trainer, pipeline
)
import evaluate


In [None]:
# Tiny hand-written demo corpus used to fine-tune the verdict classifier.
data = {
    "text": [
        "Water boils at 100°C at sea level",
        "Aliens landed in New York yesterday",
        "Some experts believe coffee might affect sleep",
        "Vaccines cause autism",
        "Studies show moderate exercise improves mood"
    ],
    "label": [0,2,1,2,0]  # 0=True,1=Not sure,2=False
}
# A fixed seed keeps the train/test split identical on every run, so the
# metrics reported further down are reproducible after Restart & Run All.
dataset = Dataset.from_dict(data).train_test_split(test_size=0.4, seed=42)
dataset


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2
    })
})

In [None]:
# Tokenizer matching the "roberta-base" checkpoint that is fine-tuned below.
tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
MAX_LEN = 256  # every example is padded / truncated to this many tokens


def preprocess(batch):
    """Tokenise a batch of texts and copy its labels under the "labels" key."""
    encoded = tokenizer(
        batch["text"],
        max_length=MAX_LEN,
        padding="max_length",
        truncation=True,
    )
    encoded["labels"] = batch["label"]
    return encoded


# Encode both splits in batches, drop the raw text column, and have the
# dataset hand back torch tensors.
tokenised = dataset.map(preprocess, remove_columns=["text"], batched=True)
tokenised.set_format("torch")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [None]:
# Class index -> verdict name, plus the inverse lookup built from it.
id2label = {0: "True", 1: "Not sure", 2: "False"}
label2id = {name: index for index, name in id2label.items()}

# roberta-base with a three-way classification head.
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    label2id=label2id,
    id2label=id2label,
    num_labels=3,
)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Metric objects are loaded once and reused on every evaluation pass.
acc = evaluate.load("accuracy")
f1 = evaluate.load("f1")


def compute_metrics(p):
    """Return accuracy and macro-F1 for one evaluation pass.

    ``p`` must expose ``predictions`` (per-class scores, arg-maxed over
    axis 1) and ``label_ids`` (the reference labels).
    """
    predicted = np.argmax(p.predictions, axis=1)
    gold = p.label_ids
    accuracy_result = acc.compute(predictions=predicted, references=gold)
    f1_result = f1.compute(predictions=predicted, references=gold, average="macro")
    return {
        "accuracy": accuracy_result["accuracy"],
        "f1_macro": f1_result["f1"],
    }

# Training configuration for the short demo fine-tuning run.
args = TrainingArguments(
    output_dir="krivya_demo",
    eval_strategy="epoch",           # evaluate after every epoch
    save_strategy="no",              # do not write checkpoints
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenised["train"],
    eval_dataset=tokenised["test"],
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement and avoids the deprecation warning.
    processing_class=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,No log,1.116636,0.5,0.333333
2,No log,1.117677,0.5,0.333333
3,No log,1.117784,0.5,0.333333




TrainOutput(global_step=3, training_loss=1.066986878712972, metrics={'train_runtime': 76.3146, 'train_samples_per_second': 0.118, 'train_steps_per_second': 0.039, 'total_flos': 1184010379776.0, 'train_loss': 1.066986878712972, 'epoch': 3.0})

In [None]:
# verdict: the classifier fine-tuned above, returning a score for every label
verdict_pipe = pipeline("text-classification",
                        model=trainer.model, tokenizer=tokenizer,
                        return_all_scores=True, truncation=True,
                        device=0 if torch.cuda.is_available() else -1)

# emotion / sentiment / toxicity
emotion_pipe   = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base")
# Pin the checkpoint and revision explicitly instead of relying on the
# library's default for "sentiment-analysis", which may change between releases.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)
toxicity_pipe  = pipeline("text-classification", model="unitary/toxic-bert")


Device set to use cpu
Device set to use cpu
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu
Device set to use cpu


In [None]:
import wikipediaapi
from googleapiclient.discovery import build
from transformers import pipeline


# Add a user agent to the Wikipedia API call
wiki = wikipediaapi.Wikipedia("MyFactCheckerApp/1.0 (myemail@example.com)", "en")


# Google Custom Search credentials (optional).
# SECURITY: keys must never be committed to the notebook. They are read from
# the environment; set them before running this cell, for example
#   os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")
# Any key that was previously hard-coded here is public and should be revoked.
# When either value is empty, search_google() returns no results.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
GOOGLE_CX = os.environ.get("GOOGLE_CX", "")

def search_google(query, n=7):
    """Return up to ``n`` result snippets from Google Custom Search.

    Best-effort: an empty list is returned when the credentials are missing,
    the query is blank, or the request fails (the error is printed, not
    raised).

    Args:
        query: Search text.
        n: Requested number of results; clamped to the 1..10 range the
           Custom Search API accepts.

    Returns:
        A list of snippet strings, possibly empty.
    """
    if not GOOGLE_API_KEY or not GOOGLE_CX:
        print("Google API Key or Custom Search CX not provided.")
        return []

    query = (query or "").strip()
    if not query:
        return []

    n = max(1, min(n, 10))
    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        res = service.cse().list(q=query, cx=GOOGLE_CX, num=n).execute()
    except Exception as e:
        print(f"Error during Google Search: {e}")
        return []

    # A result without a "snippet" is skipped; previously a single such item
    # raised KeyError and caused every result to be discarded.
    return [item["snippet"] for item in res.get("items", []) if item.get("snippet")]


def search_wikipedia(query, n=1):
    """Return the first 500 characters of the Wikipedia page titled ``query``.

    Best-effort, mirroring ``search_google``: an empty list is returned when
    the query is blank, the page does not exist, the summary is empty, or the
    lookup fails (the error is printed, not raised).

    Args:
        query: Page title to look up.
        n: Currently unused; at most one summary is returned.

    Returns:
        A list holding zero or one summary string.
    """
    query = (query or "").strip()
    if not query:
        return []
    try:
        page = wiki.page(query)
        if not page.exists():
            return []
        summary = page.summary[:500]
    except Exception as e:
        print(f"Error during Wikipedia lookup: {e}")
        return []
    # An empty summary is not usable evidence downstream.
    return [summary] if summary else []

# NLI model to compare claim with evidence (two independent checkpoints)
nli_roberta = pipeline(task="text-classification", model="ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli")
nli_bart = pipeline(task="text-classification", model="facebook/bart-large-mnli")

# Question-answering model that is run over the retrieved evidence
qa_model = pipeline(task="question-answering", model="deepset/roberta-base-squad2")


Some weights of the model checkpoint at ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Device set to use cpu


Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cpu


In [None]:
import wikipediaapi
from googleapiclient.discovery import build
from transformers import pipeline


# NOTE(review): this setup repeats the previous cell verbatim.
# Add a user agent to the Wikipedia API call
wiki = wikipediaapi.Wikipedia("MyFactCheckerApp/1.0 (myemail@example.com)", "en")


# Google Custom Search credentials (optional).
# SECURITY: keys must never be committed to the notebook. They are read from
# the environment; set them before running this cell, for example
#   os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")
# Any key that was previously hard-coded here is public and should be revoked.
# When either value is empty, search_google() returns no results.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
GOOGLE_CX = os.environ.get("GOOGLE_CX", "")

def search_google(query, n=7):
    """Return up to ``n`` result snippets from Google Custom Search.

    Best-effort: an empty list is returned when the credentials are missing,
    the query is blank, or the request fails (the error is printed, not
    raised).

    Args:
        query: Search text.
        n: Requested number of results; clamped to the 1..10 range the
           Custom Search API accepts.

    Returns:
        A list of snippet strings, possibly empty.
    """
    if not GOOGLE_API_KEY or not GOOGLE_CX:
        print("Google API Key or Custom Search CX not provided.")
        return []

    query = (query or "").strip()
    if not query:
        return []

    n = max(1, min(n, 10))
    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        res = service.cse().list(q=query, cx=GOOGLE_CX, num=n).execute()
    except Exception as e:
        print(f"Error during Google Search: {e}")
        return []

    # A result without a "snippet" is skipped; previously a single such item
    # raised KeyError and caused every result to be discarded.
    return [item["snippet"] for item in res.get("items", []) if item.get("snippet")]


def search_wikipedia(query, n=1):
    """Return the first 500 characters of the Wikipedia page titled ``query``.

    Best-effort, mirroring ``search_google``: an empty list is returned when
    the query is blank, the page does not exist, the summary is empty, or the
    lookup fails (the error is printed, not raised).

    Args:
        query: Page title to look up.
        n: Currently unused; at most one summary is returned.

    Returns:
        A list holding zero or one summary string.
    """
    query = (query or "").strip()
    if not query:
        return []
    try:
        page = wiki.page(query)
        if not page.exists():
            return []
        summary = page.summary[:500]
    except Exception as e:
        print(f"Error during Wikipedia lookup: {e}")
        return []
    # An empty summary is not usable evidence downstream.
    return [summary] if summary else []

# NLI model to compare claim with evidence (two independent checkpoints)
# NOTE(review): the previous cell already builds these same three pipelines;
# running this cell as well loads every checkpoint a second time.
nli_roberta = pipeline(task="text-classification", model="ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli")
nli_bart = pipeline(task="text-classification", model="facebook/bart-large-mnli")

# Question-answering model that is run over the retrieved evidence
qa_model = pipeline(task="question-answering", model="deepset/roberta-base-squad2")

# Upper-cased NLI labels mapped onto the verdict names used in this notebook.
_NLI_LABEL_TO_VERDICT = {
    "ENTAILMENT": "True",
    "CONTRADICTION": "False",
    "NEUTRAL": "Not sure",
}

# Substrings that suppress the QA "True" boost when present in a snippet.
# NOTE(review): matching is by plain substring, so e.g. "lead" also matches
# "leading" and "not" matches "another".
_QA_BLOCKING_TERMS = (
    "not", "no ", "myth", "false", "doesn't", "isn't",
    "without evidence", "hoax", "conspiracy", "debunk",
    "misinformation", "rumour", "rumor", "unsupported",
    "unfounded", "baseless",
    "risk", "contaminant", "pollution", "pollutant",
    "arsenic", "lead", "hazard", "exposure", "toxin",
    "safety limit", "guideline", "treated water",
)

_QA_MIN_SCORE = 0.45  # QA answers scoring at or below this are ignored
_QA_WEIGHT = 0.1      # QA evidence counts for a tenth of its raw score


def _first_result(output):
    """Unwrap a pipeline result that may be a dict or a list of dicts."""
    return output[0] if isinstance(output, list) else output


def _decide(votes, scored_items):
    """Turn accumulated votes into ``(verdict, confidence, best_snippet)``."""
    total = sum(votes.values())
    if total <= 0:
        return "Not sure", 0, "No strong evidence"
    verdict = max(votes, key=votes.get)
    confidence = round(100 * votes[verdict] / total)
    # Show the strongest snippet that actually supports the chosen verdict.
    supporting = [item for item in scored_items if item["label"] == verdict]
    best = max(supporting, key=lambda item: item["score"])
    return verdict, confidence, best["snippet"]


def verify_claim(claim):
    """Check a factual claim against retrieved evidence.

    Evidence snippets come from ``search_wikipedia`` and ``search_google``.
    Each snippet is scored by both NLI models as a (premise=snippet,
    hypothesis=claim) pair, and by the QA model, which adds a small "True"
    vote when its score exceeds 0.45 and the snippet contains none of the
    blocking terms.

    The verdict is the label with the largest accumulated vote and the
    confidence is that label's share of all votes, as a rounded percentage.
    Earlier the verdict was derived only from the magnitude of the single
    highest score, regardless of that item's label, so a strong
    contradiction was reported as "True".

    Args:
        claim: The statement to check. Surrounding whitespace is stripped;
            casing is preserved for the models and the title lookup.

    Returns:
        dict with keys "verdict" ("True" / "False" / "Not sure"),
        "confidence" (int, 0-100), "evidence" (the top supporting snippet,
        or a placeholder string), and "others" (every scored item).
    """
    claim = (claim or "").strip()

    # 1) Retrieve evidence; blank snippets cannot be scored.
    evidence = []
    if claim:
        evidence = [
            snippet
            for snippet in search_wikipedia(claim) + search_google(claim)
            if snippet and snippet.strip()
        ]
    if not evidence:
        return {
            "verdict": "Not sure",
            "confidence": 0,
            "evidence": "No info",
            "others": []
        }

    # 2) Score every snippet.
    votes = {"True": 0.0, "False": 0.0, "Not sure": 0.0}
    scored_items = []

    for snippet in evidence:
        # --- NLI models: snippet is the premise, claim is the hypothesis ---
        for nli_model in (nli_roberta, nli_bart):
            out = _first_result(
                nli_model({"text": snippet, "text_pair": claim}, truncation=True)
            )
            score = out["score"]
            mapped = _NLI_LABEL_TO_VERDICT.get(out["label"].upper(), "Not sure")

            votes[mapped] += score
            scored_items.append({
                "snippet": snippet,
                "model": nli_model.model.name_or_path,
                "label": mapped,
                "score": score
            })

        # --- QA model (tiny boost if no negation/risk words) ---
        qa = _first_result(qa_model(question=claim, context=snippet))
        lowered = snippet.lower()
        negation = any(term in lowered for term in _QA_BLOCKING_TERMS)

        if qa["score"] > _QA_MIN_SCORE and not negation:
            weighted = qa["score"] * _QA_WEIGHT
            votes["True"] += weighted
            scored_items.append({
                "snippet": snippet,
                "model": "QA",
                "label": "True",
                "score": weighted
            })

    # 3) Aggregate the votes into a verdict.
    verdict, confidence, best_snip = _decide(votes, scored_items)

    return {
        "verdict": verdict,
        "confidence": confidence,
        "evidence": best_snip,
        "others": scored_items
    }


Some weights of the model checkpoint at ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Device set to use cpu


Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cpu


In [None]:
def smart_check(text, threshold=70):
    """Print a verdict for ``text`` plus emotion, sentiment and toxicity readings.

    The fine-tuned classifier is consulted first. When its top score, as a
    percentage, is below ``threshold``, the verdict and top evidence from
    ``verify_claim`` are printed instead. Nothing is returned.
    """
    # Highest-scoring label from the notebook's own classifier.
    ranked = sorted(verdict_pipe(text)[0], key=lambda entry: entry["score"], reverse=True)
    top = ranked[0]
    label = top["label"]
    conf = top["score"] * 100

    if conf >= threshold:
        print(f"\nVerdict (model): {label} ({conf:.0f}%)")
    else:
        print(f"\nLow confidence ({conf:.0f}%). Checking external evidence...")
        fact = verify_claim(text)
        print(f"\nVerdict (evidence): {fact['verdict']} ({fact['confidence']}%)")
        print(f"Top evidence:\n  {fact['evidence']}")

    # All three auxiliary pipelines run before any of their results is printed.
    emotion = emotion_pipe(text)[0]
    sentiment = sentiment_pipe(text)[0]
    toxicity = toxicity_pipe(text)[0]
    print(f"\nEmotion: {emotion['label']} ({emotion['score']*100:.0f}%)")
    print(f"Sentiment: {sentiment['label']} ({sentiment['score']*100:.0f}%)")
    print(f"Toxicity: {toxicity['label']} ({toxicity['score']*100:.0f}%)")


In [None]:
# Interactive loop: keep checking statements until the user types 'quit'.
while True:
    try:
        claim = input("\nEnter statement (or 'quit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell or closing stdin ends the session cleanly
        # instead of dumping a traceback.
        break
    if claim.lower() == "quit":
        break
    if not claim:
        # Nothing was typed: do not run every model on an empty string.
        continue
    smart_check(claim)



# Task
Explain the error in the provided Python code snippet that is causing the program to hang after entering input. If possible, fix the error and incorporate the changes into the existing code. Otherwise, diagnose the error. The code snippet is:

```python
while True:
    claim = input("\nEnter statement (or 'quit'): ")
    if claim.lower().strip() == "quit":
        break
    smart_check(claim)
```

Console session:

```text
Enter statement (or 'quit'): Water boils at 100°C at sea level
```

The user reports that after entering "Water boils at 100°C at sea level" and pressing Enter, there is no response. The code is running in cell 16 of the notebook (cell ID `cvXgrGjA28Vm`).

## Inspect the running cell

### Subtask:
Check the output of the running cell (`cvXgrGjA28Vm`) for any hidden error messages or output that might not be immediately obvious.


## Add print statements

### Subtask:
Modify the `smart_check` function to add print statements at different stages to see how far the execution gets before it stops producing output.


**Reasoning**:
Modify the `smart_check` function to add print statements for debugging and execute the cell to update the function.



In [None]:
def smart_check(text, threshold=70):
    """Debug-instrumented variant of smart_check.

    Prints a progress marker around each pipeline call so that the stage at
    which execution stalls can be identified.

    NOTE(review): this redefinition shadows the smart_check defined in the
    earlier cell. In the lines visible here the emotion, sentiment and
    toxicity results are computed but never printed.
    """
    print("Starting smart_check function")
    # Step 1: our RoBERTa verdict
    out = verdict_pipe(text)[0]
    print("After verdict_pipe call")
    # Rank the per-label scores so out[0] is the most likely label.
    out = sorted(out, key=lambda x: x["score"], reverse=True)
    label, conf = out[0]["label"], out[0]["score"]*100

    if conf >= threshold:
        print(f"\nVerdict: {label} ({conf:.0f}%)")
    else:
        # Step 2: the classifier is unsure, so fall back to evidence retrieval.
        print(f"\nLow confidence ({conf:.0f}%). Checking evidence...")
        fact = verify_claim(text)
        print("After verify_claim call")
        print(f"Verdict: {fact['verdict']} ({fact['confidence']}%)")
        print(f"Evidence: {fact['evidence']}")

    # Step 3: auxiliary classifiers, each bracketed by progress markers.
    print("Before emotion_pipe call")
    emo = emotion_pipe(text)[0]
    print("After emotion_pipe call")

    print("Before sentiment_pipe call")
    sent = sentiment_pipe(text)[0]
    print("After sentiment_pipe call")

    print("Before toxicity_pipe call")
    tox  = toxicity_pipe(text)[0]
    print("After toxicity_pipe call")
