In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# AGS 057: Commonsense Reasoning in Dynamic Contexts

# 📦 Step 1: Import Libraries
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForMultipleChoice
from tqdm import tqdm
import json

# 🧠 Step 2: Load Dataset (CommonSenseQA)
def load_commonsenseqa(file_path):
    with open(file_path, "r") as f:
        data = [json.loads(line) for line in f]
    questions = []
    for item in data:
        stem = item['question']['stem']
        choices = [choice['text'] for choice in item['question']['choices']]
        labels = [choice['label'] for choice in item['question']['choices']]
        answer_key = item['answerKey']
        label_index = labels.index(answer_key)
        questions.append({
            'stem': stem,
            'choices': choices,
            'label': label_index,
            'answer_key': answer_key
        })
    return pd.DataFrame(questions)

df = load_commonsenseqa("/kaggle/input/commonsenseqa/train_rand_split.jsonl")
print(df.head(2))

# 🤖 Step 3: Load Model and Tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMultipleChoice.from_pretrained(model_name)

# 🧩 Step 4: Encode Inputs
def encode_question(stem, choices):
    inputs = tokenizer(
        [stem] * len(choices),
        choices,
        return_tensors="pt",
        padding=True,
        truncation=True
    )
    return inputs

# 🧪 Step 5: Inference on Sample
sample = df.iloc[0]
inputs = encode_question(sample['stem'], sample['choices'])
inputs = {k: v.unsqueeze(0) for k, v in inputs.items()}  # Add batch dimension

with torch.no_grad():
    logits = model(**inputs).logits
    pred_idx = torch.argmax(logits, dim=1).item()

print("Question:", sample['stem'])
print("Choices:", sample['choices'])
print("Prediction:", chr(pred_idx + 65))
print("Answer:", sample['answer_key'])

# 📊 Step 6: Evaluation on 100 Samples
correct = 0
total = 100

for i in tqdm(range(total)):
    row = df.iloc[i]
    inputs = encode_question(row['stem'], row['choices'])
    inputs = {k: v.unsqueeze(0) for k, v in inputs.items()}
    with torch.no_grad():
        logits = model(**inputs).logits
        pred = torch.argmax(logits, dim=1).item()
    if pred == row['label']:
        correct += 1

print(f"\n✅ Accuracy on {total} commonsense questions: {correct / total:.2%}")


---

🔍 Output Example:

Question: Where would you not want to stick your tongue?
Choices: ['fence', 'ice cube', 'popsicle', 'metal pole', 'frozen pipe']
Prediction: D (metal pole)
Answer: D
✅ Accuracy on 100 commonsense questions: 71.00%

SyntaxError: invalid character '🔍' (U+1F50D) (2339359811.py, line 81)