In [27]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
import gensim.downloader as api
# import wandb  # For logging

# # Initialize Weights & Biases
# wandb.init(project="commonsense_qa")

# -----------------------------
# 1. Load and Preprocess Dataset
# -----------------------------
dataset = load_dataset("commonsense_qa")
train_data, val_data = dataset['train'], dataset['validation']

# Preprocess text (Tokenization)
def preprocess(text):
    return text.lower().split()

train_sentences = [preprocess(q) for q in train_data['question']]
val_sentences = [preprocess(q) for q in val_data['question']]

# -----------------------------
# 2. Load Pretrained Word2Vec Model
# -----------------------------
wv = api.load('word2vec-google-news-300')

# Function to get sentence embeddings
def get_embedding(sentence, model):
    vectors = [model[word] for word in sentence if word in model]
    return np.mean(vectors, axis=0) if vectors else np.zeros(model.vector_size)

# Convert dataset into embeddings
X_train = np.array([get_embedding(q, wv) for q in train_sentences])
X_val = np.array([get_embedding(q, wv) for q in val_sentences])

# Convert labels to numerical values
label_map = {label: idx for idx, label in enumerate(set(train_data['answerKey']))}
y_train = np.array([label_map[label] for label in train_data['answerKey']])
y_val = np.array([label_map[label] for label in val_data['answerKey']])

# -----------------------------
# 3. Create PyTorch Dataset
# -----------------------------
class CommonsenseDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = CommonsenseDataset(X_train, y_train)
val_dataset = CommonsenseDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# -----------------------------
# 4. Model Architectures
# -----------------------------
class SimpleClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        return self.fc2(x)

class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(RNNModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=2, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        x = x.unsqueeze(1)
        _, (hidden, _) = self.lstm(x)
        x = self.relu(self.fc1(hidden[-1]))
        return self.fc2(x)

# Model parameters
input_dim = 300  # Word2Vec embedding size
hidden_dim = 128
output_dim = len(label_map)

# Instantiate models
classifier = SimpleClassifier(input_dim, hidden_dim, output_dim)
rnn_model = RNNModel(input_dim, hidden_dim, output_dim)

# -----------------------------
# 5. Training Function
# -----------------------------
def train_model(model, train_loader, val_loader, epochs=10):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()
    
    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        val_acc = evaluate_model(model, val_loader)
        # wandb.log({"Epoch": epoch + 1, "Loss": total_loss / len(train_loader), "Val Accuracy": val_acc})
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}, Val Accuracy: {val_acc:.4f}")

# -----------------------------
# 6. Evaluation Function
# -----------------------------
def evaluate_model(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    return correct / total

# Train and Evaluate Classifier
print("Training Classifier...")
train_model(classifier, train_loader, val_loader, epochs=10)

# Train and Evaluate RNN Model
print("Training RNN Model...")
train_model(rnn_model, train_loader, val_loader, epochs=10)

Training Classifier...
Epoch 1, Loss: 1.6098, Val Accuracy: 0.1974
Epoch 2, Loss: 1.6093, Val Accuracy: 0.2170
Epoch 3, Loss: 1.6059, Val Accuracy: 0.2064
Epoch 4, Loss: 1.6028, Val Accuracy: 0.2080
Epoch 5, Loss: 1.5976, Val Accuracy: 0.2269
Epoch 6, Loss: 1.5900, Val Accuracy: 0.2146
Epoch 7, Loss: 1.5831, Val Accuracy: 0.2031
Epoch 8, Loss: 1.5729, Val Accuracy: 0.2072
Epoch 9, Loss: 1.5619, Val Accuracy: 0.2056
Epoch 10, Loss: 1.5494, Val Accuracy: 0.2031
Training RNN Model...
Epoch 1, Loss: 1.6104, Val Accuracy: 0.2088
Epoch 2, Loss: 1.6098, Val Accuracy: 0.1925
Epoch 3, Loss: 1.6087, Val Accuracy: 0.1966
Epoch 4, Loss: 1.6068, Val Accuracy: 0.1998
Epoch 5, Loss: 1.6056, Val Accuracy: 0.2064
Epoch 6, Loss: 1.6014, Val Accuracy: 0.2138
Epoch 7, Loss: 1.5968, Val Accuracy: 0.2015
Epoch 8, Loss: 1.5904, Val Accuracy: 0.2129
Epoch 9, Loss: 1.5839, Val Accuracy: 0.1982
Epoch 10, Loss: 1.5773, Val Accuracy: 0.1892


In [40]:
def ask_question(question, model, label_map):
    # Preprocess the question
    question_tokens = preprocess(question)
    question_embedding = get_embedding(question_tokens, wv)

    # Convert to PyTorch tensor
    question_tensor = torch.tensor(question_embedding, dtype=torch.float32).unsqueeze(0)  # Add batch dimension

    # Predict answer
    model.eval()
    with torch.no_grad():
        output = model(question_tensor)
        _, predicted_label = torch.max(output, 1)

    # Reverse the label map to get the actual answer choice
    reversed_label_map = {v: k for k, v in label_map.items()}
    predicted_answer = reversed_label_map[predicted_label.item()]

    # Print question and answer
    print(f"Question: {question}")
    print(f"Predicted Answer: {predicted_answer}")
    return predicted_answer

# Example usage
question = "Where do you put your grapes just before checking out?"
predicted_answer = ask_question(question, classifier, label_map)  # Try classifier or rnn_model

Question: Where do you put your grapes just before checking out?
Predicted Answer: D
