In [2]:
! pip install torchtext

[0mDefaulting to user installation because normal site-packages is not writeable
Collecting torchtext
  Obtaining dependency information for torchtext from https://files.pythonhosted.org/packages/1a/4b/40c40574e7f76cfea6b6b94928bb7d6ca44bf5aa1869347d8a71d7ff0563/torchtext-0.16.0-cp310-cp310-manylinux1_x86_64.whl.metadata
  Downloading torchtext-0.16.0-cp310-cp310-manylinux1_x86_64.whl.metadata (7.5 kB)
Collecting torchdata==0.7.0 (from torchtext)
  Obtaining dependency information for torchdata==0.7.0 from https://files.pythonhosted.org/packages/58/3f/e805df66f0308eebf735f794e87164013024924efac22b4432d7c09374ea/torchdata-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading torchdata-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading torchtext-0.16.0-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0ma 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import Counter
from torchtext.vocab import Vocab
from torch.utils.data import DataLoader, Dataset

In [2]:
# Enhanced Binary Text Classifier
class EnhancedBinaryTextClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(EnhancedBinaryTextClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = self.embedding(x)
        x = torch.mean(x, dim=1)
        x = self.dropout(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.sigmoid(x)

In [5]:
# Data Preparation
def preprocess(text):
    return text.lower().split()

def build_vocab(sentences):
    counter = Counter()
    for sentence in sentences:
        counter.update(preprocess(sentence))
    return Vocab(counter)

def encode(sentence, vocab):
    return [vocab.stoi[word] for word in preprocess(sentence)]

# Sample Statements
republican_statements = [
    "We must prioritize national security and strong borders.",
    "Lowering taxes is essential for economic growth and prosperity.",
    "It's crucial to defend the Second Amendment rights.",
    "Small government and individual freedoms are the core of our policy.",
    "Fiscal responsibility and balanced budgets should be our goal."
]

democratic_statements = [
    "Healthcare should be accessible and affordable for everyone.",
    "We need to address climate change with urgent environmental policies.",
    "Education funding is vital for the future of our country.",
    "We stand for social justice and equality for all citizens.",
    "Investing in renewable energy is key for a sustainable future."
]

all_statements = republican_statements + democratic_statements
labels = [0] * len(republican_statements) + [1] * len(democratic_statements)  # 0 for Republican, 1 for Democrat

In [6]:
# Build Vocabulary
vocab = build_vocab(all_statements)

# Encode Statements
encoded_data = [(encode(sentence, vocab), label) for sentence, label in zip(all_statements, labels)]

# Dataset and DataLoader
class TextDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

dataset = TextDataset(encoded_data)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

AttributeError: 'Vocab' object has no attribute 'stoi'

In [None]:
# Model Initialization
model = EnhancedBinaryTextClassifier(len(vocab), embedding_dim=10, hidden_dim=20)
loss_function = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training Loop
def train_model(model, epochs, dataloader):
    for epoch in range(epochs):
        total_loss = 0
        for inputs, target in dataloader:
            inputs = inputs[0]  # Unwrap batch
            model.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, target.float())
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}')

In [None]:
# Training the Model
train_model(model, epochs=10, dataloader=dataloader)