In [None]:
import torch
import sklearn
import pandas as pd
import csv

In [None]:
# Load the spam/ham dataset; the path is relative to the current working directory.
SpamHam = pd.read_csv(filepath_or_buffer='spam_ham_dataset.csv')
# Preview the first five rows.
SpamHam.head(n=5)

In [None]:
import re
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score


def clean_text(text):
    """Normalize a string for whitespace tokenization.

    The input is lower-cased, every character that is not an ASCII lowercase
    letter or whitespace is deleted (not replaced by a space, so "a,b"
    becomes "ab"), runs of whitespace are collapsed to a single space, and
    leading/trailing whitespace is trimmed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    letters_and_spaces = re.sub(r'[^a-z\s]', '', text.lower())
    single_spaced = re.sub(r'\s+', ' ', letters_and_spaces)
    return single_spaced.strip()

# Store a normalized copy of every message next to the raw 'text' column.
SpamHam['clean_text'] = SpamHam['text'].map(clean_text)
# Preview the first five cleaned messages.
SpamHam['clean_text'].head(n=5)


In [None]:
def create_vocab(texts):
    """Build a word-to-index mapping from an iterable of strings.

    Each string is split on whitespace; the distinct words are sorted and
    numbered consecutively from 0 in that sorted order.

    Args:
        texts: Iterable of strings.

    Returns:
        dict mapping each distinct word to its integer index.
    """
    unique_tokens = {token for document in texts for token in document.split()}
    return dict(zip(sorted(unique_tokens), range(len(unique_tokens))))

def text_to_bow(text, vocab):
    """Encode a string as a bag-of-words count vector.

    Args:
        text: String to encode; it is split on whitespace.
        vocab: Mapping from word to the vector position that counts it.

    Returns:
        A 1-D tensor of length ``len(vocab)`` holding, at each word's index,
        the number of times that word occurs in ``text``. Words missing from
        ``vocab`` are ignored.
    """
    counts = torch.zeros(len(vocab))
    known_tokens = (token for token in text.split() if token in vocab)
    for token in known_tokens:
        counts[vocab[token]] += 1
    return counts

# Word -> column index mapping built from every cleaned message.
vocabulary = create_vocab(SpamHam['clean_text'])

# Feature matrix: one bag-of-words row per message, one column per vocabulary word.
X = torch.stack(
    [text_to_bow(message, vocabulary) for message in SpamHam['clean_text']]
)

# Targets taken from the 'label_num' column.
y = torch.tensor(SpamHam['label_num'].values)


In [None]:
class SVM(torch.nn.Module):
    """Linear scorer: a single affine layer mapping ``input_dim`` features to one value."""

    def __init__(self, input_dim):
        """Create the layer.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        self.linear = torch.nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the raw (unsquashed) output of the linear layer for ``x``."""
        scores = self.linear(x)
        return scores

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def hinge_loss(scores, targets):
    """Return the mean binary hinge loss ``max(0, 1 - targets * scores)``.

    Args:
        scores: Raw decision values produced by the model.
        targets: Labels encoded as -1 / +1, broadcastable against ``scores``.

    Returns:
        A scalar tensor suitable for ``backward()``.
    """
    return torch.clamp(1 - targets * scores, min=0).mean()


# Initialize model, loss, and optimizer
torch.manual_seed(42)  # make the random weight initialization reproducible, like the split above
model = SVM(X.shape[1])
# torch.nn does not provide a HingeLoss class (referencing it raises
# AttributeError), so the SVM hinge loss is computed explicitly.
criterion = hinge_loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Convert the 0/1 labels to -1/+1 once; the result does not change between epochs.
y_train_normalized = 2 * y_train.float() - 1

# Training loop (full-batch gradient descent)
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass; squeeze only the output dimension so the batch dimension
    # survives even when the training set has a single row.
    outputs = model(X_train).squeeze(1)

    # Calculate loss
    loss = criterion(outputs, y_train_normalized)

    # Backward pass and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [None]:
# Evaluation
model.eval()

# Score the held-out split without tracking gradients.
with torch.no_grad():
    test_outputs = model(X_test).squeeze()
    # A positive score is reported as class 1, anything else as class 0.
    predicted = test_outputs.gt(0).float()

# The metrics only read the finished tensors, so they are computed outside the no_grad block.
print("Accuracy:", accuracy_score(y_test, predicted))
print("\nClassification Report:")
print(classification_report(y_test, predicted))