In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np

In [139]:
# UCI Spambase: read as a header-less CSV; the last column is the class label
# and every preceding column is a feature.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
data = pd.read_csv(data_url, header=None)

# Split the frame column-wise into a feature matrix and a label vector.
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

In [140]:
# Split BEFORE scaling: fitting the scaler on the full data set would let the
# test rows influence the mean/variance used to standardise the training rows
# (data leakage), which makes the reported test accuracy optimistic.
# Same test_size / random_state as before, so the row assignment is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell referring to X_transformed still works; it is
# now the full data set scaled with the train-only statistics.
X_transformed = scaler.transform(X)

In [141]:
class EmailDataset(Dataset):
    """Map-style dataset pairing feature rows with their labels.

    Args:
        X: Array-like of feature rows (one row per sample).
        Y: Array-like of labels, one per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``Y`` do not contain the same number of samples.

    Both inputs are converted to ``torch.float`` tensors once, at construction
    time, instead of building a fresh tensor on every ``__getitem__`` call.
    """

    def __init__(self, X, Y):
        if len(X) != len(Y):
            raise ValueError(
                f"X and Y must have the same number of samples, got {len(X)} and {len(Y)}"
            )
        # One-off conversion; indexing a tensor afterwards is cheap.
        self.X = torch.as_tensor(X, dtype=torch.float)
        self.Y = torch.as_tensor(Y, dtype=torch.float)

    def __len__(self):
        """Return the number of samples."""
        return len(self.Y)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index`` as float tensors."""
        return self.X[index], self.Y[index]

# Wrap each split in a dataset object.
train_dataset = EmailDataset(X=X_train, Y=y_train)
test_dataset = EmailDataset(X=X_test, Y=y_test)

# Mini-batches of 64: the training loader reshuffles every epoch, the test
# loader keeps the rows in their stored order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [142]:
class SpamClassifier(torch.nn.Module):
    """Binary classifier: Linear(input_dim, 64) -> ReLU -> Linear(64, 1) -> Sigmoid.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.layer = torch.nn.Sequential(
            torch.nn.Linear(input_dim, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 1),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        """Return one probability per input row.

        Only the trailing size-1 dimension is removed. A bare ``squeeze()``
        would also drop the batch dimension when the batch holds a single row,
        producing a 0-d tensor that no longer matches a ``(1,)`` target in
        ``BCELoss``.
        """
        return self.layer(x).squeeze(-1)

In [None]:
# Seed PyTorch so that weight initialisation and DataLoader shuffling are
# repeatable across "Restart & Run All".
torch.manual_seed(42)

model = SpamClassifier(X_train.shape[1])
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 50

for epoch in range(epochs):
    model.train()
    # The batch variable is named `features`, not `data`, so the loop does not
    # overwrite the DataFrame `data` created by the loading cell.
    for features, target in train_loader:
        optimizer.zero_grad()
        # Align the output with the target's shape so a final batch holding a
        # single row cannot trigger a size mismatch inside BCELoss.
        outputs = model(features).reshape(target.shape)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

    # Evaluation on Test set
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for features, target in test_loader:
            outputs = model(features).reshape(target.shape)
            predicted = (outputs > 0.5).float()
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print(f"Epoch {epoch+1}/{epochs}, Accuracy: {correct/total:.2f}")


In [163]:
def mock_transform(emails, feature_length):
    """Return placeholder features for ``emails``, passed through ``scaler``.

    The email text is never inspected: only ``len(emails)`` is used, to decide
    how many rows of uniform random numbers to draw. The result therefore
    carries no information about the messages themselves.

    Args:
        emails: Sized collection of emails; only its length matters.
        feature_length: Number of feature columns to generate per email.

    Returns:
        The output of ``scaler.transform`` applied to the random matrix.
    """
    row_count = len(emails)
    random_features = np.random.rand(row_count, feature_length)
    return scaler.transform(random_features)


# Sample message(s) to run through the prediction path.
test_emails = [
    "Congratulations! You've won a $1,000 Walmart gift card. Give me your password and user name. Click the link bellow and win the prize! Go to http://bit.ly/123456"
]

# One feature row per email, with as many columns as the training matrix.
transformed_emails = mock_transform(emails=test_emails, feature_length=X_train.shape[1])


def predict_spam(emails_transformed, trained_model):
    """Classify feature rows as spam (1.0) or ham (0.0).

    Args:
        emails_transformed: Array-like of feature rows, one per email.
        trained_model: Module mapping a float tensor of rows to probabilities.

    Returns:
        A 1-D float NumPy array with one entry per email: 1.0 where the model
        output exceeds 0.5, otherwise 0.0. The result is always 1-D, including
        when a single email is scored.
    """
    email_tensor = torch.tensor(emails_transformed, dtype=torch.float)

    # Run in inference mode without tracking gradients, then put the model
    # back into whatever mode the caller had it in.
    was_training = trained_model.training
    trained_model.eval()
    try:
        with torch.no_grad():
            outputs = trained_model(email_tensor)
    finally:
        trained_model.train(was_training)

    # reshape(-1) flattens (n, 1), (n,) and 0-d outputs alike to shape (n,).
    return (outputs > 0.5).float().reshape(-1).numpy()


predictions = predict_spam(transformed_emails, model)

# Report one verdict per email. np.atleast_1d lets the loop accept either a
# scalar-like result (one email) or an array (several emails); comparing a
# multi-element array directly in an `if` would raise an ambiguous-truth-value
# error.
for is_spam in np.atleast_1d(predictions):
    if is_spam == 1:
        print('This is a SPAM!')
    else:
        print('This is a HAM.')


This is a HAM.
