In [None]:
#import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load and preprocess data

In [None]:
# Read the SMS dataset, decoding the file as ISO-8859-1 (Latin-1)
data = pd.read_csv(
    "/content/spam.csv",
    encoding="ISO-8859-1",
)
# Preview the first five rows
data.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [None]:
# Drop the unused "Unnamed" columns. errors="ignore" keeps this cell
# re-runnable: on a second run the columns are already gone and a plain
# drop would raise KeyError.
data = data.drop(columns=["Unnamed: 2", "Unnamed: 3", "Unnamed: 4"], errors="ignore")

# v2 holds the message text, v1 the class name ("ham" / "spam")
messages = data["v2"].tolist()
# Encode the class names as integers: "spam" -> 1, anything else -> 0
labels = torch.tensor([int(label == "spam") for label in data["v1"]])

# Split data into training and testing sets (80/20, fixed random_state so the
# split is the same on every run)
x_train, x_test, y_train, y_test = train_test_split(
    messages, labels, test_size=0.2, random_state=42
)

In [None]:
# Build the bag-of-words vocabulary from the training messages only
# (fit() returns the fitted vectorizer itself)
vectorizer = CountVectorizer().fit(x_train)
vectorizer

In [None]:
# Encode both splits as dense count matrices using the fitted vocabulary
x_train, x_test = (
    vectorizer.transform(split).toarray() for split in (x_train, x_test)
)

In [None]:
# Cast the label tensors to float, the dtype used for the loss targets
y_train, y_test = y_train.float(), y_test.float()

# Define and train the model

In [None]:
# Hyperparameters
input_size = x_train.shape[1]            # one input per feature column
hidden_size, output_size = 64, 1         # hidden width, single output unit
learning_rate, num_epochs = 0.01, 100    # optimizer step size, training passes

# Define model
class SpamFilter(nn.Module):
    """Feed-forward classifier with one hidden layer.

    Both the hidden layer and the output layer are followed by a sigmoid,
    so ``forward`` returns sigmoid-activated values.

    Args:
        input_size: number of input features.
        hidden_size: width of the hidden layer.
        output_size: number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Compute ``sigmoid(layer2(sigmoid(layer1(x))))``."""
        hidden = self.sigmoid(self.layer1(x))
        return self.sigmoid(self.layer2(hidden))

# Seed PyTorch's RNG before building the model so the randomly initialised
# weights (and therefore the training run) are the same after every
# Restart & Run All. 42 matches the random_state used for the data split.
torch.manual_seed(42)
model = SpamFilter(input_size, hidden_size, output_size)

In [None]:
# Loss and optimizer: binary cross-entropy on the model's sigmoid outputs,
# minimised with RMSprop over all model parameters
criterion = nn.BCELoss()
optimizer = optim.RMSprop(params=model.parameters(), lr=learning_rate)

In [None]:
# Train model (full batch: every epoch uses the whole training set)

# Convert the feature matrix to a float tensor once. Doing this inside the
# loop would repeat the same conversion and copy on every epoch.
x_train_tensor = torch.from_numpy(x_train).float()

model.train()
for epoch in range(num_epochs):
    # Forward pass. Squeeze only the trailing unit dimension, (N, 1) -> (N,),
    # so the shape matches y_train even for a single-row batch.
    outputs = model(x_train_tensor)
    loss = criterion(outputs.squeeze(-1), y_train)

    # Backward pass: clear old gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print intermediate results every 10 epochs
    if (epoch + 1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Epoch [10/100], Loss: 0.0480
Epoch [20/100], Loss: 0.0186
Epoch [30/100], Loss: 0.0109
Epoch [40/100], Loss: 0.0075
Epoch [50/100], Loss: 0.0056
Epoch [60/100], Loss: 0.0044
Epoch [70/100], Loss: 0.0035
Epoch [80/100], Loss: 0.0029
Epoch [90/100], Loss: 0.0025
Epoch [100/100], Loss: 0.0022


# Check the metrics and the result

In [None]:
# Evaluate model on the held-out test split
model.eval()
with torch.no_grad():
    outputs = model(torch.from_numpy(x_test).float())
    # Threshold at 0.5 and flatten (N, 1) -> (N,) so predictions have the
    # same shape as y_test
    predicted = (outputs >= 0.5).long().squeeze(-1)

# Convert y_test to integer tensor
y_test_int = y_test.long()

# Hand scikit-learn plain NumPy arrays of matching 1-D shape
y_true_np = y_test_int.numpy()
y_pred_np = predicted.numpy()

# Compute evaluation metrics (spam = 1 is the positive class)
accuracy = accuracy_score(y_true_np, y_pred_np)
precision = precision_score(y_true_np, y_pred_np)
recall = recall_score(y_true_np, y_pred_np)
f1 = f1_score(y_true_np, y_pred_np)

print('Accuracy: {:.2f}, Precision: {:.2f}, Recall: {:.2f}, F1: {:.2f}'.format(accuracy, precision, recall, f1))

Accuracy: 0.98, Precision: 1.00, Recall: 0.87, F1: 0.93


In [None]:
# Use the model for spam filtering
def is_spam(message, threshold=0.5):
    """Classify a single message with the trained model.

    Args:
        message: raw message text (a single string).
        threshold: minimum model output for the message to be labelled
            spam. Defaults to 0.5, the cut-off used for the test metrics.

    Returns:
        The string "spam" if the model output is >= ``threshold``,
        otherwise "not spam".
    """
    # Convert message to a one-row feature matrix using the fitted vocabulary
    x = vectorizer.transform([message]).toarray()
    # Make prediction; one input row gives one output value
    with torch.no_grad():
        probability = model(torch.from_numpy(x).float()).item()
    return "spam" if probability >= threshold else "not spam"

In [29]:
# Try the classifier on one example message
is_spam('REMINDER FROM Alberto: To get 2.50 pounds free call')

'spam'