In [1]:
# importing libs
import torch
from torch import nn
import pandas as pd
from torch.nn import functional as F
from sklearn.model_selection import train_test_split

# setting a random seed
torch.manual_seed(42)

<torch._C.Generator at 0x2add2a8fb30>

In [2]:
# device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
# importing dataset
df = pd.read_csv('spam.csv', encoding='ISO-8859-1')

# assigning dataset and labels
X = df["v2"]
y = df["v1"]

X, y

(0       Go until jurong point, crazy.. Available only ...
 1                           Ok lar... Joking wif u oni...
 2       Free entry in 2 a wkly comp to win FA Cup fina...
 3       U dun say so early hor... U c already then say...
 4       Nah I don't think he goes to usf, he lives aro...
                               ...                        
 5567    This is the 2nd time we have tried 2 contact u...
 5568                Will Ì_ b going to esplanade fr home?
 5569    Pity, * was in mood for that. So...any other s...
 5570    The guy did some bitching but I acted like i'd...
 5571                           Rofl. Its true to its name
 Name: v2, Length: 5572, dtype: object,
 0        ham
 1        ham
 2       spam
 3        ham
 4        ham
         ... 
 5567    spam
 5568     ham
 5569     ham
 5570     ham
 5571     ham
 Name: v1, Length: 5572, dtype: object)

In [4]:
# Splitting data for training and testing the model
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Checking the split
len(X_train), len(X_test), len(X)

(4457, 1115, 5572)

In [5]:
# converting the data and label into list
X_train = X_train.to_list()
X_test = X_test.to_list()
y_train = y_train.to_list()
y_test = y_test.to_list()

In [40]:
# mapping each word in vocab unique intergers
word_to_ix = {}
for sentence in X_train + X_test:
    sent = sentence.split()
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

In [7]:
# creating out model
class BoWClassifier(nn.Module):
    def __init__(self, num_labels, vocab_size):
        super(BoWClassifier, self).__init__()
        self.linear = nn.Linear(in_features=vocab_size,
                               out_features=num_labels)

    def forward(self, bow_vec):
        return F.log_softmax(self.linear(bow_vec), dim=1)


def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] +=1
    return vec.view(1,-1)
    

def make_target(label, label_to_xi):
    return torch.LongTensor([label_to_xi[label]])
    

model = BoWClassifier(NUM_LABELS, VOCAB_SIZE).to(device)


for param in model.parameters():
    print(param)
    

with torch.inference_mode():
    sample = X_train[0].split(), y_train[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector.to(device))
    print(log_probs)

Parameter containing:
tensor([[ 0.0061,  0.0066, -0.0019,  ..., -0.0057, -0.0018,  0.0047],
        [ 0.0003,  0.0077, -0.0065,  ..., -0.0043, -0.0024,  0.0052]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([0.0071, 0.0077], device='cuda:0', requires_grad=True)
tensor([[-0.6719, -0.7148]], device='cuda:0')


In [8]:
# Assigning interger to labels
label_to_ix = {"spam" : 0, "ham" : 1}

In [9]:
# Setting up loss and optimizer 
loss_fn = nn.NLLLoss()
optimizier = torch.optim.SGD(model.parameters(),
                           lr=0.01)

In [11]:
# training loop
epochs = 100
print(f"The size of X_train: {len(X_train)}")

for epoch in range(epochs):
    model.train()
    for i in range(len(X_train)):

        bow_vec = make_bow_vector(X_train[i].split(), word_to_ix)
        target = make_target(y_train[i], label_to_ix)
        optimizier.zero_grad()
        log_probs = model(bow_vec.to(device))

        loss = loss_fn(log_probs, target.to(device))
        loss.backward()
        optimizier.step()

    if epoch%10 == 0:
        model.eval()
        with torch.inference_mode():
            for i in range(len(X_test)):
                bow_vec = make_bow_vector(X_test[i].split(), word_to_ix)
                test_log_probs = model(bow_vec.to(device))
                test_target = make_target(y_test[i], label_to_ix)
                test_loss = loss_fn(test_log_probs, test_target.to(device))
        
        print(f"Epoch: {epoch} | Loss: {loss} | Test Loss: {test_loss}")

The size of X_train: 4457
Epoch: 0 | Loss: 0.05953257158398628 | Test Loss: 1.8723973035812378
Epoch: 10 | Loss: 0.014506937935948372 | Test Loss: 1.145749568939209
Epoch: 20 | Loss: 0.008510512299835682 | Test Loss: 0.9311169385910034
Epoch: 30 | Loss: 0.00580280926078558 | Test Loss: 0.8240097165107727
Epoch: 40 | Loss: 0.004236295353621244 | Test Loss: 0.7672484517097473
Epoch: 50 | Loss: 0.0032528128940612078 | Test Loss: 0.7330217361450195
Epoch: 60 | Loss: 0.0026007420383393764 | Test Loss: 0.7093334794044495
Epoch: 70 | Loss: 0.002145014703273773 | Test Loss: 0.6915085315704346
Epoch: 80 | Loss: 0.0018120075110346079 | Test Loss: 0.6773880124092102
Epoch: 90 | Loss: 0.0015600664773955941 | Test Loss: 0.6657783389091492


In [12]:
model.eval()
with torch.inference_mode():
    for i in range(len(X_test)):
        bow_vec = make_bow_vector(X_test[i].split(), word_to_ix)
        test_log_probs = model(bow_vec.to(device))
        test_target = make_target(y_test[i], label_to_ix)
        test_loss = loss_fn(test_log_probs, test_target.to(device))
print("Finally:")
print(f"Loss: {loss} | Test Loss: {test_loss}")

print(next(model.parameters())[:, word_to_ix["won"]])

Finally:
Loss: 0.0013815154088661075 | Test Loss: 0.6568889617919922
tensor([ 1.0921, -1.0920], device='cuda:0', grad_fn=<SelectBackward0>)


In [28]:
from pathlib import Path

# 1. Create model directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Create model save path
MODEL_NAME = "spam_prediction_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# 3. Save the model state_dict
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model.state_dict(),
          f=MODEL_SAVE_PATH)

Saving model to: models\spam_prediction_model_0.pth


In [32]:
#Loading the model
# Instantiating a new instance of our model class to load our model
loaded_model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# Load the saved state_dict for model_0
loaded_model.load_state_dict(torch.load(f=MODEL_SAVE_PATH))

  loaded_model.load_state_dict(torch.load(f=MODEL_SAVE_PATH))


<All keys matched successfully>

In [38]:
model.state_dict() ,loaded_model.state_dict() # The model have been loaded successfully :)

(OrderedDict([('linear.weight',
               tensor([[-0.2643, -0.1546, -0.4388,  ..., -0.0057, -0.0018,  0.0047],
                       [ 0.2707,  0.1689,  0.4304,  ..., -0.0043, -0.0024,  0.0052]],
                      device='cuda:0')),
              ('linear.bias', tensor([-2.8458,  2.8605], device='cuda:0'))]),
 OrderedDict([('linear.weight',
               tensor([[-0.2643, -0.1546, -0.4388,  ..., -0.0057, -0.0018,  0.0047],
                       [ 0.2707,  0.1689,  0.4304,  ..., -0.0043, -0.0024,  0.0052]])),
              ('linear.bias', tensor([-2.8458,  2.8605]))]))