In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import precision_recall_curve, auc, accuracy_score, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns # plotting problem

!pip install contractions
import contractions # expanding contractions in text can't -> cannot

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting textsearch>=0.0.21 (from contractions)
  Downloading textsearch-0.0.24-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting anyascii (from textsearch>=0.0.21->contractions)
  Downloading anyascii-0.3.2-py3-none-any.whl.metadata (1.5 kB)
Collecting pyahocorasick (from textsearch>=0.0.21->contractions)
  Downloading pyahocorasick-2.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (13 kB)
Downloading contractions-0.1.73-py2.py3-none-any.whl (8.7 kB)
Downloading textsearch-0.0.24-py2.py3-none-any.whl (7.6 kB)
Downloading anyascii-0.3.2-py3-none-any.whl (289 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.9/289.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyahocorasick-2.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (

In [2]:
torch.backends.cudnn.deterministic = True
random.seed(21)
np.random.seed(21)
torch.manual_seed(21)
torch.cuda.manual_seed_all(21)

In [3]:
import wandb
wandb.login(key = '02c8923278a3dc82932fafb9959cd6d7587dacc7')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
df = pd.read_json('/kaggle/input/amazon-product-review-spam-and-non-spam/Toys_and_Games/Toys_and_Games.json', lines=True).head(100000)
df = df.loc[:, ['reviewText', 'class']]
df

Unnamed: 0,reviewText,class
0,I love these felt nursery rhyme characters and...,1
1,I see no directions for its use. Therefore I h...,0
2,This is a great tool for any teacher using the...,1
3,"Great product, thank you! Our son loved the pu...",1
4,Although not as streamlined as the Algebra I m...,1
...,...,...
99995,Received this product in a timely fashion. I m...,0
99996,McFarlane Sports Series are fantastic and life...,1
99997,Fortune is a good figure. She has a very attra...,1
99998,I just thought that I'd jot a few words to let...,0


In [5]:
df['reviewText'] = df['reviewText'].apply(lambda x: contractions.fix(x))
df.loc[:, 'reviewText'] = df['reviewText'].str.lower()

# \W represents Special characters like "$" and "!!!"
df.loc[:, 'reviewText'] = df['reviewText'].str.replace('\W', ' ')

# \d represents Numeric digits like "19.99"
df.loc[:, 'reviewText'] = df['reviewText'].str.replace('\d', ' ')

df

Unnamed: 0,reviewText,class
0,i love these felt nursery rhyme characters and...,1
1,i see no directions for its use. therefore i h...,0
2,this is a great tool for any teacher using the...,1
3,"great product, thank you! our son loved the pu...",1
4,although not as streamlined as the algebra i m...,1
...,...,...
99995,received this product in a timely fashion. i m...,0
99996,mcfarlane sports series are fantastic and life...,1
99997,fortune is a good figure. she has a very attra...,1
99998,i just thought that i would jot a few words to...,0


In [6]:
def pr_auc_score(y_test, y_pred):
    precision, recall, _ = precision_recall_curve(y_test, y_pred)
    pr_auc = auc(recall, precision)
    return pr_auc

In [7]:
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(df['reviewText'], y, test_size=0.33, random_state=21)

In [8]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [9]:
class SpamDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = torch.tensor(texts, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        text = self.texts[index]
        text = text.unsqueeze(0)
        return text, self.labels[index]

In [10]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        batch_size = x.size(0)
        # Initialize the hidden state and cell state with appropriate dimensions
        h0 = torch.zeros(1, batch_size, 128).to(device)
        c0 = torch.zeros(1, batch_size, 128).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [11]:
class GRUClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(GRUClassifier, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        batch_size = x.size(0)
        # Initialize the hidden state with appropriate dimensions
        h0 = torch.zeros(1, batch_size, 128).to(device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

In [12]:
def train_and_eval(train_loader, test_loader, model, criterion, optimizer):
    
    wandb.watch(model, criterion, log="all", log_freq=10)
    start_time = time.time()
    # Training loop
    num_epochs = 15
    for epoch in range(num_epochs):
        model.train()
        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)
            outputs = model(texts)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        wandb.log({"epoch": epoch,"loss": loss})
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Evaluation
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for texts, labels in test_loader:
            texts, labels = texts.to(device), labels.to(device)
            outputs = model(texts)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate accuracy and PR AUC score
    accuracy = accuracy_score(all_labels, all_preds)
    pr_auc = pr_auc_score(all_labels, all_preds)
    
    wandb.log({"test_accuracy": accuracy})
    wandb.log({"test_pr_auc": pr_auc})

    print(f'Accuracy: {accuracy:.4f}')
    print(f'AUC Score: {pr_auc:.4f}')
    
    elapsed_time = time.time() - start_time
    wandb.log({"run_time_sec": elapsed_time})
    wandb.finish()
    return model

# LSTM

### CountVectorizer 

In [13]:
# Vectorization of text data (maybe change this to other embeddings or tokenization)
count_vectorizer = CountVectorizer(stop_words='english', max_df = 0.8, max_features = 2000)
X_train_vectorized = count_vectorizer.fit_transform(X_train).toarray()
X_test_vectorized = count_vectorizer.transform(X_test).toarray()

# Create PyTorch datasets
train_dataset = SpamDataset(X_train_vectorized, y_train)
test_dataset = SpamDataset(X_test_vectorized, y_test)

# Create PyTorch data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize device, model, optimizer, and loss function
model = LSTMClassifier(input_size=2000, hidden_size=128, num_layers=1, num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

wandb.init(project='RNN Spam Detection', name='LSTM CountVectorizer')

train_and_eval(train_loader, test_loader, model, criterion, optimizer)

[34m[1mwandb[0m: Currently logged in as: [33mmint21[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.17.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240508_175641-yjb05y24[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mLSTM CountVectorizer[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/yjb05y24[0m


Epoch [1/15], Loss: 0.2492
Epoch [2/15], Loss: 0.2750
Epoch [3/15], Loss: 0.2335
Epoch [4/15], Loss: 0.1443
Epoch [5/15], Loss: 0.1645
Epoch [6/15], Loss: 0.0787
Epoch [7/15], Loss: 0.0582
Epoch [8/15], Loss: 0.0138
Epoch [9/15], Loss: 0.0046
Epoch [10/15], Loss: 0.0189
Epoch [11/15], Loss: 0.0072
Epoch [12/15], Loss: 0.0010
Epoch [13/15], Loss: 0.0039
Epoch [14/15], Loss: 0.0003
Epoch [15/15], Loss: 0.0004
Accuracy: 0.9020
AUC Score: 0.9605


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         epoch ▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
[34m[1mwandb[0m:          loss ▇█▇▅▅▃▂▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:  run_time_sec ▁
[34m[1mwandb[0m: test_accuracy ▁
[34m[1mwandb[0m:   test_pr_auc ▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:         epoch 14
[34m[1mwandb[0m:          loss 0.00042
[34m[1mwandb[0m:  run_time_sec 93.98166
[34m[1mwandb[0m: test_accuracy 0.90203
[34m[1mwandb[0m:   test_pr_auc 0.96052
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mLSTM CountVectorizer[0m at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/yjb05y24[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: 

LSTMClassifier(
  (lstm): LSTM(2000, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

### TfidfVectorizer

In [14]:
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.8, max_features=2000)
X_train_vectorized = vectorizer.fit_transform(X_train).toarray()
X_test_vectorized = vectorizer.transform(X_test).toarray()

# Create PyTorch datasets
train_dataset = SpamDataset(X_train_vectorized, y_train)
test_dataset = SpamDataset(X_test_vectorized, y_test)

# Create PyTorch data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize device, model, optimizer, and loss function
model = LSTMClassifier(input_size=2000, hidden_size=128, num_layers=1, num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

wandb.init(project='RNN Spam Detection', name='LSTM TfidfVectorizer')

train_and_eval(train_loader, test_loader, model, criterion, optimizer)

[34m[1mwandb[0m: wandb version 0.17.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240508_175844-76pl85bi[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mLSTM TfidfVectorizer[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/76pl85bi[0m


Epoch [1/15], Loss: 0.1418
Epoch [2/15], Loss: 0.2081
Epoch [3/15], Loss: 0.1894
Epoch [4/15], Loss: 0.0968
Epoch [5/15], Loss: 0.1044
Epoch [6/15], Loss: 0.2505
Epoch [7/15], Loss: 0.2047
Epoch [8/15], Loss: 0.1602
Epoch [9/15], Loss: 0.1922
Epoch [10/15], Loss: 0.3542
Epoch [11/15], Loss: 0.0971
Epoch [12/15], Loss: 0.1123
Epoch [13/15], Loss: 0.1121
Epoch [14/15], Loss: 0.1559
Epoch [15/15], Loss: 0.1806
Accuracy: 0.9072
AUC Score: 0.9638


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         epoch ▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
[34m[1mwandb[0m:          loss ▂▄▄▁▁▅▄▃▄█▁▁▁▃▃
[34m[1mwandb[0m:  run_time_sec ▁
[34m[1mwandb[0m: test_accuracy ▁
[34m[1mwandb[0m:   test_pr_auc ▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:         epoch 14
[34m[1mwandb[0m:          loss 0.18063
[34m[1mwandb[0m:  run_time_sec 94.27006
[34m[1mwandb[0m: test_accuracy 0.90718
[34m[1mwandb[0m:   test_pr_auc 0.96379
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mLSTM TfidfVectorizer[0m at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/76pl85bi[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: 

LSTMClassifier(
  (lstm): LSTM(2000, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

# GRU

### CountVectorizer

In [15]:
# Vectorization of text data (maybe change this to other embeddings or tokenization)
count_vectorizer = CountVectorizer(stop_words='english', max_df = 0.8, max_features = 2000)
X_train_vectorized = count_vectorizer.fit_transform(X_train).toarray()
X_test_vectorized = count_vectorizer.transform(X_test).toarray()

# Create PyTorch datasets
train_dataset = SpamDataset(X_train_vectorized, y_train)
test_dataset = SpamDataset(X_test_vectorized, y_test)

# Create PyTorch data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize device, model, optimizer, and loss function
model = GRUClassifier(input_size=2000, hidden_size=128, num_layers=1, num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

wandb.init(project='RNN Spam Detection', name='GRU CountVectorizer')

train_and_eval(train_loader, test_loader, model, criterion, optimizer)

[34m[1mwandb[0m: wandb version 0.17.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240508_180048-coe606or[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mGRU CountVectorizer[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/coe606or[0m


Epoch [1/15], Loss: 0.2073
Epoch [2/15], Loss: 0.3498
Epoch [3/15], Loss: 0.1688
Epoch [4/15], Loss: 0.1492
Epoch [5/15], Loss: 0.1938
Epoch [6/15], Loss: 0.0416
Epoch [7/15], Loss: 0.0618
Epoch [8/15], Loss: 0.0270
Epoch [9/15], Loss: 0.0688
Epoch [10/15], Loss: 0.0208
Epoch [11/15], Loss: 0.0243
Epoch [12/15], Loss: 0.0011
Epoch [13/15], Loss: 0.0078
Epoch [14/15], Loss: 0.0057
Epoch [15/15], Loss: 0.0006
Accuracy: 0.9015
AUC Score: 0.9616


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         epoch ▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
[34m[1mwandb[0m:          loss ▅█▄▄▅▂▂▂▂▁▁▁▁▁▁
[34m[1mwandb[0m:  run_time_sec ▁
[34m[1mwandb[0m: test_accuracy ▁
[34m[1mwandb[0m:   test_pr_auc ▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:         epoch 14
[34m[1mwandb[0m:          loss 0.00064
[34m[1mwandb[0m:  run_time_sec 83.67921
[34m[1mwandb[0m: test_accuracy 0.90152
[34m[1mwandb[0m:   test_pr_auc 0.9616
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mGRU CountVectorizer[0m at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/coe606or[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: Fi

GRUClassifier(
  (gru): GRU(2000, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

### Ifidf Vectorizer

In [16]:
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.8, max_features=2000)
X_train_vectorized = vectorizer.fit_transform(X_train).toarray()
X_test_vectorized = vectorizer.transform(X_test).toarray()

# Create PyTorch datasets
train_dataset = SpamDataset(X_train_vectorized, y_train)
test_dataset = SpamDataset(X_test_vectorized, y_test)

# Create PyTorch data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize device, model, optimizer, and loss function
model = GRUClassifier(input_size=2000, hidden_size=128, num_layers=1, num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

wandb.init(project='RNN Spam Detection', name='GRU TfidfVectorizer')

train_and_eval(train_loader, test_loader, model, criterion, optimizer)

[34m[1mwandb[0m: wandb version 0.17.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240508_180242-zp3ymu3h[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mGRU TfidfVectorizer[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/zp3ymu3h[0m


Epoch [1/15], Loss: 0.1753
Epoch [2/15], Loss: 0.2660
Epoch [3/15], Loss: 0.1445
Epoch [4/15], Loss: 0.1970
Epoch [5/15], Loss: 0.1450
Epoch [6/15], Loss: 0.1286
Epoch [7/15], Loss: 0.1579
Epoch [8/15], Loss: 0.1147
Epoch [9/15], Loss: 0.1729
Epoch [10/15], Loss: 0.3026
Epoch [11/15], Loss: 0.1796
Epoch [12/15], Loss: 0.1558
Epoch [13/15], Loss: 0.1285
Epoch [14/15], Loss: 0.1289
Epoch [15/15], Loss: 0.2464
Accuracy: 0.9112
AUC Score: 0.9630


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         epoch ▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
[34m[1mwandb[0m:          loss ▃▇▂▄▂▂▃▁▃█▃▃▂▂▆
[34m[1mwandb[0m:  run_time_sec ▁
[34m[1mwandb[0m: test_accuracy ▁
[34m[1mwandb[0m:   test_pr_auc ▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:         epoch 14
[34m[1mwandb[0m:          loss 0.24637
[34m[1mwandb[0m:  run_time_sec 83.52071
[34m[1mwandb[0m: test_accuracy 0.91121
[34m[1mwandb[0m:   test_pr_auc 0.96299
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mGRU TfidfVectorizer[0m at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection/runs/zp3ymu3h[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/mint21/RNN%20Spam%20Detection[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
[34m[1mwandb[0m: F

GRUClassifier(
  (gru): GRU(2000, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)