In [None]:
import os
import re
import torch
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
from nltk.tokenize import word_tokenize
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

!pip install kaggle



In [None]:
# Mount Google Drive at /content/drive so the project files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the project folder on Drive.
%cd /content/drive/MyDrive/TextClassificationAttention
# One-time Kaggle downloads (GloVe 50d vectors and the complaints data), left disabled:
#!kaggle datasets download adityajn105/glove6b50d
#!kaggle datasets download dushyantv/consumer_complaints

/content/drive/MyDrive/TextClassificationAttention


In [None]:
#!unzip consumer_complaints.zip
#!unzip glove6b50d.zip

In [None]:
# Load the raw complaints file, then show its column names and first rows.
data = pd.read_csv("/content/drive/MyDrive/TextClassificationAttention/Consumer_Complaints.csv")
for preview in (data.columns, data.head()):
    print(preview)


Index(['Date received', 'Product', 'Sub-product', 'Issue', 'Sub-issue',
       'Consumer Complaint', 'Company Public Response', 'Company', 'State',
       'ZIP code', 'Tags', 'Consumer consent provided?', 'Submitted via',
       'Date Sent to Company', 'Company Response to Consumer',
       'Timely response?', 'Consumer disputed?', 'Complaint ID',
       'Unnamed: 18'],
      dtype='object')
  Date received           Product     Sub-product  \
0    03-12-2014          Mortgage  Other mortgage   
1    10-01-2016  Credit reporting             NaN   
2    10/17/2016     Consumer Loan    Vehicle loan   
3    06-08-2014       Credit card             NaN   
4    09/13/2014   Debt collection     Credit card   

                                      Issue                   Sub-issue  \
0  Loan modification,collection,foreclosure                         NaN   
1    Incorrect information on credit report              Account status   
2                Managing the loan or lease                  

# Preprocessing

In [None]:
# --- Hyperparameters ---
# Learning rate handed to the Adam optimizer.
lr = 0.0001
# Width of one GloVe vector (the glove.6B.50d file) and number of tokens kept per complaint.
vec_len = 50
seq_len = 20
num_epochs = 50
# --- Column names and artifact locations (all under the Drive project folder) ---
# Column holding the class label.
label_col = "Product"
tokens_path = "/content/drive/MyDrive/TextClassificationAttention/tokens.pkl"
labels_path = "/content/drive/MyDrive/TextClassificationAttention/labels.pkl"
data_path = "/content/drive/MyDrive/TextClassificationAttention/Consumer_Complaints.csv"
model_path = "/content/drive/MyDrive/TextClassificationAttention/attention.pth"
model_path_multihead = "/content/drive/MyDrive/TextClassificationAttention/multihead.pth"
vocabulary_path = "/content/drive/MyDrive/TextClassificationAttention/vocabulary.pkl"
embeddings_path = "/content/drive/MyDrive/TextClassificationAttention/embeddings.pkl"
glove_vector_path = "/content/drive/MyDrive/TextClassificationAttention/glove.6B.50d.txt"
# Column holding the free-text complaint.
text_col_name = "Consumer Complaint"
label_encoder_path = "/content/drive/MyDrive/TextClassificationAttention/label_encoder.pkl"
# Collapses the raw "Product" values into a smaller set of class names;
# several raw spellings map to the same class (e.g. three card products -> 'card').
product_map = {'Vehicle loan or lease': 'vehicle_loan',
               'Credit reporting, credit repair services, or other personal consumer reports': 'credit_report',
               'Credit card or prepaid card': 'card',
               'Money transfer, virtual currency, or money service': 'money_transfer',
               'virtual currency': 'money_transfer',
               'Mortgage': 'mortgage',
               'Payday loan, title loan, or personal loan': 'loan',
               'Debt collection': 'debt_collection',
               'Checking or savings account': 'savings_account',
               'Credit card': 'card',
               'Bank account or service': 'savings_account',
               'Credit reporting': 'credit_report',
               'Prepaid card': 'card',
               'Payday loan': 'loan',
               'Other financial service': 'others',
               'Virtual currency': 'money_transfer',
               'Student loan': 'loan',
               'Consumer Loan': 'loan',
               'Money transfers': 'money_transfer'}

In [None]:
def save_file(name, obj):
    """
    Pickle ``obj`` to the file at ``name``, overwriting any existing file.

    :param name: Destination path
    :param obj: Any picklable Python object
    """
    with open(name, mode="wb") as sink:
        pickle.Pickler(sink).dump(obj)


def load_file(name):
    """
    Function to load a pickle object

    :param name: Path of the pickle file to read
    :return: The unpickled object
    """
    # The context manager closes the handle even when unpickling raises.
    # NOTE: unpickling can execute code stored in the file; only load files
    # this notebook (or another trusted source) produced.
    with open(name, "rb") as f:
        return pickle.load(f)

GloVe embeddings

In [None]:
# Read every line of the GloVe text file. The encoding is stated explicitly so
# non-ASCII tokens decode the same way regardless of the platform default.
with open(glove_vector_path, "rt", encoding="utf-8") as f:
    emb = f.readlines()

In [None]:
vocabulary, embeddings = [], []

# Each line is "<token> <v1> <v2> ...": the first field is the word, the rest
# are the vector components (still strings here). Split once per line and
# skip blank lines, which would otherwise raise IndexError.
for item in emb:
    fields = item.split()
    if not fields:
        continue
    vocabulary.append(fields[0])
    embeddings.append(fields[1:])

In [None]:
# Parse the string components into a float32 matrix, one row per vocabulary word.
embeddings = np.asarray(embeddings, dtype=np.float32)

In [None]:
# Reserve index 0 for padding and index 1 for out-of-vocabulary words.
vocabulary = ["<pad>", "<unk>", *vocabulary]

In [None]:
# Prepend two rows for the special tokens: an all-ones vector for <pad> and the
# mean GloVe vector for <unk>. The width is taken from the matrix itself rather
# than hard-coded, so other GloVe dimensions work unchanged.
embeddings = np.vstack([np.ones(embeddings.shape[1], dtype=np.float32),
                        np.mean(embeddings, axis=0),
                        embeddings])

In [None]:
# Persist the embedding matrix and its matching vocabulary for later cells.
for artifact_path, artifact in ((embeddings_path, embeddings),
                                (vocabulary_path, vocabulary)):
    save_file(artifact_path, artifact)

# Process text

In [None]:
# Read the complaints CSV again, this time from the configured data_path.
data = pd.read_csv(data_path)

In [None]:
# Keep only the rows that actually have complaint text.
data = data.dropna(subset=[text_col_name])

In [None]:
# Collapse raw product names into the consolidated classes from product_map.
data = data.replace({label_col: product_map})

# Encode labels

In [None]:
# Learn the class -> integer mapping and encode the label column in one step.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(data[label_col])

In [None]:
# Persist the encoded labels together with the encoder that produced them.
for artifact_path, artifact in ((labels_path, labels),
                                (label_encoder_path, label_encoder)):
    save_file(artifact_path, artifact)

# Process text column


In [None]:
# Pull the complaint column out as a plain Python list.
input_text = data[text_col_name].tolist()

In [None]:
# Number of complaints left after dropping rows without text.
len(input_text)

277814

# Convert to lowercase

In [None]:
# Lowercase every complaint.
input_text = [complaint.lower() for complaint in tqdm(input_text)]

100%|██████████| 277814/277814 [00:00<00:00, 737053.34it/s]


# Remove punctuation except apostrophes

In [None]:
# Replace each run of characters that are not word characters, digits,
# apostrophes or whitespace with a single space.
punctuation_run = re.compile(r"[^\w\d'\s]+")
input_text = [punctuation_run.sub(" ", complaint)
              for complaint in tqdm(input_text)]

100%|██████████| 277814/277814 [00:13<00:00, 21047.88it/s]


# Remove numbers


In [None]:
# Delete digit runs. The pattern is a raw string: "\d" in a normal string
# literal is an invalid escape sequence and triggers a warning on newer Pythons.
input_text = [re.sub(r"\d+", "", i) for i in tqdm(input_text)]

100%|██████████| 277814/277814 [00:08<00:00, 31598.33it/s]


# Remove more than one consecutive instance of 'x'

In [None]:
# Drop runs of two or more consecutive 'x' characters.
x_run = re.compile(r'[x]{2,}')
input_text = [x_run.sub("", complaint) for complaint in tqdm(input_text)]

100%|██████████| 277814/277814 [00:05<00:00, 47203.01it/s]


# Replace multiple spaces with a single space

In [None]:
# Collapse runs of spaces. The result must be assigned back to `input_text`;
# the previous target name was misspelled, so this step was silently discarded.
input_text = [re.sub(' +', ' ', i) for i in tqdm(input_text)]

100%|██████████| 277814/277814 [00:22<00:00, 12217.23it/s]


# Tokenize the text

In [None]:
import nltk

# word_tokenize needs the 'punkt_tab' tokenizer data to be present.
nltk.download('punkt_tab')

# Split each cleaned complaint into word tokens; the progress bar refreshes
# at most once a minute.
tokens = []
for complaint in tqdm(input_text, mininterval=60):
    tokens.append(word_tokenize(complaint))

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

# Take the first 20 tokens in each complaint text

In [None]:
# Force every complaint to exactly seq_len tokens: keep the first seq_len of
# long texts and left-pad short ones with '<pad>'. seq_len comes from the
# config cell instead of a repeated literal.
tokens = [i[:seq_len] if len(i) >= seq_len
          else ['<pad>'] * (seq_len - len(i)) + i
          for i in tqdm(tokens)]


100%|██████████| 277814/277814 [00:03<00:00, 73475.70it/s] 


# Convert tokens to integer indices from vocabulary

In [None]:
def token_index(tokens, vocabulary, missing='<unk>'):
    """
    Map every word token to its integer position in the vocabulary.

    :param tokens: List of token lists, one inner list per text
    :param vocabulary: All words in the embeddings; a word's position in this
        list is its index
    :param missing: Token for words not present in the vocabulary
    :return: List of integer lists, parallel to ``tokens``
    :raises ValueError: If an out-of-vocabulary token is met and ``missing``
        itself is not in the vocabulary
    """
    # Build the word -> position table once. Looking tokens up with
    # `in` / `.index` on the list rescans the whole vocabulary for every
    # token; a dict makes each lookup constant time.
    lookup = {}
    for position, word in enumerate(vocabulary):
        # setdefault keeps the first position of a repeated word, which is
        # what list.index would return.
        lookup.setdefault(word, position)
    missing_idx = lookup.get(missing)

    idx_token = []
    for text in tqdm(tokens):
        idx_text = []
        for token in text:
            idx = lookup.get(token, missing_idx)
            if idx is None:
                # Same failure mode as vocabulary.index(missing) on a list.
                raise ValueError(f"{missing!r} is not in vocabulary")
            idx_text.append(idx)
        idx_token.append(idx_text)
    return idx_token


# save the tokens

In [None]:
# Turn the padded token lists into vocabulary indices and persist them.
tokens = token_index(tokens=tokens, vocabulary=vocabulary)
save_file(name=tokens_path, obj=tokens)

100%|██████████| 277814/277814 [1:01:17<00:00, 75.54it/s]


#Create attention

In [None]:
# Dropout probability read by AttentionModel when it builds its nn.Dropout layer.
dropout_prob = 0.5

In [None]:
class AttentionModel(nn.Module):
    """
    Attention pooling with a single learned scoring vector, followed by a
    linear classifier.

    Every position of the input is scored against ``attn_weights``; the scores
    pass through tanh and dropout, are normalised with a softmax over dim 1,
    and weight a sum of the input vectors. The pooled vector feeds a linear
    layer that emits ``n_classes`` logits.

    :param vec_len: Embedding width; inputs carry ``vec_len + 1`` features
    :param seq_len: Number of positions per input
    :param n_classes: Number of output logits
    """

    def __init__(self, vec_len, seq_len, n_classes):
        super().__init__()
        self.vec_len = vec_len
        self.seq_len = seq_len
        # Scoring vector of shape (vec_len + 1, 1): a zero for the first
        # feature, then Gaussian noise scaled by 1/sqrt(vec_len).
        scale = torch.sqrt(torch.tensor(vec_len))
        initial_weights = torch.cat([torch.tensor([[0.]]),
                                     torch.randn(vec_len, 1) / scale])
        self.attn_weights = nn.Parameter(initial_weights)
        self.activation = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)
        self.linear = nn.Linear(vec_len + 1, n_classes)
        # dropout_prob is the notebook-level setting defined before this class.
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, input_data):
        # assumes input_data is (batch, seq_len, vec_len + 1), as batched
        # TextDataset items are - TODO confirm for other callers
        n_features = self.vec_len + 1
        scores = torch.matmul(input_data, self.attn_weights)
        scores = self.dropout(self.activation(scores))
        attn = self.softmax(scores)
        # Copy the single weight per position across all features.
        attn = attn.repeat(1, 1, n_features).reshape(attn.shape[0],
                                                     self.seq_len,
                                                     n_features)
        pooled = torch.sum(input_data * attn, axis=1)
        return self.linear(pooled)

In [None]:
class MultiHeadAttention(nn.Module):
    """
    Multi-head self-attention over the sequence, sum-pooled and classified.

    The input (``vec_len + 1`` features per position) is projected to queries,
    keys and values, split into ``num_heads`` heads, and each head attends
    across the sequence positions. Head outputs are concatenated, summed over
    the sequence and passed to a linear layer producing ``n_classes`` logits.

    :param vec_len: Embedding width; inputs carry ``vec_len + 1`` features
    :param seq_len: Nominal sequence length (stored; forward reads the actual
        length from the input)
    :param n_classes: Number of output logits
    :param num_heads: Number of attention heads
    :raises ValueError: If ``num_heads`` is < 1 or larger than ``vec_len``
    """

    def __init__(self, vec_len, seq_len, n_classes, num_heads=8):
        super(MultiHeadAttention, self).__init__()
        if num_heads < 1 or vec_len < num_heads:
            raise ValueError(
                f"num_heads must be between 1 and vec_len ({vec_len}), got {num_heads}")
        self.vec_len = vec_len
        self.seq_len = seq_len
        self.num_heads = num_heads
        self.head_dim = vec_len // num_heads  # Dimension of each head
        # Width actually split across heads. It equals vec_len when vec_len is
        # divisible by num_heads; otherwise it is rounded down (50 with 8 heads
        # -> 48) so the per-head reshape is always valid.
        self.model_dim = self.head_dim * num_heads

        # Linear layers for query, key, and value
        self.query_linear = nn.Linear(vec_len + 1, self.model_dim)
        self.key_linear = nn.Linear(vec_len + 1, self.model_dim)
        self.value_linear = nn.Linear(vec_len + 1, self.model_dim)

        # Not used by forward(); kept so the attribute remains available.
        self.activation = nn.Tanh()
        self.softmax = nn.Softmax(dim=-1)  # Softmax applied along the last dimension
        self.linear = nn.Linear(self.model_dim, n_classes)

    def _split_heads(self, projected):
        """Reshape (batch, seq, model_dim) to (batch, heads, seq, head_dim)."""
        batch_size, seq_len, _ = projected.shape
        return projected.view(batch_size, seq_len,
                              self.num_heads, self.head_dim).transpose(1, 2)

    def forward(self, input_data):
        batch_size, seq_len = input_data.size(0), input_data.size(1)

        # Project input to query, key, and value, one slice per head. The head
        # axis is moved in front of the sequence axis so the matmuls below
        # compare positions with positions (not heads with heads).
        query = self._split_heads(self.query_linear(input_data))
        key = self._split_heads(self.key_linear(input_data))
        value = self._split_heads(self.value_linear(input_data))

        # Scaled dot-product scores: (batch, heads, seq, seq)
        scores = torch.matmul(query, key.transpose(-2, -1)) / (self.head_dim ** 0.5)
        attn_weights = self.softmax(scores)

        # Apply attention weights to value: (batch, heads, seq, head_dim)
        attn_output = torch.matmul(attn_weights, value)

        # Concatenate heads back to (batch, seq, model_dim), pool over the
        # sequence, and project to class logits.
        attn_output = attn_output.transpose(1, 2).reshape(batch_size, seq_len,
                                                          self.model_dim)
        attn_output = torch.sum(attn_output, dim=1)
        output = self.linear(attn_output)

        return output

#Create PyTorch dataset

In [None]:
class TextDataset(torch.utils.data.Dataset):
    """
    Dataset that turns stored token indices into embedding matrices on demand.

    Each item is a ``(label, features)`` pair, where ``features`` holds one row
    per token: a leading constant 1 followed by that token's embedding.
    """

    def __init__(self, tokens, embeddings, labels):
        """
        :param tokens: Per-text lists of vocabulary indices
        :param embeddings: Word embeddings (from glove), indexable by row
        :param labels: Labels, parallel to ``tokens``
        """
        self.tokens = tokens
        self.embeddings = embeddings
        self.labels = labels

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        token_ids = self.tokens[idx]
        vectors = torch.tensor(self.embeddings[token_ids, :])
        # Column of ones placed in front of every embedding row.
        ones_column = torch.ones(vectors.shape[0], 1)
        features = torch.cat((ones_column, vectors), dim=1)
        label = torch.tensor(self.labels[idx])
        return label, features

#Function to train the model

In [None]:
def train(train_loader, valid_loader, model, criterion, optimizer,
          device, num_epochs, model_path):
    """
    Function to train the model

    Runs ``num_epochs`` passes of training followed by validation, prints the
    mean per-batch losses, and saves the model's state dict to ``model_path``
    whenever the validation loss improves on the best seen so far.

    :param train_loader: Data loader for train dataset; yields (labels, data)
    :param valid_loader: Data loader for validation dataset
    :param model: Model object
    :param criterion: Loss function
    :param optimizer: Optimizer (must be built from ``model``'s parameters)
    :param device: CUDA or CPU
    :param num_epochs: Number of epochs
    :param model_path: Path to save the model
    """
    best_loss = 1e8
    for i in range(num_epochs):
        print(f"Epoch {i+1} of {num_epochs}")
        valid_loss, train_loss = [], []
        model.train()
        # Train loop
        for batch_labels, batch_data in tqdm(train_loader):
            # Move data to GPU if available
            batch_labels = batch_labels.to(device)
            batch_data = batch_data.to(device)
            # Forward pass
            batch_output = model(batch_data)
            # Flatten to (batch, n_outputs) while keeping the batch axis.
            # A plain squeeze() would also drop the batch axis for a batch
            # of one sample and break the loss call.
            batch_output = batch_output.reshape(batch_labels.shape[0], -1)
            # Calculate loss
            loss = criterion(batch_output, batch_labels)
            train_loss.append(loss.item())
            optimizer.zero_grad()
            # Backward pass
            loss.backward()
            # Gradient update step
            optimizer.step()
        model.eval()
        # Validation loop; no gradients are needed, so skip graph building.
        with torch.no_grad():
            for batch_labels, batch_data in tqdm(valid_loader):
                # Move data to GPU if available
                batch_labels = batch_labels.to(device)
                batch_data = batch_data.to(device)
                # Forward pass
                batch_output = model(batch_data)
                batch_output = batch_output.reshape(batch_labels.shape[0], -1)
                # Calculate loss
                loss = criterion(batch_output, batch_labels)
                valid_loss.append(loss.item())
        t_loss = np.mean(train_loss)
        v_loss = np.mean(valid_loss)
        print(f"Train Loss: {t_loss}, Validation Loss: {v_loss}")
        if v_loss < best_loss:
            best_loss = v_loss
            # Save model if validation loss improves
            torch.save(model.state_dict(), model_path)
        print(f"Best Validation Loss: {best_loss}")

In [None]:
def train_multihead(train_loader, valid_loader, model, criterion, optimizer,
                  device, num_epochs, model_path):
    """
    Function to train the Multi-Head Attention model.

    Args:
        train_loader: Data loader for train dataset.
        valid_loader: Data loader for validation dataset.
        model: Multi-Head Attention model object.
        criterion: Loss function.
        optimizer: Optimizer.
        device: CUDA or CPU.
        num_epochs: Number of epochs.
        model_path: Path to save the model.
    """
    # The training loop does not depend on the model type, so reuse train()
    # instead of maintaining a second line-for-line copy of it.
    train(train_loader, valid_loader, model, criterion, optimizer,
          device, num_epochs, model_path)

#Function to test the model

In [None]:
def test(test_loader, model, criterion, device):
    """
    Function to test the model

    Prints the mean per-batch loss and the accuracy over all test samples.

    :param test_loader: Data loader for test dataset; yields (labels, data)
    :param model: Model object
    :param criterion: Loss function
    :param device: CUDA or CPU
    """
    model.eval()
    test_loss = []
    n_correct, n_seen = 0, 0
    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for batch_labels, batch_data in tqdm(test_loader):
            # Move data to device
            batch_labels = batch_labels.to(device)
            batch_data = batch_data.to(device)
            # Forward pass
            batch_output = model(batch_data)
            # Flatten to (batch, n_outputs) while keeping the batch axis.
            # squeeze() would drop it for a single-sample batch and break
            # both the loss and the argmax below.
            batch_output = batch_output.reshape(batch_labels.shape[0], -1)
            # Calculate loss
            loss = criterion(batch_output, batch_labels)
            test_loss.append(loss.item())
            batch_preds = torch.argmax(batch_output, dim=1)
            # Count hits per sample. Averaging per-batch accuracies instead
            # would over-weight a shorter final batch.
            n_correct += (batch_preds == batch_labels).sum().item()
            n_seen += batch_labels.shape[0]
    test_loss = np.mean(test_loss) if test_loss else float("nan")
    test_accu = n_correct / n_seen if n_seen else float("nan")
    print(f"Test Loss: {test_loss}, Test Accuracy: {test_accu}")

In [None]:
def test_multihead(test_loader, model, criterion, device):
    """
    Function to test the Multi-Head Attention model.

    Args:
        test_loader: Data loader for test dataset.
        model: Multi-Head Attention model object.
        criterion: Loss function.
        device: CUDA or CPU.
    """
    # Evaluation does not depend on the model type, so reuse test() instead
    # of maintaining a second copy of the same loop.
    test(test_loader, model, criterion, device)

#Train attention model

In [None]:
# Reload every preprocessed artifact from disk so training can start from
# here without rerunning the preprocessing cells.
tokens, labels, embeddings, label_encoder, vocabulary = (
    load_file(artifact_path)
    for artifact_path in (tokens_path, labels_path, embeddings_path,
                          label_encoder_path, vocabulary_path)
)
num_classes = len(label_encoder.classes_)

Split data into train, validation and test sets

In [None]:
# 20% is held out for testing, then 25% of the remainder for validation,
# i.e. a 60/20/20 split. The seed is fixed so reruns produce the same
# partition and results stay comparable.
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(tokens, labels,
                                                    test_size=0.2,
                                                    random_state=split_seed)
X_train, X_valid, y_train, y_valid = train_test_split(X_train,
                                                      y_train,
                                                      test_size=0.25,
                                                      random_state=split_seed)

Create PyTorch datasets

In [None]:
# One dataset per split; all three share the same embedding matrix.
train_dataset, valid_dataset, test_dataset = (
    TextDataset(split_tokens, embeddings, split_labels)
    for split_tokens, split_labels in ((X_train, y_train),
                                       (X_valid, y_valid),
                                       (X_test, y_test))
)

Create data loaders

In [None]:
# Only the training loader shuffles and drops an incomplete final batch;
# validation and test visit every sample in order.
loader_kwargs = {"batch_size": 16}
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True,
                                           drop_last=True, **loader_kwargs)
valid_loader = torch.utils.data.DataLoader(valid_dataset, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_kwargs)

Create model object

In [None]:
# Use the first GPU when one is present, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = AttentionModel(vec_len=vec_len, seq_len=seq_len, n_classes=num_classes)
#modelMulti = MultiHeadAttention(vec_len, seq_len, num_classes)

Move the model to GPU if available


In [None]:
# `device` is already "cuda:0" when a GPU exists and "cpu" otherwise, so a
# single .to(device) covers both cases.
model = model.to(device)
#modelMulti = modelMulti.to(device)

Define loss function and optimizer

In [None]:
# Cross-entropy over class logits; Adam updates the parameters of `model` only.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
#optimizer = torch.optim.Adam(modelMulti.parameters(), lr=lr)

Training loop

In [None]:
# Build the multi-head model here so this cell does not rely on leftover kernel
# state. It is moved to `device` (the batches are moved there too) and gets its
# own optimizer: `optimizer` above only holds the parameters of `model`, so
# reusing it would leave this model's weights untouched.
# num_heads=5 divides vec_len=50 evenly, so each head gets 10 dimensions.
modelMulti = MultiHeadAttention(vec_len, seq_len, num_classes,
                                num_heads=5).to(device)
optimizer_multi = torch.optim.Adam(modelMulti.parameters(), lr=lr)
train_multihead(train_loader, valid_loader, modelMulti, criterion,
                optimizer_multi, device, num_epochs, model_path_multihead)

Epoch 1 of 50


  0%|          | 0/10418 [00:00<?, ?it/s]


RuntimeError: shape '[16, 20, 8, 6]' is invalid for input of size 16000

In [None]:
# Train the single-vector attention model; the best checkpoint is written to model_path.
train(train_loader=train_loader, valid_loader=valid_loader, model=model,
      criterion=criterion, optimizer=optimizer, device=device,
      num_epochs=num_epochs, model_path=model_path)

Epoch 1 of 50


100%|██████████| 10418/10418 [00:26<00:00, 394.79it/s]
100%|██████████| 3473/3473 [00:04<00:00, 796.41it/s]


Train Loss: 1.5782883248795565, Validation Loss: 1.3664493417485866
Best Validation Loss: 1.3664493417485866
Epoch 2 of 50


100%|██████████| 10418/10418 [00:27<00:00, 385.21it/s]
100%|██████████| 3473/3473 [00:04<00:00, 716.32it/s]


Train Loss: 1.3465995493068372, Validation Loss: 1.2390583044096608
Best Validation Loss: 1.2390583044096608
Epoch 3 of 50


100%|██████████| 10418/10418 [00:25<00:00, 402.04it/s]
100%|██████████| 3473/3473 [00:06<00:00, 548.57it/s]


Train Loss: 1.2740934329415996, Validation Loss: 1.1789733898114514
Best Validation Loss: 1.1789733898114514
Epoch 4 of 50


100%|██████████| 10418/10418 [00:25<00:00, 402.75it/s]
100%|██████████| 3473/3473 [00:04<00:00, 789.14it/s]


Train Loss: 1.2370058572219096, Validation Loss: 1.1443308244992703
Best Validation Loss: 1.1443308244992703
Epoch 5 of 50


100%|██████████| 10418/10418 [00:26<00:00, 393.58it/s]
100%|██████████| 3473/3473 [00:04<00:00, 804.20it/s]


Train Loss: 1.2150090029936969, Validation Loss: 1.122633718479754
Best Validation Loss: 1.122633718479754
Epoch 6 of 50


100%|██████████| 10418/10418 [00:26<00:00, 397.60it/s]
100%|██████████| 3473/3473 [00:05<00:00, 659.26it/s]


Train Loss: 1.1996314941457924, Validation Loss: 1.1064507020178838
Best Validation Loss: 1.1064507020178838
Epoch 7 of 50


100%|██████████| 10418/10418 [00:25<00:00, 406.62it/s]
100%|██████████| 3473/3473 [00:05<00:00, 688.76it/s]


Train Loss: 1.1893811177317162, Validation Loss: 1.0952358779596931
Best Validation Loss: 1.0952358779596931
Epoch 8 of 50


100%|██████████| 10418/10418 [00:25<00:00, 400.94it/s]
100%|██████████| 3473/3473 [00:04<00:00, 796.56it/s]


Train Loss: 1.1781575664074047, Validation Loss: 1.0869193891609845
Best Validation Loss: 1.0869193891609845
Epoch 9 of 50


100%|██████████| 10418/10418 [00:26<00:00, 398.69it/s]
100%|██████████| 3473/3473 [00:04<00:00, 804.97it/s]


Train Loss: 1.1676110427080726, Validation Loss: 1.0794167643814652
Best Validation Loss: 1.0794167643814652
Epoch 10 of 50


100%|██████████| 10418/10418 [00:25<00:00, 400.79it/s]
100%|██████████| 3473/3473 [00:05<00:00, 678.08it/s]


Train Loss: 1.1614769969981562, Validation Loss: 1.0731943833477862
Best Validation Loss: 1.0731943833477862
Epoch 11 of 50


100%|██████████| 10418/10418 [00:25<00:00, 401.53it/s]
100%|██████████| 3473/3473 [00:05<00:00, 683.82it/s]


Train Loss: 1.156886012451986, Validation Loss: 1.0685538369642016
Best Validation Loss: 1.0685538369642016
Epoch 12 of 50


100%|██████████| 10418/10418 [00:26<00:00, 393.83it/s]
100%|██████████| 3473/3473 [00:04<00:00, 800.98it/s]


Train Loss: 1.1547216452839828, Validation Loss: 1.063138967112303
Best Validation Loss: 1.063138967112303
Epoch 13 of 50


100%|██████████| 10418/10418 [00:27<00:00, 385.32it/s]
100%|██████████| 3473/3473 [00:04<00:00, 747.09it/s]


Train Loss: 1.1464549577878107, Validation Loss: 1.0591279074845725
Best Validation Loss: 1.0591279074845725
Epoch 14 of 50


100%|██████████| 10418/10418 [00:26<00:00, 397.00it/s]
100%|██████████| 3473/3473 [00:05<00:00, 649.73it/s]


Train Loss: 1.1468757123647575, Validation Loss: 1.0573120358231909
Best Validation Loss: 1.0573120358231909
Epoch 15 of 50


100%|██████████| 10418/10418 [00:26<00:00, 393.04it/s]
100%|██████████| 3473/3473 [00:04<00:00, 751.92it/s]


Train Loss: 1.142767368720186, Validation Loss: 1.05310524226505
Best Validation Loss: 1.05310524226505
Epoch 16 of 50


100%|██████████| 10418/10418 [00:29<00:00, 357.05it/s]
100%|██████████| 3473/3473 [00:04<00:00, 743.60it/s]


Train Loss: 1.1401951713367715, Validation Loss: 1.052636213761532
Best Validation Loss: 1.052636213761532
Epoch 17 of 50


100%|██████████| 10418/10418 [00:29<00:00, 358.95it/s]
100%|██████████| 3473/3473 [00:05<00:00, 664.13it/s]


Train Loss: 1.1372400497744835, Validation Loss: 1.0495759302966368
Best Validation Loss: 1.0495759302966368
Epoch 18 of 50


100%|██████████| 10418/10418 [00:28<00:00, 362.97it/s]
100%|██████████| 3473/3473 [00:04<00:00, 746.97it/s]


Train Loss: 1.1383945821548611, Validation Loss: 1.0477485443754222
Best Validation Loss: 1.0477485443754222
Epoch 19 of 50


100%|██████████| 10418/10418 [00:28<00:00, 368.77it/s]
100%|██████████| 3473/3473 [00:05<00:00, 656.51it/s]


Train Loss: 1.1355626877317915, Validation Loss: 1.0476562892398675
Best Validation Loss: 1.0476562892398675
Epoch 20 of 50


100%|██████████| 10418/10418 [00:25<00:00, 405.90it/s]
100%|██████████| 3473/3473 [00:04<00:00, 756.01it/s]


Train Loss: 1.13153820673823, Validation Loss: 1.0462071848231436
Best Validation Loss: 1.0462071848231436
Epoch 21 of 50


100%|██████████| 10418/10418 [00:25<00:00, 405.27it/s]
100%|██████████| 3473/3473 [00:04<00:00, 807.82it/s]


Train Loss: 1.1313328656045598, Validation Loss: 1.0443309695000542
Best Validation Loss: 1.0443309695000542
Epoch 22 of 50


100%|██████████| 10418/10418 [00:25<00:00, 408.58it/s]
100%|██████████| 3473/3473 [00:04<00:00, 804.49it/s]


Train Loss: 1.1306902584604623, Validation Loss: 1.0438683879176376
Best Validation Loss: 1.0438683879176376
Epoch 23 of 50


100%|██████████| 10418/10418 [00:25<00:00, 404.65it/s]
100%|██████████| 3473/3473 [00:05<00:00, 679.91it/s]


Train Loss: 1.1287705696293937, Validation Loss: 1.043779445937069
Best Validation Loss: 1.043779445937069
Epoch 24 of 50


100%|██████████| 10418/10418 [00:25<00:00, 407.15it/s]
100%|██████████| 3473/3473 [00:04<00:00, 703.30it/s]


Train Loss: 1.126905924497742, Validation Loss: 1.0422429561512017
Best Validation Loss: 1.0422429561512017
Epoch 25 of 50


100%|██████████| 10418/10418 [00:25<00:00, 407.20it/s]
100%|██████████| 3473/3473 [00:04<00:00, 808.52it/s]


Train Loss: 1.1249702764171903, Validation Loss: 1.0420209182259537
Best Validation Loss: 1.0420209182259537
Epoch 26 of 50


100%|██████████| 10418/10418 [00:25<00:00, 406.87it/s]
100%|██████████| 3473/3473 [00:04<00:00, 803.80it/s]


Train Loss: 1.1251253409528943, Validation Loss: 1.0418502582430738
Best Validation Loss: 1.0418502582430738
Epoch 27 of 50


100%|██████████| 10418/10418 [00:25<00:00, 407.38it/s]
100%|██████████| 3473/3473 [00:04<00:00, 746.17it/s]


Train Loss: 1.1234087476313377, Validation Loss: 1.0417229850183054
Best Validation Loss: 1.0417229850183054
Epoch 28 of 50


100%|██████████| 10418/10418 [00:25<00:00, 413.01it/s]
100%|██████████| 3473/3473 [00:05<00:00, 660.45it/s]


Train Loss: 1.1233099998817218, Validation Loss: 1.0411206795336567
Best Validation Loss: 1.0411206795336567
Epoch 29 of 50


100%|██████████| 10418/10418 [00:25<00:00, 407.15it/s]
100%|██████████| 3473/3473 [00:04<00:00, 809.89it/s]


Train Loss: 1.1217205320107866, Validation Loss: 1.0418505189211473
Best Validation Loss: 1.0411206795336567
Epoch 30 of 50


100%|██████████| 10418/10418 [00:25<00:00, 405.75it/s]
100%|██████████| 3473/3473 [00:04<00:00, 796.30it/s]


Train Loss: 1.1207193037980256, Validation Loss: 1.0405620665533757
Best Validation Loss: 1.0405620665533757
Epoch 31 of 50


100%|██████████| 10418/10418 [00:25<00:00, 404.29it/s]
100%|██████████| 3473/3473 [00:04<00:00, 695.45it/s]


Train Loss: 1.120220062911316, Validation Loss: 1.042111759308673
Best Validation Loss: 1.0405620665533757
Epoch 32 of 50


100%|██████████| 10418/10418 [00:25<00:00, 412.30it/s]
100%|██████████| 3473/3473 [00:04<00:00, 701.80it/s]


Train Loss: 1.1182573061686008, Validation Loss: 1.0411074916498826
Best Validation Loss: 1.0405620665533757
Epoch 33 of 50


100%|██████████| 10418/10418 [00:25<00:00, 400.90it/s]
100%|██████████| 3473/3473 [00:04<00:00, 800.05it/s]


Train Loss: 1.1179049815056443, Validation Loss: 1.0410930812702102
Best Validation Loss: 1.0405620665533757
Epoch 34 of 50


100%|██████████| 10418/10418 [00:25<00:00, 406.91it/s]
100%|██████████| 3473/3473 [00:04<00:00, 807.64it/s]


Train Loss: 1.1190077551102589, Validation Loss: 1.0418709293844512
Best Validation Loss: 1.0405620665533757
Epoch 35 of 50


100%|██████████| 10418/10418 [00:25<00:00, 407.35it/s]
100%|██████████| 3473/3473 [00:05<00:00, 658.54it/s]


Train Loss: 1.1185301096963662, Validation Loss: 1.040711462660753
Best Validation Loss: 1.0405620665533757
Epoch 36 of 50


100%|██████████| 10418/10418 [00:25<00:00, 408.02it/s]
100%|██████████| 3473/3473 [00:04<00:00, 732.54it/s]


Train Loss: 1.1165409298536073, Validation Loss: 1.0403828531924153
Best Validation Loss: 1.0403828531924153
Epoch 37 of 50


100%|██████████| 10418/10418 [00:25<00:00, 405.38it/s]
100%|██████████| 3473/3473 [00:04<00:00, 794.12it/s]


Train Loss: 1.116193170464183, Validation Loss: 1.0403389860273884
Best Validation Loss: 1.0403389860273884
Epoch 38 of 50


100%|██████████| 10418/10418 [00:26<00:00, 394.32it/s]
100%|██████████| 3473/3473 [00:04<00:00, 725.39it/s]


Train Loss: 1.1157878326246025, Validation Loss: 1.0403176927099713
Best Validation Loss: 1.0403176927099713
Epoch 39 of 50


100%|██████████| 10418/10418 [00:28<00:00, 364.52it/s]
100%|██████████| 3473/3473 [00:05<00:00, 667.47it/s]


Train Loss: 1.1151010630472444, Validation Loss: 1.040374654764886
Best Validation Loss: 1.0403176927099713
Epoch 40 of 50


100%|██████████| 10418/10418 [00:28<00:00, 359.95it/s]
100%|██████████| 3473/3473 [00:04<00:00, 755.27it/s]


Train Loss: 1.1142291249784222, Validation Loss: 1.0405438242025895
Best Validation Loss: 1.0403176927099713
Epoch 41 of 50


100%|██████████| 10418/10418 [00:29<00:00, 357.47it/s]
100%|██████████| 3473/3473 [00:05<00:00, 666.42it/s]


Train Loss: 1.1141135482795868, Validation Loss: 1.040681621002389
Best Validation Loss: 1.0403176927099713
Epoch 42 of 50


100%|██████████| 10418/10418 [00:29<00:00, 358.15it/s]
100%|██████████| 3473/3473 [00:04<00:00, 737.13it/s]


Train Loss: 1.1129030530392998, Validation Loss: 1.0408458826420393
Best Validation Loss: 1.0403176927099713
Epoch 43 of 50


100%|██████████| 10418/10418 [00:27<00:00, 385.07it/s]
100%|██████████| 3473/3473 [00:05<00:00, 656.85it/s]


Train Loss: 1.1128404578210114, Validation Loss: 1.0406225925301256
Best Validation Loss: 1.0403176927099713
Epoch 44 of 50


100%|██████████| 10418/10418 [00:27<00:00, 383.94it/s]
100%|██████████| 3473/3473 [00:04<00:00, 753.45it/s]


Train Loss: 1.1128439395034992, Validation Loss: 1.0438532380233263
Best Validation Loss: 1.0403176927099713
Epoch 45 of 50


100%|██████████| 10418/10418 [00:28<00:00, 363.41it/s]
100%|██████████| 3473/3473 [00:05<00:00, 650.74it/s]


Train Loss: 1.1116518220778595, Validation Loss: 1.0420940908649763
Best Validation Loss: 1.0403176927099713
Epoch 46 of 50


100%|██████████| 10418/10418 [00:28<00:00, 363.26it/s]
100%|██████████| 3473/3473 [00:04<00:00, 743.97it/s]


Train Loss: 1.1133025378063193, Validation Loss: 1.0433361562054198
Best Validation Loss: 1.0403176927099713
Epoch 47 of 50


100%|██████████| 10418/10418 [00:29<00:00, 356.51it/s]
100%|██████████| 3473/3473 [00:05<00:00, 673.30it/s]


Train Loss: 1.1103992463531318, Validation Loss: 1.0425674009745312
Best Validation Loss: 1.0403176927099713
Epoch 48 of 50


100%|██████████| 10418/10418 [00:29<00:00, 354.24it/s]
100%|██████████| 3473/3473 [00:04<00:00, 769.20it/s]


Train Loss: 1.110471983729683, Validation Loss: 1.0431124942380965
Best Validation Loss: 1.0403176927099713
Epoch 49 of 50


100%|██████████| 10418/10418 [00:26<00:00, 387.75it/s]
100%|██████████| 3473/3473 [00:04<00:00, 800.20it/s]


Train Loss: 1.1106632096990785, Validation Loss: 1.042839816632272
Best Validation Loss: 1.0403176927099713
Epoch 50 of 50


100%|██████████| 10418/10418 [00:25<00:00, 405.43it/s]
100%|██████████| 3473/3473 [00:05<00:00, 656.02it/s]


Train Loss: 1.1084890892304713, Validation Loss: 1.042501206789997
Best Validation Loss: 1.0403176927099713


Test the model

In [None]:
# NOTE(review): this cell trains `modelMulti` with `optimizer`, which was built
# from `model.parameters()`, so the optimizer steps would not update modelMulti.
# NOTE(review): the only visible creation of `modelMulti` and its move to the
# GPU are commented out in earlier cells, and the recorded output of this cell
# is a cpu/cuda device-mismatch error - confirm whether this cell is still needed.
train(train_loader, valid_loader,modelMulti, criterion, optimizer, device,
      num_epochs, model_path_multihead)

Epoch 1 of 50


  0%|          | 0/10418 [00:00<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [None]:
# After train() returns, `model` holds its last-epoch weights. Restore the
# checkpoint saved at the best validation loss so the test score reflects it.
model.load_state_dict(torch.load(model_path, map_location=device))
test(test_loader, model, criterion, device)

100%|██████████| 3473/3473 [00:06<00:00, 517.20it/s]

Test Loss: 1.0380816694521746, Test Accuracy: 0.648605803209172





# Predict on new text

In [None]:
# Sample complaint used to demonstrate single-text prediction. The text is
# kept verbatim (including the XXXX redaction masks); the cleaning cell below
# normalises it before tokenisation.
input_text = '''I am a victim of Identity Theft & currently have an Experian account that
I can view my Experian Credit Report and getting notified when there is activity on
my Experian Credit Report. For the past 3 days I've spent a total of approximately 9
hours on the phone with Experian. Every time I call I get transferred repeatedly and
then my last transfer and automated message states to press 1 and leave a message and
someone would call me. Every time I press 1 I get an automatic message stating than you
before I even leave a message and get disconnected. I call Experian again, explain what
is happening and the process begins again with the same end result. I was trying to have
this issue attended and resolved informally but I give up after 9 hours. There are hard
hit inquiries on my Experian Credit Report that are fraud, I didn't authorize, or recall
and I respectfully request that Experian remove the hard hit inquiries immediately just
like they've done in the past when I was able to speak to a live Experian representative
in the United States. The following are the hard hit inquiries : BK OF XXXX XX/XX/XXXX
XXXX XXXX XXXX  XX/XX/XXXX XXXX  XXXX XXXX  XX/XX/XXXX XXXX  XX/XX/XXXX XXXX  XXXX
XX/XX/XXXX'''

Process input text

In [None]:
# Normalise the raw complaint text, then split it into word tokens.
input_text = input_text.lower()
# Replace every run of characters that is not a word character, digit,
# apostrophe or whitespace with a single space.
input_text = re.sub(r"[^\w\d'\s]+", " ", input_text)
# Drop digits. The pattern is a raw string: a plain "\d" is an invalid escape
# sequence in a normal string literal and triggers a warning on newer Pythons.
input_text = re.sub(r"\d+", "", input_text)
# Remove runs of two or more 'x' (the lower-cased XXXX redaction masks).
input_text = re.sub(r'[x]{2,}', "", input_text)
# Collapse repeated spaces left behind by the substitutions above.
input_text = re.sub(' +', ' ', input_text)
tokens = word_tokenize(input_text)

In [None]:
# Left-pad short inputs with '<pad>' up to the configured sequence length.
# `seq_len` replaces the literal 20 so the padding stays in step with the later
# `[:seq_len]` truncation and with the `seq_len` passed to the model.
# Inputs already longer than `seq_len` are left unchanged here (the repeat
# count is negative, which yields an empty list).
tokens = ['<pad>']*(seq_len-len(tokens))+tokens

In [None]:
# Map every token to its position in `vocabulary`; tokens that are not in the
# vocabulary fall back to the position of '<unk>'.
# The lookup table is built once so each token costs one dict lookup instead
# of two linear scans (`in` + `.index`) over the whole vocabulary.
unk_idx = vocabulary.index('<unk>')
token_to_idx = {}
for position, word in enumerate(vocabulary):
    # setdefault keeps the FIRST position of a repeated word, like list.index().
    token_to_idx.setdefault(word, position)
idx_token = [token_to_idx.get(token, unk_idx) for token in tokens]

In [None]:
# Gather the embedding rows for the first `seq_len` token ids (longer inputs
# are truncated) and hand the resulting array to PyTorch.
token_emb = embeddings[idx_token[:seq_len], :]
inp = torch.from_numpy(token_emb)

In [None]:
# Prepend a column of ones to every row of `inp`, widening each row by one.
# NOTE(review): presumably the extra leading feature the model expects —
# confirm against AttentionModel. Re-running this cell on its own adds yet
# another column, because `inp` is rebuilt from itself.
ones_column = torch.ones(inp.size(0), 1)
inp = torch.cat([ones_column, inp], dim=1)

In [None]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Put the input on the selected device and add a leading dimension of size 1
# (a batch containing this single example).
inp = inp.to(device).unsqueeze(0)

In [None]:
# Restore the label encoder stored at `label_encoder_path`. Its `classes_`
# sequence determines the number of output classes here and is indexed later
# to turn the predicted class index back into a label.
# NOTE(review): `load_file` is defined elsewhere; if it unpickles, only load
# files from a trusted source.
label_encoder = load_file(label_encoder_path)
num_classes = len(label_encoder.classes_)

In [None]:
# Create model object
model = AttentionModel(vec_len, seq_len, num_classes)

# Load trained weights.
# - map_location=device lets a checkpoint written on a GPU runtime be restored
#   on a CPU-only runtime (and vice versa) instead of raising.
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state_dict needs, and avoids the torch.load warning.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# Move the model to the same device as `inp`
model = model.to(device)

# Inference only: switch layers such as dropout / batch-norm to evaluation
# behaviour and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    # Forward pass
    out = torch.squeeze(model(inp))

# Find predicted class (.item() converts the 0-d index tensor to a Python int)
prediction = label_encoder.classes_[torch.argmax(out).item()]
print(f"Predicted  Class: {prediction}")

Predicted  Class: credit_report


  model.load_state_dict(torch.load(model_path))


# Save model

In [None]:
import torch

def save_model(model, model_path):
    """
    Saves the model's weights (its ``state_dict``) to a file.

    Args:
        model: The model to save.
        model_path: The path to save the model to. When it is a filesystem
            path whose parent directory does not exist yet, that directory
            is created first. File-like objects are passed through to
            ``torch.save`` untouched.
    """
    import os  # local import keeps this helper cell self-contained

    # Only real filesystem paths have a parent directory to prepare.
    if isinstance(model_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(model_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

def load_model(model, model_path, map_location="cpu"):
    """
    Loads saved weights (a ``state_dict``) from a file into ``model``.

    Args:
        model: The model to load the weights into (modified in place).
        model_path: The path to load the model from.
        map_location: Where ``torch.load`` places the stored tensors.
            Defaults to ``"cpu"`` so a checkpoint written on a GPU runtime
            can also be read on a CPU-only runtime. ``load_state_dict``
            copies the values into the model's existing parameters, so the
            model stays on whatever device it already lives on.
    """
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state_dict contains.
    state_dict = torch.load(model_path, map_location=map_location, weights_only=True)
    model.load_state_dict(state_dict)
    print(f"Model loaded from {model_path}")

In [None]:
# NOTE(review): this rebinds `model_path`; cells that run afterwards see this
# value, not the checkpoint path used during training.
model_path = "/content/drive/MyDrive/TextClassificationAttention/models"
# Save the trained model (`model`, whose weights were loaded in the prediction
# cell above) instead of a freshly constructed AttentionModel: a new instance
# only carries its initial, untrained weights, so saving it would store a
# useless checkpoint that the next cell then loads.
model_instance = model
save_model(model_instance, model_path)  # Save the model instance

Model saved to /content/drive/MyDrive/TextClassificationAttention/models


In [None]:
# Rebuild the architecture and restore the weights stored at `model_path`.
# NOTE(review): this rebinds `model`, and the new instance is not moved to
# `device` in this cell — move it before feeding it tensors that live on
# `device`.
model = AttentionModel(vec_len, seq_len, num_classes)  # Create the model instance
load_model(model, model_path)  # Load the saved weights

Model loaded from /content/drive/MyDrive/TextClassificationAttention/models


  model.load_state_dict(torch.load(model_path))
