<a href="https://colab.research.google.com/github/1830668NabilahOshin/Numerical-methods-and-Neural-Network-Labwork/blob/main/transformer1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Pre-trained word2vec model, fetched by name through gensim's downloader.
# The resulting `w2v_model` global is what the w2v() helper further down looks tokens up in.
import gensim.downloader as api
w2v_model = api.load("word2vec-google-news-300")



In [None]:
import pandas as pd
import torch
import torch.nn as nn
import re
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import numpy as np
import math
import torch.optim as optim
from sklearn.utils import shuffle
from torch.nn import functional as F

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Loading the dataset

In [None]:
# Read the review dataset; the file is decoded as ISO-8859-1.
data = pd.read_csv('restuarents.csv', encoding='ISO-8859-1')

# Preview the first rows (the printed output shows two columns: `text` and `label`).
print(data.head())

                                                text  label
0                           Wow... Loved this place.      1
1                                 Crust is not good.      0
2          Not tasty and the texture was just nasty.      0
3  Stopped by during the late May bank holiday of...      1
4  The selection on the menu was great and so wer...      1


# Text preprocessing

In [None]:
class TextPreprocessor:
    """Normalise raw review text into a space-separated string of tokens.

    The cleaning steps are: lower-casing, removal of URLs and HTML-like tags,
    removal of ASCII punctuation, NLTK word tokenisation, and removal of
    English stop words.

    Note: the stop-word filter also drops negations. In the sample output
    "Crust is not good." becomes "crust good", so the cleaned text can lose
    the cue that distinguishes a negative review from a positive one.
    """

    def __init__(self):
        # Sets give O(1) membership tests in process_text.
        self.stop_words = set(stopwords.words('english'))
        self.punctuations = set(string.punctuation)

    def process_text(self, text):
        """Return the cleaned version of a single string."""
        cleaned = text.lower()
        # Strip URLs first, then anything that looks like an HTML tag.
        for pattern in (r'http\S+', r'<.*?>'):
            cleaned = re.sub(pattern, '', cleaned)
        # Punctuation is deleted (not replaced by a space) before tokenising.
        kept_chars = [ch for ch in cleaned if ch not in self.punctuations]
        words = word_tokenize(''.join(kept_chars))
        return ' '.join(word for word in words if word not in self.stop_words)

    def process_column(self, df, column_name):
        """Clean every value of ``df[column_name]`` and return ``df``.

        The column is overwritten on the frame that was passed in, so the
        caller's DataFrame is modified and the same object is returned.
        """
        cleaned_column = df[column_name].apply(self.process_text)
        df[column_name] = cleaned_column
        return df

# Clean the `text` column. process_column assigns into the frame it is given and
# returns that same frame, so `df` and `data` refer to one (now preprocessed) DataFrame.
preprocessor = TextPreprocessor()
df = preprocessor.process_column(data, 'text')

print(df.head())

                                                text  label
0                                    wow loved place      1
1                                         crust good      0
2                                tasty texture nasty      0
3  stopped late may bank holiday rick steve recom...      1
4                        selection menu great prices      1


# Splitting into train and test data

In [None]:
# Hold out 30% of the rows for testing; random_state fixes the seed used for the split.
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

# Convert text into word vectors using word2vec

In [None]:
def w2v(sentence, model=None, max_len=10):
  """Embed a whitespace-tokenised sentence as a fixed-length matrix of word vectors.

  The sentence is split on whitespace and truncated to ``max_len`` tokens.
  Each token is looked up as a whole word in the embedding model. Tokens
  missing from the model, and the padding positions of sentences shorter
  than ``max_len``, are represented by all-zero rows.

  (The previous implementation iterated over the characters of every token
  and averaged the embeddings of those single characters, so the result did
  not represent the words themselves.)

  Args:
    sentence: Text whose tokens are separated by whitespace.
    model: Embedding lookup supporting ``token in model``, ``model[token]``
      and ``model.vector_size``. Defaults to the notebook-level ``w2v_model``.
    max_len: Number of rows (token positions) in the returned tensor.

  Returns:
    A ``torch.float64`` tensor of shape ``(max_len, model.vector_size)``.
  """
  if model is None:
    model = w2v_model

  tokens = sentence.split()[:max_len]

  # Pre-filling with zeros covers both padding and out-of-vocabulary tokens.
  # Building one contiguous array also avoids the slow list-of-ndarrays path
  # in torch.tensor.
  embeddings = np.zeros((max_len, model.vector_size), dtype=np.float64)
  for position, token in enumerate(tokens):
    if token in model:
      embeddings[position] = model[token]

  return torch.tensor(embeddings, dtype=torch.float64)

# PyTorch Dataset subclass for loading the data

In [None]:
class TextDataset(Dataset):
  """Dataset pairing each row's word-vector matrix with its label.

  Reads the ``text`` and ``label`` columns of the given DataFrame. Text is
  embedded lazily with ``w2v`` each time an item is requested.
  """

  def __init__(self, df):
    super().__init__()
    self.data, self.labels = df['text'].values, df['label'].values

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    # Features and target are both float64, matching the model that the
    # training cell converts with .double().
    features = w2v(self.data[index]).double()
    target = torch.tensor(self.labels[index], dtype=torch.float64)
    return features, target

# Preparing train and test datasets

In [None]:
train_dataset = TextDataset(train_df)
test_dataset = TextDataset(test_df)

# Reshuffle the training samples at every epoch so the batches are not
# composed identically each time; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=10, num_workers=2, pin_memory=True)


# Transformer class

In [None]:
class Transformer(nn.Module):
    """Small Transformer-style encoder that outputs one sigmoid score per sequence.

    Each of the ``num_layers`` layers applies multi-head self-attention and a
    position-wise feed-forward network. Every sub-layer output is
    layer-normalised, passed through dropout (p=0.1) and added back to its
    input. The sequence is then mean-pooled and mapped to a single value in
    (0, 1) by a linear layer followed by a sigmoid.

    Args:
        input_dim: Size of each token vector; must be divisible by ``num_heads``.
        num_layers: Number of attention + feed-forward layers.
        num_heads: Number of attention heads per layer.
        hidden_dim: Width of the hidden layer in each feed-forward network.
        max_len: Number of positions for which a positional encoding exists.
    """

    def __init__(self, input_dim=300, num_layers=2, num_heads=4, hidden_dim=128, max_len=10):
        super().__init__()

        self.num_layers = num_layers
        self.num_heads = num_heads
        self.hidden_dim = hidden_dim
        self.max_len = max_len

        # Multi-head self-attention, one module per layer
        # (sequence-first layout, see forward()).
        self.attention_layers = nn.ModuleList([
            nn.MultiheadAttention(input_dim, num_heads) for _ in range(num_layers)
        ])

        # Layer normalisation: ln1 follows attention, ln2 follows the feed-forward net.
        self.ln1 = nn.ModuleList([nn.LayerNorm(input_dim) for _ in range(num_layers)])
        self.ln2 = nn.ModuleList([nn.LayerNorm(input_dim) for _ in range(num_layers)])

        # Position-wise feed-forward networks.
        self.feedforward_layers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, input_dim)
            ) for _ in range(num_layers)
        ])

        # Positional encoding: initialised with the sinusoidal table but registered
        # as a trainable parameter, so it is updated during training.
        self.pos_enc = nn.Parameter(self.create_positional_encoding(max_len, input_dim), requires_grad=True)

        # Classification head.
        self.decoder = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_dim)`` with
                ``seq_len <= max_len``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in (0, 1).
        """
        # Add the positional encoding for the first seq_len positions.
        x = x + self.pos_enc[:x.shape[1], :]

        # nn.MultiheadAttention is used in its default sequence-first layout:
        # (seq_len, batch, input_dim).
        x = x.transpose(0, 1)

        for i in range(self.num_layers):
            # Self-attention sub-layer. F.dropout defaults to training=True, so
            # the module's mode must be passed explicitly; otherwise dropout
            # would stay active after .eval().
            attention_output, _ = self.attention_layers[i](x, x, x)
            x = x + F.dropout(self.ln1[i](attention_output), p=0.1, training=self.training)

            # Feed-forward sub-layer.
            feedforward_output = self.feedforward_layers[i](x)
            x = x + F.dropout(self.ln2[i](feedforward_output), p=0.1, training=self.training)

        # Mean-pool over the sequence dimension.
        x = x.mean(dim=0)

        # Project to a single logit and squash to a probability.
        x = self.decoder(x)
        x = self.sigmoid(x)
        return x

    def create_positional_encoding(self, max_len, input_dim):
        """Build the ``(max_len, input_dim)`` sinusoidal positional-encoding table.

        Even columns hold sines and odd columns hold cosines of the position
        scaled by geometrically spaced frequencies. Odd ``input_dim`` values
        are supported: there is then one fewer cosine column than sine columns.
        """
        pos_enc = torch.zeros((max_len, input_dim))
        pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, input_dim, 2).float() * (-math.log(10000.0) / input_dim))
        pos_enc[:, 0::2] = torch.sin(pos * div_term)
        # With an odd input_dim there are only input_dim // 2 odd columns.
        pos_enc[:, 1::2] = torch.cos(pos * div_term[: input_dim // 2])
        return pos_enc



# Training and testing

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The datasets yield float64 tensors, so the model is converted to double precision.
myTransformer = Transformer().double().to(device)

# Loss function and optimizer
criterion = nn.BCELoss().double()
optimizer = torch.optim.Adam(myTransformer.parameters(), lr=0.001)

for epoch in range(10):
  running_loss = 0.0
  correct = 0
  total = 0
  myTransformer.train()

  for inputs, labels in train_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    optimizer.zero_grad()

    # Forward + backward + optimize.
    # squeeze(-1) drops only the trailing size-1 dimension, so a final batch
    # holding a single sample keeps shape (1,) and still matches `labels`.
    outputs = myTransformer(inputs).squeeze(-1)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Track running loss and number of correct predictions
    running_loss += loss.item()
    total += labels.size(0)
    predicted = torch.round(outputs.detach())
    correct += (predicted == labels).sum().item()

  # Print training loss and accuracy every epoch
  train_loss = running_loss/len(train_loader)
  train_acc = (100*correct)/total
  print('Epoch [%d] training loss: %.3f' % (epoch+1, train_loss))
  print('Epoch [%d] training accuracy: %.3f' % (epoch+1, train_acc))

  # Evaluation: put the model in evaluation mode before scoring the test set
  # (train mode is re-entered at the start of the next epoch).
  myTransformer.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for inputs, labels in test_loader:
      inputs = inputs.double().to(device)
      labels = labels.to(device)
      outputs = myTransformer(inputs).squeeze(-1)
      predicted = torch.round(outputs)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
    test_acc = (100*correct)/total
    print('Epoch [%d] test accuracy: %.3f' % (epoch+1, test_acc))


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [1] training loss: 1.045
Epoch [1] training accuracy: 45.696


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [1] test accuracy: 54.745


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [2] training loss: 0.778
Epoch [2] training accuracy: 49.452


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [2] test accuracy: 45.255


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [3] training loss: 0.754
Epoch [3] training accuracy: 47.261


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [3] test accuracy: 45.255


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [4] training loss: 0.729
Epoch [4] training accuracy: 46.322


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [4] test accuracy: 45.255


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [5] training loss: 0.721
Epoch [5] training accuracy: 47.105


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [5] test accuracy: 46.350


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [6] training loss: 0.717
Epoch [6] training accuracy: 47.418


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [6] test accuracy: 45.620


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [7] training loss: 0.713
Epoch [7] training accuracy: 49.922


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [7] test accuracy: 56.204


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [8] training loss: 0.708
Epoch [8] training accuracy: 48.513


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [8] test accuracy: 51.095


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [9] training loss: 0.706
Epoch [9] training accuracy: 51.643


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [9] test accuracy: 52.920


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [10] training loss: 0.701
Epoch [10] training accuracy: 52.739


  return torch.tensor(vectors, dtype=torch.float64)
  return torch.tensor(vectors, dtype=torch.float64)


Epoch [10] test accuracy: 52.190
