In [None]:
# Install PennyLane, which is needed for the quantum layers defined further down.
# NOTE(review): the version is not pinned; the captured output shows 0.33.0 was resolved.
!pip install pennylane

Collecting pennylane
  Downloading PennyLane-0.33.0-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting rustworkx (from pennylane)
  Downloading rustworkx-0.13.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
Collecting semantic-version>=2.7 (from pennylane)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)
Collecting autoray>=0.6.1 (from pennylane)
  Downloading autoray-0.6.7-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting pennylane-lightning>=0.33 (from pennylane)
  Downloading PennyLane_Lightning-0.33.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Fetch the MC / RP resources and move their `datasets` folder to `mc_rp_dataset`,
# which is the directory the loading cell globs for *.txt files.
!git clone https://github.com/CQCL/qnlp_lorenz_etal_2021_resources
!mv qnlp_lorenz_etal_2021_resources/datasets mc_rp_dataset

Cloning into 'qnlp_lorenz_etal_2021_resources'...
remote: Enumerating objects: 93, done.[K
remote: Counting objects: 100% (93/93), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 93 (delta 41), reused 57 (delta 25), pack-reused 0[K
Receiving objects: 100% (93/93), 56.79 KiB | 4.73 MiB/s, done.
Resolving deltas: 100% (41/41), done.


In [None]:
from pathlib import Path
import pandas as pd
from joblib import load, dump
from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader, Dataset
import torch.utils.data as data_utils

from collections import Counter
from tqdm.autonotebook import tqdm

import numpy as np

  from tqdm.autonotebook import tqdm


In [None]:
# Collect every text file of the downloaded dataset folder and split the
# paths by task prefix ("mc" vs. "rp") of the file name.
mc_rp_sets_path = Path("mc_rp_dataset")
mc_rp_sets = list(mc_rp_sets_path.glob("*.txt"))
mc_datasets = [path for path in mc_rp_sets if path.name.startswith("mc")]
rp_datasets = [path for path in mc_rp_sets if path.name.startswith("rp")]

In [None]:
# Reading MC and RP data and creating data loaders from our data loading strategy
def read_process_mcrp(datapaths: list):
    """Read labelled MC / RP text files into DataFrames.

    Each non-blank line is expected to start with an integer label followed by
    whitespace and the sentence. Tokens of the form ``word_TAG`` are reduced to
    ``word``; tokens without an underscore are kept unchanged.

    Args:
        datapaths: iterable of ``pathlib.Path`` objects pointing at the files.

    Returns:
        dict mapping the file name up to its first ``.`` to a DataFrame with
        the columns ``label`` (int) and ``text`` (str).
    """
    def rm(text):
        # Keep what precedes the first underscore. ``split`` (rather than
        # slicing at ``find``) leaves tokens without an underscore intact
        # instead of chopping off their last character.
        return " ".join(word.split("_", 1)[0] for word in text.split())

    retval = {}
    for datapath in datapaths:
        rows = []
        for line in datapath.read_text().split("\n"):
            # A trailing newline produces an empty last entry; skip blank lines.
            if not line.strip():
                continue
            # Split label and sentence on the first whitespace run so the parse
            # does not depend on how many separator characters a file uses
            # (and not on the directory name containing "rp").
            parts = line.split(maxsplit=1)
            sentence = parts[1] if len(parts) > 1 else ""
            rows.append([int(parts[0]), rm(sentence)])
        retval[datapath.name.split(".")[0]] = pd.DataFrame(rows, columns=['label', 'text'])
    return retval

# Parse both task families into {file stem: DataFrame} dictionaries.
mc_data = read_process_mcrp(mc_datasets)
rp_data = read_process_mcrp(rp_datasets)
# Mini-batch size shared by the MC and RP data loaders.
MRP_BATCH_SIZE = 30

In [None]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

import pennylane as qml

class MultiHeadAttentionBase(nn.Module):
    """Common scaffolding for multi-head self-attention layers.

    Stores the head geometry and the attention dropout, and implements head
    splitting, scaled dot-product attention and head merging. The projection
    layers (``k_linear``, ``q_linear``, ``v_linear``, ``combine_heads``) are
    ``None`` placeholders here, and ``forward`` is not implemented.
    """

    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 dropout: float = 0.1,
                 mask=None,
                 use_bias=False):
        # ``mask`` and ``use_bias`` are accepted but not used by this constructor.
        super().__init__()

        assert embed_dim % num_heads == 0, f"Embedding dimension ({embed_dim}) should be divisible by number of heads ({num_heads})"

        self.embed_dim, self.num_heads = embed_dim, num_heads
        # Width of each individual head.
        self.d_k = embed_dim // num_heads
        # Projection placeholders.
        self.k_linear = self.q_linear = self.v_linear = None
        self.combine_heads = None
        self.dropout = nn.Dropout(dropout)
        # Attention weights of the most recent ``downstream`` call.
        self.attn_weights = None

    def separate_heads(self, x):
        """Split the last dimension into heads.

        Reshapes ``(batch_size, seq_len, embed_dim)`` into
        ``(batch_size, seq_len, num_heads, d_k)`` and swaps axes 1 and 2,
        giving ``(batch_size, num_heads, seq_len, d_k)`` so that the matrix
        products run independently per head.
        """
        per_head = x.view(x.size(0), -1, self.num_heads, self.d_k)
        return per_head.transpose(1, 2)

    def attention(self, query, key, value, mask=None, dropout=None):
        """Compute ``softmax(Q K^T / sqrt(d_k)) V``.

        Positions where ``mask == 0`` receive a score of ``-1e9`` before the
        softmax. ``dropout``, if given, is applied to the attention weights.

        Returns:
            ``(attended values, attention weights)``.
        """
        logits = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            # Insert a head axis so the mask broadcasts over all heads.
            logits = logits.masked_fill(mask.unsqueeze(1) == 0, -1e9)
        weights = F.softmax(logits, dim=-1)
        if dropout is not None:
            weights = dropout(weights)
        return torch.matmul(weights, value), weights

    def downstream(self, query, key, value, batch_size, mask=None):
        """Split into heads, attend, and concatenate the heads again.

        Stores the attention weights in ``self.attn_weights`` and returns a
        tensor viewed as ``(batch_size, -1, embed_dim)``. The output projection
        is left to the caller.
        """
        heads = [self.separate_heads(t) for t in (query, key, value)]
        per_head, self.attn_weights = self.attention(*heads, mask, dropout=self.dropout)
        return per_head.transpose(1, 2).contiguous().view(batch_size, -1, self.embed_dim)

    def forward(self, x, mask=None):
        raise NotImplementedError("Base class does not execute forward function.")


class MultiHeadAttentionClassical(MultiHeadAttentionBase):
    """Multi-head self-attention whose four projections are ``nn.Linear`` layers."""

    def __init__(self, embed_dim: int,
                 num_heads: int,
                 dropout=0.1,
                 mask=None,
                 use_bias=False):
        super().__init__(embed_dim=embed_dim,
                         num_heads=num_heads,
                         dropout=dropout,
                         mask=mask,
                         use_bias=use_bias)

        def _projection():
            # Square projection that keeps the embedding width.
            return nn.Linear(embed_dim, embed_dim, bias=use_bias)

        # Created in this order: key, query, value, output projection.
        self.k_linear = _projection()
        self.q_linear = _projection()
        self.v_linear = _projection()
        self.combine_heads = _projection()

    def forward(self, x, mask=None):
        """Apply self-attention to ``x`` of shape ``(batch, seq_len, embed_dim)``."""
        batch_size, _, in_dim = x.size()
        assert in_dim == self.embed_dim, f"Input embedding ({in_dim}) does not match layer embedding size ({self.embed_dim})"

        keys, queries, values = self.k_linear(x), self.q_linear(x), self.v_linear(x)
        merged = self.downstream(queries, keys, values, batch_size, mask)
        return self.combine_heads(merged)


class MultiHeadAttentionQuantum(MultiHeadAttentionBase):
    """Multi-head self-attention whose projections are variational quantum circuits.

    Every projection (key, query, value, output) is a ``qml.qnn.TorchLayer``
    wrapping the same QNode: ``AngleEmbedding`` of the inputs followed by
    ``BasicEntanglerLayers``, measured as the PauliZ expectation value on each
    wire. Each ``TorchLayer`` is created with its own ``weight_shapes`` entry of
    shape ``(n_qlayers, n_qubits)``.
    """

    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 dropout=0.1,
                 mask=None,
                 use_bias=False,
                 n_qubits: int = 4,
                 n_qlayers: int = 1,
                 q_device="default.qubit"):
        super(MultiHeadAttentionQuantum, self).__init__(embed_dim, num_heads, dropout=dropout, mask=mask, use_bias=use_bias)

        # todo: add intermediate layer to "dress" quantum circuit
        # The circuit maps one embedding component to one qubit, so the sizes must agree.
        assert n_qubits == embed_dim, f"Number of qubits ({n_qubits}) does not match embedding dim ({embed_dim})"

        self.n_qubits = n_qubits
        self.n_qlayers = n_qlayers
        self.q_device = q_device
        # Some back-ends receive an extra device-specific keyword.
        if 'qulacs' in q_device:
            self.dev = qml.device(q_device, wires=self.n_qubits, gpu=True)
        elif 'braket' in q_device:
            self.dev = qml.device(q_device, wires=self.n_qubits, parallel=True)
        else:
            self.dev = qml.device(q_device, wires=self.n_qubits)

        def _circuit(inputs, weights):
            qml.templates.AngleEmbedding(inputs, wires=range(self.n_qubits))
            qml.templates.BasicEntanglerLayers(weights, wires=range(n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

        self.qlayer = qml.QNode(_circuit, self.dev, interface="torch")
        self.weight_shapes = {"weights": (n_qlayers, n_qubits)}
        print(f"weight_shapes = (n_qlayers, n_qubits) = ({n_qlayers}, {self.n_qubits})")

        self.k_linear = qml.qnn.TorchLayer(self.qlayer, self.weight_shapes)
        self.q_linear = qml.qnn.TorchLayer(self.qlayer, self.weight_shapes)
        self.v_linear = qml.qnn.TorchLayer(self.qlayer, self.weight_shapes)
        self.combine_heads = qml.qnn.TorchLayer(self.qlayer, self.weight_shapes)

    def _apply_per_position(self, layer, x, seq_len):
        """Run ``layer`` on every sequence position of ``x`` and restack along dim 1."""
        # torch.stack(dim=1) yields (batch, seq_len, features). It replaces the
        # former torch.Tensor(pad_sequence(...)) round trip, which produced the
        # same layout for equally sized slices but went through the legacy
        # tensor constructor.
        return torch.stack([layer(x[:, t, :]) for t in range(seq_len)], dim=1)

    def forward(self, x, mask=None):
        """Apply quantum self-attention to ``x`` of shape ``(batch, seq_len, embed_dim)``."""
        batch_size, seq_len, embed_dim = x.size()
        assert embed_dim == self.embed_dim, f"Input embedding ({embed_dim}) does not match layer embedding size ({self.embed_dim})"

        K = self._apply_per_position(self.k_linear, x, seq_len)
        Q = self._apply_per_position(self.q_linear, x, seq_len)
        V = self._apply_per_position(self.v_linear, x, seq_len)

        x = self.downstream(Q, K, V, batch_size, mask)
        return self._apply_per_position(self.combine_heads, x, seq_len)


class FeedForwardBase(nn.Module):
    """Holds the layers of a position-wise feed-forward block.

    Creates ``linear_1`` (``embed_dim -> ffn_dim``), ``linear_2``
    (``ffn_dim -> embed_dim``) and a dropout layer. ``forward`` is not
    implemented on this class.
    """

    def __init__(self, embed_dim, ffn_dim, dropout=0.1):
        super().__init__()
        # Expansion first, contraction second (creation order is kept as is).
        self.linear_1 = nn.Linear(in_features=embed_dim, out_features=ffn_dim)
        self.linear_2 = nn.Linear(in_features=ffn_dim, out_features=embed_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        raise NotImplementedError("Base class does not implement forward function")


class FeedForwardClassical(FeedForwardBase):
    """Feed-forward block: linear, ReLU, dropout, linear."""

    def __init__(self, embed_dim, ffn_dim, dropout=0.1):
        super().__init__(embed_dim, ffn_dim, dropout)

    def forward(self, x):
        """Return ``linear_2(dropout(relu(linear_1(x))))``."""
        hidden = self.dropout(F.relu(self.linear_1(x)))
        return self.linear_2(hidden)


class FeedForwardQuantum(FeedForwardBase):
    """Feed-forward block with a variational quantum circuit between two linear layers.

    ``linear_1`` maps ``embed_dim -> n_qubits``, the circuit (``AngleEmbedding``
    + ``BasicEntanglerLayers``, PauliZ expectation per wire) is applied to each
    sequence position, and ``linear_2`` maps ``n_qubits -> embed_dim``.
    """

    def __init__(self, embed_dim, n_qubits, n_qlayers=1, dropout=0.1, q_device="default.qubit"):
        super(FeedForwardQuantum, self).__init__(embed_dim, ffn_dim=n_qubits, dropout=dropout)

        self.n_qubits = n_qubits
        # Some back-ends receive an extra device-specific keyword.
        if 'qulacs' in q_device:
            self.dev = qml.device(q_device, wires=self.n_qubits, gpu=True)
        elif 'braket' in q_device:
            self.dev = qml.device(q_device, wires=self.n_qubits, parallel=True)
        else:
            self.dev = qml.device(q_device, wires=self.n_qubits)

        def _circuit(inputs, weights):
            qml.templates.AngleEmbedding(inputs, wires=range(self.n_qubits))
            qml.templates.BasicEntanglerLayers(weights, wires=range(n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]
        self.qlayer = qml.QNode(_circuit, self.dev, interface="torch")
        self.weight_shapes = {"weights": (n_qlayers, n_qubits)}
        self.vqc = qml.qnn.TorchLayer(self.qlayer, self.weight_shapes)

    def forward(self, x):
        """Apply the block to ``x`` of shape ``(batch, seq_len, embed_dim)``."""
        _, seq_len, _ = x.size()
        x = self.linear_1(x)
        # Evaluate the circuit one position at a time and restack to
        # (batch, seq_len, n_qubits). torch.stack(dim=1) replaces the former
        # torch.Tensor(pad_sequence(...)) round trip with the same layout.
        x = torch.stack([self.vqc(x[:, t, :]) for t in range(seq_len)], dim=1)
        # dropout?
        x = self.linear_2(x)
        return x


class TransformerBlockBase(nn.Module):
    """Residual wiring shared by the transformer blocks.

    Creates two dropout layers and two layer norms over ``embed_dim``.
    ``attn`` and ``ffn`` are ``None`` placeholders here. The remaining
    constructor arguments are accepted but not used by this class.
    """

    def __init__(self,
                 embed_dim: int,
                 num_head: int,
                 ff_dim: int,
                 n_qubits_transformer: int = 0,
                 n_qubits_ffn: int = 0,
                 n_qlayers: int = 1,
                 dropout: float = 0.1,
                 mask=None):
        super().__init__()
        self.attn, self.ffn = None, None
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Attention sub-layer then feed-forward sub-layer.

        Each sub-layer output is added to its input, layer-normalised, and
        then passed through dropout.
        """
        # Attention sub-layer.
        x = self.dropout1(self.norm1(self.attn(x) + x))
        # Feed-forward sub-layer.
        return self.dropout2(self.norm2(self.ffn(x) + x))


class TransformerBlockClassical(TransformerBlockBase):
    """Transformer block built from classical attention and a classical feed-forward layer."""

    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 ff_dim: int,
                 dropout: float = 0.1,
                 mask=None):
        # ``dropout`` and ``mask`` must be passed by keyword: positionally they
        # would land in the base class's ``n_qubits_transformer`` and
        # ``n_qubits_ffn`` slots, and the residual dropout layers would silently
        # keep their default rate.
        super(TransformerBlockClassical, self).__init__(embed_dim, num_heads, ff_dim, dropout=dropout, mask=mask)
        self.attn = MultiHeadAttentionClassical(embed_dim=embed_dim, num_heads=num_heads, dropout=dropout, mask=mask)
        # NOTE(review): the feed-forward layer keeps its own default dropout rate.
        self.ffn = FeedForwardClassical(embed_dim, ff_dim)


class TransformerBlockQuantum(TransformerBlockBase):
    """Transformer block with quantum attention and an optionally quantum feed-forward layer.

    The feed-forward layer is ``FeedForwardQuantum`` when ``n_qubits_ffn > 0``
    and ``FeedForwardClassical`` otherwise.
    """

    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 ffn_dim: int,
                 n_qubits_transformer: int = 0,
                 n_qubits_ffn: int = 0,
                 n_qlayers: int = 1,
                 dropout: float = 0.1,
                 mask=None,
                 q_device='default.qubit'):
        # Keyword arguments are required here: positionally, ``dropout`` and
        # ``mask`` would be bound to the base class's ``n_qubits_transformer``
        # and ``n_qubits_ffn`` parameters, leaving the residual dropout layers
        # at their default rate.
        super(TransformerBlockQuantum, self).__init__(embed_dim,
                                                      num_heads,
                                                      ffn_dim,
                                                      n_qubits_transformer=n_qubits_transformer,
                                                      n_qubits_ffn=n_qubits_ffn,
                                                      n_qlayers=n_qlayers,
                                                      dropout=dropout,
                                                      mask=mask)

        self.n_qubits_transformer = n_qubits_transformer
        self.n_qubits_ffn = n_qubits_ffn
        self.n_qlayers = n_qlayers

        self.attn = MultiHeadAttentionQuantum(embed_dim,
                                              num_heads,
                                              n_qubits=n_qubits_transformer,
                                              n_qlayers=n_qlayers,
                                              dropout=dropout,
                                              mask=mask,
                                              q_device=q_device)
        if n_qubits_ffn > 0:
            self.ffn = FeedForwardQuantum(embed_dim, n_qubits_ffn, n_qlayers, q_device=q_device)
        else:
            self.ffn = FeedForwardClassical(embed_dim, ffn_dim)


class PositionalEncoder(nn.Module):
    """Adds a fixed sinusoidal position table to scaled token embeddings.

    The table has shape ``(1, max_seq_len, embed_dim)`` and is registered as
    the buffer ``pe``.

    Args:
        embed_dim: width of the embeddings (odd values are supported).
        max_seq_len: number of positions stored in the table.
    """

    def __init__(self, embed_dim, max_seq_len=512):
        super().__init__()
        self.embed_dim = embed_dim

        # create constant 'pe' matrix with values dependent on pos and i
        pe = torch.zeros(max_seq_len, embed_dim)
        for pos in range(max_seq_len):
            for i in range(0, embed_dim, 2):
                pe[pos, i] = math.sin(pos / (10000 ** ((2 * i)/embed_dim)))
                # With an odd embed_dim the last sine column has no cosine
                # partner; writing it would index past the end of the row.
                if i + 1 < embed_dim:
                    pe[pos, i + 1] = math.cos(pos / (10000 ** ((2 * (i + 1))/embed_dim)))
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x * sqrt(embed_dim) + pe[:, :seq_len]`` for ``x`` of shape ``(batch, seq_len, embed_dim)``."""
        # make embeddings relatively larger
        x = x * math.sqrt(self.embed_dim)
        # add constant to embedding; the buffer is not a parameter, so it needs
        # no Variable wrapper to stay out of the gradient computation
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len]


class TextClassifier(nn.Module):
    """Token embedding + positional encoding + transformer blocks + mean-pooled classifier.

    Quantum transformer blocks are used when ``n_qubits_transformer > 0``,
    classical blocks otherwise.

    Args:
        embed_dim: embedding width.
        num_heads: attention heads per block.
        num_blocks: number of stacked transformer blocks.
        num_classes: number of target classes; values up to 2 give a single
            sigmoid output, larger values give ``num_classes`` outputs.
        vocab_size: number of rows in the token embedding table.
        ffn_dim: hidden width of the classical feed-forward layers.
        n_qubits_transformer: qubits for quantum attention (0 = classical).
        n_qubits_ffn: qubits for the quantum feed-forward layer (0 = classical).
        n_qlayers: number of variational layers per circuit.
        dropout: dropout rate applied to the pooled representation.
        q_device: PennyLane device name passed to the quantum blocks.
    """

    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 num_blocks: int,
                 num_classes: int,
                 vocab_size: int,
                 ffn_dim: int = 32,
                 n_qubits_transformer: int = 0,
                 n_qubits_ffn: int = 0,
                 n_qlayers: int = 0,
                 dropout=0.1,
                 q_device="default.qubit"):
        # The q_device default used to read "device.qubit"; every other class
        # in this file defaults to "default.qubit".
        super(TextClassifier, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.num_classes = num_classes
        self.vocab_size = vocab_size

        self.token_embedding = nn.Embedding(vocab_size, embed_dim)
        self.pos_embedding = PositionalEncoder(embed_dim)

        print(f"++ There will be {num_blocks} transformer blocks")

        if n_qubits_transformer > 0:
            print(f"++ Transformer will use {n_qubits_transformer} qubits and {n_qlayers} q layers")
            if n_qubits_ffn > 0:
                print(f"The feed-forward head will use {n_qubits_ffn} qubits")
            else:
                print(f"The feed-forward head will be classical")
            print(f"Using quantum device {q_device}")

            transformer_blocks = [
                TransformerBlockQuantum(embed_dim, num_heads, ffn_dim,
                                        n_qubits_transformer=n_qubits_transformer,
                                        n_qubits_ffn=n_qubits_ffn,
                                        n_qlayers=n_qlayers,
                                        q_device=q_device) for _ in range(num_blocks)
            ]
        else:
            transformer_blocks = [
                TransformerBlockClassical(embed_dim, num_heads, ffn_dim) for _ in range(num_blocks)
            ]

        self.transformers = nn.Sequential(*transformer_blocks)
        if self.num_classes > 2:
            self.class_logits = nn.Linear(embed_dim, num_classes)
        else:
            self.class_logits = nn.Linear(embed_dim, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Classify a batch of token-id sequences of shape ``(batch, seq_len)``.

        Returns:
            For ``num_classes <= 2``: probabilities of shape ``(batch,)``.
            For ``num_classes > 2``: log-probabilities of shape
            ``(batch, num_classes)``.
        """
        batch_size = x.shape[0]
        tokens = self.token_embedding(x)
        x = self.pos_embedding(tokens)
        x = self.transformers(x)
        x = x.mean(dim=1)  # global average pooling, works in 1D
        x = self.dropout(x)
        x = self.class_logits(x)
        if self.num_classes > 2:
            # The multi-class head has num_classes outputs, which cannot be
            # reshaped to (batch,); return per-class log-probabilities instead.
            return F.log_softmax(x, dim=1)
        return torch.sigmoid(x).reshape(batch_size)

In [None]:
def data_frame_simple_txt2vec(data,keystr,min_length=6,w_dict=None): # Just label the words based on how often they show in the text
    """Encode whitespace-tokenised sentences as zero-padded id sequences.

    Args:
        data: mapping from dataset name to a DataFrame with ``text`` and
            ``label`` columns.
        keystr: key of the DataFrame to encode.
        min_length: minimum padded sequence length.
        w_dict: optional word -> id vocabulary. When omitted, one is built from
            this dataset: with ``n`` distinct words, the most frequent word gets
            id ``n`` and the least frequent gets id 1.

    Returns:
        ``(TensorDataset(token_ids, labels), w_dict)``. Id 0 is padding and
        ``len(w_dict) + 1`` marks words missing from the vocabulary. All rows
        are padded to the longest sentence, rounded up to an even length and to
        at least ``min_length``.
    """
    frame = data[keystr]
    # Tokenise once and iterate over values, so that the row lookup does not
    # depend on the DataFrame having a default 0..n-1 index.
    token_rows = [txt.split() for txt in frame.text]

    if w_dict is None:
        ranked = Counter(tok for row in token_rows for tok in row).most_common()
        w_dict = {word: len(ranked) - rank for rank, (word, _count) in enumerate(ranked)}

    max_len = max((len(row) for row in token_rows), default=0)
    if max_len % 2 != 0:
        max_len += 1
    max_len = max(max_len, min_length)

    unknown_id = len(w_dict) + 1
    encoded = []
    for row in token_rows:
        ids = [w_dict.get(tok, unknown_id) for tok in row]
        ids.extend([0] * (max_len - len(ids)))
        encoded.append(torch.tensor(ids))

    label_tensor = torch.tensor(frame.label.to_numpy())
    txt_tensor = torch.stack(encoded)

    return data_utils.TensorDataset(txt_tensor,label_tensor),w_dict

In [None]:


# Encode the MC splits; the test split reuses the vocabulary built on the training split.
mc_df_train,train_dict=data_frame_simple_txt2vec(mc_data,'mc_train_data')
mc_df_test,_=data_frame_simple_txt2vec(mc_data,'mc_test_data',w_dict=train_dict)

mc_trainloader = DataLoader(mc_df_train, shuffle=True, batch_size=MRP_BATCH_SIZE)
# Evaluation does not depend on sample order, so the test loader is not
# shuffled; it uses the shared batch-size constant instead of a literal 30.
mc_testloader = DataLoader(mc_df_test, shuffle=False, batch_size=MRP_BATCH_SIZE)

In [None]:
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Small classical classifier for the MC task.
model = TextClassifier(
    embed_dim=12,
    num_heads=6,
    num_blocks=1,
    num_classes=2,
    vocab_size=20,
    ffn_dim=8,
).to(device)
num_epochs = 180

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

++ There will be 1 transformer blocks


In [None]:
# Count the scalar entries of every parameter that receives gradients.
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()
print(trainable_params)

1089


In [None]:
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in a batch (8 right out of 10 gives 0.8, not 8).

    Predictions strictly above 0.5 count as class 1, everything else as class 0.
    """
    # sign(p - 0.5) is -1 / 0 / +1; shifting by one and floor-dividing by two
    # maps these to 0 / 0 / 1.
    centred = preds - 0.5
    hard_labels = (torch.sign(centred).round() + 1) // 2
    hits = (hard_labels == y).float()
    return hits.sum() / len(hits)

In [None]:
# Train on the MC task and report held-out accuracy after every epoch.
N_EPOCHS=180
#optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
train_loss_history=[]
train_accuracy_history=[]
criterion = torch.nn.BCELoss()
for epoch in range(N_EPOCHS):
    # ---- training pass ----
    model.train()  # dropout active
    train_loss = []
    acc=0
    total_len=0
    for batch in tqdm(mc_trainloader, desc=f"Epoch {epoch + 1} in training", leave=False):
        images, labels = batch  # token-id sequences and their 0/1 labels
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.float())

        train_loss.append(loss.item())
        total_len += len(images)

        # Weight the per-batch accuracy by batch size; .item() keeps the
        # running sum a plain float rather than a (possibly CUDA) tensor.
        acc += binary_accuracy(outputs, labels).item()*len(images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    accuracy =  acc / total_len
    train_loss_history.append(np.mean(train_loss))
    train_accuracy_history.append(accuracy)
    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {np.mean(train_loss):.2f} accuracy:{accuracy}")

    # ---- evaluation pass ----
    model.eval()  # dropout off, so the test accuracy is deterministic
    total_len=0
    acc1=0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in tqdm(mc_testloader, desc=f"Epoch {epoch + 1} in testing", leave=False):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_len += len(images)

            acc1 += binary_accuracy(outputs, labels).item()*len(images)
    acc1 =  acc1 / total_len

    print('Testing Accuracy:', acc1)

In [None]:
# Encode the RP splits; the test split reuses the vocabulary built on the training split.
rp_df_train,train_dict=data_frame_simple_txt2vec(rp_data,'rp_train_data')
rp_df_test,_=data_frame_simple_txt2vec(rp_data,'rp_test_data',w_dict=train_dict)

rp_trainloader = DataLoader(rp_df_train, shuffle=True, batch_size=MRP_BATCH_SIZE)
# Evaluation does not depend on sample order, so the test loader is not
# shuffled; it uses the shared batch-size constant instead of a literal 30.
rp_testloader = DataLoader(rp_df_test, shuffle=False, batch_size=MRP_BATCH_SIZE)

In [None]:
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh classical classifier for the RP task (larger embedding table).
model = TextClassifier(
    embed_dim=12,
    num_heads=6,
    num_blocks=1,
    num_classes=2,
    vocab_size=100,
    ffn_dim=8,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

++ There will be 1 transformer blocks


In [None]:
# Count the scalar entries of every parameter that receives gradients.
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()
print(trainable_params)

2049


In [None]:
# Train on the RP task and report held-out accuracy after every epoch.
N_EPOCHS=300
#optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
train_loss_history=[]
train_accuracy_history=[]
criterion = torch.nn.BCELoss()
for epoch in range(N_EPOCHS):
    # ---- training pass ----
    model.train()  # dropout active
    train_loss = []
    acc=0
    total_len=0
    for batch in tqdm(rp_trainloader, desc=f"Epoch {epoch + 1} in training", leave=False):
        images, labels = batch  # token-id sequences and their 0/1 labels
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.float())

        train_loss.append(loss.item())
        total_len += len(images)

        # Weight the per-batch accuracy by batch size; .item() keeps the
        # running sum a plain float rather than a (possibly CUDA) tensor.
        acc += binary_accuracy(outputs, labels).item()*len(images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    accuracy =  acc / total_len
    train_loss_history.append(np.mean(train_loss))
    train_accuracy_history.append(accuracy)
    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {np.mean(train_loss):.2f} accuracy:{accuracy}")

    # ---- evaluation pass ----
    model.eval()  # dropout off, so the test accuracy is deterministic
    total_len=0
    acc1=0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in tqdm(rp_testloader, desc=f"Epoch {epoch + 1} in testing", leave=False):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_len += len(images)

            acc1 += binary_accuracy(outputs, labels).item()*len(images)
    acc1 =  acc1 / total_len

    print('Testing Accuracy:', acc1)

In [None]:
# Download and unpack the "sentiment labelled sentences" archive, then delete
# its readme.txt so that a later *.txt glob only picks up the data files.
!wget https://archive.ics.uci.edu/static/public/331/sentiment+labelled+sentences.zip
!unzip -o sentiment+labelled+sentences.zip
!rm "sentiment labelled sentences/readme.txt"

--2023-11-01 14:30:55--  https://archive.ics.uci.edu/static/public/331/sentiment+labelled+sentences.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘sentiment+labelled+sentences.zip’

          sentiment     [<=>                 ]       0  --.-KB/s               sentiment+labelled+     [ <=>                ]  82.21K  --.-KB/s    in 0.08s   

2023-11-01 14:30:55 (1.02 MB/s) - ‘sentiment+labelled+sentences.zip’ saved [84188]

Archive:  sentiment+labelled+sentences.zip
   creating: sentiment labelled sentences/
  inflating: sentiment labelled sentences/.DS_Store  
   creating: __MACOSX/
   creating: __MACOSX/sentiment labelled sentences/
  inflating: __MACOSX/sentiment labelled sentences/._.DS_Store  
  inflating: sentiment labelled sentences/amazon_cells_labelled.txt  
  inflating: sentiment lab

In [None]:
# Locate the Amazon, IMDb and Yelp text files inside the unpacked archive.
datasets_path = Path("sentiment labelled sentences")
datasets = [txt_file for txt_file in datasets_path.glob("*.txt")]

# Reading all data and creating data loaders from our data loading strategy
def read_process(datapaths: list):
    """Read tab-separated ``sentence<TAB>label`` files into DataFrames.

    Args:
        datapaths: iterable of ``pathlib.Path`` objects.

    Returns:
        dict mapping the file name up to its first ``.`` to a DataFrame with
        the columns ``text`` (str) and ``label`` (int). Lines without a tab,
        such as a trailing empty line, are dropped.
    """
    retval = {}
    for datapath in datapaths:
        # Split on the LAST tab only: the label is the final field, so a tab
        # inside the sentence no longer yields a three-column row that the
        # two-column DataFrame constructor rejects.
        rows = [line.rsplit("\t", 1) for line in datapath.read_text().split("\n")]
        df = pd.DataFrame(rows, columns=['text', 'label']).dropna()
        df['label'] = df['label'].apply(int)
        retval[datapath.name.split(".")[0]] = df
    return retval

def ttsplit(data: pd.DataFrame, test_size=0.2):
    """Stratified train/test split on the ``label`` column with a fixed seed (42).

    Both returned frames have their index reset and the resulting ``index``
    column dropped.
    """
    parts = train_test_split(data, test_size=test_size, stratify=data['label'], random_state=42)
    train_data, test_data = (part.reset_index().drop(columns=['index']) for part in parts)
    return train_data, test_data



# Load the three review corpora and pull them out by file stem.
datadict = read_process(datasets)
amazon_data, imdb_data, yelp_data = (
    datadict[stem]
    for stem in ('amazon_cells_labelled', 'imdb_labelled', 'yelp_labelled')
)

# 80/20 stratified split for every corpus.
amazon_train, amazon_test = ttsplit(amazon_data, 0.2)
imdb_train, imdb_test = ttsplit(imdb_data, 0.2)
yelp_train, yelp_test = ttsplit(yelp_data, 0.2)

In [None]:
def data_frame_simple_txt2vec_2(df,min_length=4,max_cut=32,w_dict=None,cutoff=0.7):
    """Encode free-text sentences as fixed-length id sequences.

    Sentences are lower-cased, ``.`` and ``,`` are removed, ``?`` ``!`` ``(``
    ``)`` become separate tokens and ``&`` becomes ``and``. The same
    normalisation is used both for building the vocabulary and for encoding.

    Args:
        df: DataFrame with ``text`` and ``label`` columns.
        min_length: accepted for interface compatibility; the output length is
            determined by ``max_cut`` only.
        max_cut: every sequence is zero-padded or truncated to this length.
        w_dict: optional word -> id vocabulary. When omitted, one is built from
            the ``int(n_distinct * cutoff)`` most frequent tokens; the most
            frequent gets the highest id and the last kept token gets id 1.
        cutoff: fraction of distinct tokens kept in a newly built vocabulary.

    Returns:
        ``(TensorDataset(token_ids, labels), w_dict)``. Id 0 is padding and
        ``len(w_dict) + 1`` marks tokens outside the vocabulary.
    """
    def _tokenise(txt):
        return txt.lower().replace(".","").replace(",","").replace("?"," ?").replace("!"," !").replace("&"," and ").replace("("," ( ").replace(")"," ) ").split()

    # Tokenise once; iterating over values keeps the row lookup independent of
    # the DataFrame index.
    token_rows = [_tokenise(txt) for txt in df.text]

    if w_dict is None:
        ranked = Counter(tok for row in token_rows for tok in row).most_common()
        keep = int(len(ranked) * cutoff)
        # Slicing (rather than counting down to zero) also covers keep == 0,
        # where the countdown never hit its stop condition and handed out
        # non-positive ids.
        w_dict = {word: keep - rank for rank, (word, _count) in enumerate(ranked[:keep])}

    unknown_id = len(w_dict) + 1
    encoded = []
    for row in token_rows:
        # Encode the normalised tokens - the raw ``split()`` used previously
        # left case and punctuation attached, so most words missed the vocabulary.
        ids = [w_dict.get(tok, unknown_id) for tok in row][:max_cut]
        ids.extend([0] * (max_cut - len(ids)))
        encoded.append(torch.tensor(ids))

    label_tensor = torch.tensor(df.label.to_numpy())
    txt_tensor = torch.stack(encoded)

    return data_utils.TensorDataset(txt_tensor,label_tensor),w_dict

In [None]:
# Amazon reviews: build the vocabulary on the training split and reuse it for the test split.
data_train = amazon_train
data_test = amazon_test
tdf_train, train_dict = data_frame_simple_txt2vec_2(data_train, max_cut=32)
tdf_test, _ = data_frame_simple_txt2vec_2(data_test, w_dict=train_dict, max_cut=32)

In [None]:
trainloader = DataLoader(tdf_train, shuffle=True, batch_size=200)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
testloader = DataLoader(tdf_test, shuffle=False, batch_size=200)

In [None]:
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh classical classifier for the Amazon reviews.
model = TextClassifier(
    embed_dim=12,
    num_heads=6,
    num_blocks=1,
    num_classes=2,
    vocab_size=2400,
    ffn_dim=8,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

++ There will be 1 transformer blocks


In [None]:
# Count the scalar entries of every parameter that receives gradients.
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()
print(trainable_params)

29649


In [None]:
# Train on the Amazon reviews and report held-out accuracy after every epoch.
N_EPOCHS=300
#optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
train_loss_history=[]
train_accuracy_history=[]
criterion = torch.nn.BCELoss()
for epoch in range(N_EPOCHS):
    # ---- training pass ----
    model.train()  # dropout active
    train_loss = []
    acc=0
    total_len=0
    for batch in tqdm(trainloader, desc=f"Epoch {epoch + 1} in training", leave=False):
        images, labels = batch  # token-id sequences and their 0/1 labels
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.float())

        train_loss.append(loss.item())
        total_len += len(images)

        # Weight the per-batch accuracy by batch size; .item() keeps the
        # running sum a plain float rather than a (possibly CUDA) tensor.
        acc += binary_accuracy(outputs, labels).item()*len(images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    accuracy =  acc / total_len
    train_loss_history.append(np.mean(train_loss))
    train_accuracy_history.append(accuracy)
    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {np.mean(train_loss):.2f} accuracy:{accuracy}")

    # ---- evaluation pass ----
    model.eval()  # dropout off, so the test accuracy is deterministic
    total_len=0
    acc1=0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in tqdm(testloader, desc=f"Epoch {epoch + 1} in testing", leave=False):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_len += len(images)

            acc1 += binary_accuracy(outputs, labels).item()*len(images)
    acc1 =  acc1 / total_len

    print('Testing Accuracy:', acc1)

In [None]:
# IMDb reviews: build the vocabulary on the training split and reuse it for the test split.
data_train = imdb_train
data_test = imdb_test
tdf_train, train_dict = data_frame_simple_txt2vec_2(data_train, max_cut=71)
tdf_test, _ = data_frame_simple_txt2vec_2(data_test, w_dict=train_dict, max_cut=71)

In [None]:
trainloader = DataLoader(tdf_train, shuffle=True, batch_size=200)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
testloader = DataLoader(tdf_test, shuffle=False, batch_size=200)

In [None]:
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh classical classifier for the IMDb reviews.
model = TextClassifier(
    embed_dim=12,
    num_heads=6,
    num_blocks=1,
    num_classes=2,
    vocab_size=3300,
    ffn_dim=8,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

++ There will be 1 transformer blocks


In [None]:
# Count the scalar entries of every parameter that receives gradients.
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()
print(trainable_params)

40449


In [None]:
# Train on the IMDb reviews and report held-out accuracy after every epoch.
N_EPOCHS=300
#optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
train_loss_history=[]
train_accuracy_history=[]
criterion = torch.nn.BCELoss()
for epoch in range(N_EPOCHS):
    # ---- training pass ----
    model.train()  # dropout active
    train_loss = []
    acc=0
    total_len=0
    for batch in tqdm(trainloader, desc=f"Epoch {epoch + 1} in training", leave=False):
        images, labels = batch  # token-id sequences and their 0/1 labels
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.float())

        train_loss.append(loss.item())
        total_len += len(images)

        # Weight the per-batch accuracy by batch size; .item() keeps the
        # running sum a plain float rather than a (possibly CUDA) tensor.
        acc += binary_accuracy(outputs, labels).item()*len(images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    accuracy =  acc / total_len
    train_loss_history.append(np.mean(train_loss))
    train_accuracy_history.append(accuracy)
    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {np.mean(train_loss):.2f} accuracy:{accuracy}")

    # ---- evaluation pass ----
    model.eval()  # dropout off, so the test accuracy is deterministic
    total_len=0
    acc1=0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in tqdm(testloader, desc=f"Epoch {epoch + 1} in testing", leave=False):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_len += len(images)

            acc1 += binary_accuracy(outputs, labels).item()*len(images)
    acc1 =  acc1 / total_len

    print('Testing Accuracy:', acc1)

In [None]:
# Yelp reviews: build the vocabulary on the training split and reuse it for the test split.
data_train = yelp_train
data_test = yelp_test
tdf_train, train_dict = data_frame_simple_txt2vec_2(data_train, max_cut=32)
tdf_test, _ = data_frame_simple_txt2vec_2(data_test, w_dict=train_dict, max_cut=32)

In [None]:
trainloader = DataLoader(tdf_train, shuffle=True, batch_size=200)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
testloader = DataLoader(tdf_test, shuffle=False, batch_size=200)

In [None]:
# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh classical classifier for the Yelp reviews.
model = TextClassifier(
    embed_dim=12,
    num_heads=6,
    num_blocks=1,
    num_classes=2,
    vocab_size=2000,
    ffn_dim=8,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

++ There will be 1 transformer blocks


In [None]:
# Count the scalar entries of every parameter that receives gradients.
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()
print(trainable_params)

24849


In [None]:
# Train on the Yelp reviews and report held-out accuracy after every epoch.
N_EPOCHS=300
#optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
train_loss_history=[]
train_accuracy_history=[]
criterion = torch.nn.BCELoss()
for epoch in range(N_EPOCHS):
    # ---- training pass ----
    model.train()  # dropout active
    train_loss = []
    acc=0
    total_len=0
    for batch in tqdm(trainloader, desc=f"Epoch {epoch + 1} in training", leave=False):
        images, labels = batch  # token-id sequences and their 0/1 labels
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels.float())

        train_loss.append(loss.item())
        total_len += len(images)

        # Weight the per-batch accuracy by batch size; .item() keeps the
        # running sum a plain float rather than a (possibly CUDA) tensor.
        acc += binary_accuracy(outputs, labels).item()*len(images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    accuracy =  acc / total_len
    train_loss_history.append(np.mean(train_loss))
    train_accuracy_history.append(accuracy)
    print(f"Epoch {epoch + 1}/{N_EPOCHS} loss: {np.mean(train_loss):.2f} accuracy:{accuracy}")

    # ---- evaluation pass ----
    model.eval()  # dropout off, so the test accuracy is deterministic
    total_len=0
    acc1=0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in tqdm(testloader, desc=f"Epoch {epoch + 1} in testing", leave=False):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_len += len(images)

            acc1 += binary_accuracy(outputs, labels).item()*len(images)
    acc1 =  acc1 / total_len

    print('Testing Accuracy:', acc1)