### **Aspect Based Sentiment Analysis**

In [17]:
from locale import normalize

''' Import all required libraries and frameworks '''
import pandas as pd
import torch
import torch.nn as nn
import os
import re

### **Load the Dataset Using Pandas**

In [18]:
# Dataset path.
# The original machine-specific folder remains the default so existing runs are
# unaffected; set the ABSA_DATA_DIR environment variable to point at another
# folder containing test.csv and train.csv without editing the notebook.
Root_dir = os.environ.get(
    'ABSA_DATA_DIR',
    '/Users/mahadiur/Desktop/Bongodev MLops Projects/Aspect Based Sentiment Analysis/Data',
)
test_dir = os.path.join(Root_dir, 'test.csv')
train_dir = os.path.join(Root_dir, 'train.csv')

# Load dataset
Test_Dataset = pd.read_csv(test_dir)
Train_Dataset = pd.read_csv(train_dir)

# Check Dataset: preview the first rows of each split
print(Test_Dataset.head())
print(Train_Dataset.head())

                                              review          aspect sentiment
0                   The bread is top notch as well .           bread  positive
1  I have to say they have one of the fastest del...  delivery times  positive
2      Food is always fresh and hot - ready to eat !            Food  positive
3     Did I mention that the coffee is OUTSTANDING ?          coffee  positive
4  Certainly not the best sushi in New York , how...           place  positive
                                              review   aspect sentiment
0              But the staff was so horrible to us .    staff  negative
1  To be completely fair , the only redeeming fac...     food  positive
2  The food is uniformly exceptional , with a ver...     food  positive
3  The food is uniformly exceptional , with a ver...  kitchen  positive
4  The food is uniformly exceptional , with a ver...     menu   neutral


In [19]:
# Inspect the column labels of the training DataFrame (shown as the cell output).
Train_Dataset.columns

Index(['review', 'aspect', 'sentiment'], dtype='object')

### **Data (ABSA Part 1)**

In [20]:
''' Prepare a single example '''
# Take one training row and tidy its review text: lower-case it, then collapse
# every run of whitespace into a single space (also trimming both ends).
index = 0
text = Train_Dataset.iloc[index]
review = ' '.join(text['review'].lower().split())
print(review)


but the staff was so horrible to us .


In [21]:
''' Normalize Function '''
def Normalize(text):
    """Return a cleaned, lower-case version of `text`.

    Steps:
      1. Lower-case the input.
      2. Replace every character that is not a-z, 0-9 or whitespace with a
         single space (punctuation and non-ASCII letters are dropped this way).
      3. Collapse runs of whitespace into one space and trim both ends.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string; empty if nothing but removed characters remained.
    """
    lowered = text.lower()
    alnum_only = re.sub(r"[^a-z0-9\s]", " ", lowered)
    return ' '.join(alnum_only.split())

# Try the normalizer on the sample review; the returned string is displayed as the cell output.
Normalize(review)

'but the staff was so horrible to us'

In [22]:
''' Word-level Tokenization '''
def Tokenization(text):
    """Split `text` into word-level tokens on whitespace.

    Args:
        text: the string to tokenize.

    Returns:
        A list of the whitespace-separated pieces of `text`; an empty list
        when `text` is empty or contains only whitespace.
    """
    return text.split()

# Normalize the sample review, then split it into word tokens
# (the resulting list is displayed as the cell output).
text = Normalize(review)
Tokenization(text)

['but', 'the', 'staff', 'was', 'so', 'horrible', 'to', 'us']

In [23]:
''' Vocabulary '''
def Vocabulary(texts):
    """Build a token -> integer id mapping from an iterable of raw texts.

    Ids 0 and 1 are reserved for '<padding>' and '<unknown>'. Each text is
    passed through Normalize and then Tokenization, and every token not seen
    before receives the next free id, in order of first appearance.

    Args:
        texts: iterable of strings (for example a pandas Series of reviews).

    Returns:
        dict mapping each token (including the two special tokens) to its id.
    """
    vocab = {'<padding>': 0, '<unknown>': 1}
    for raw in texts:
        for word in Tokenization(Normalize(raw)):
            if word not in vocab:
                # Ids are handed out contiguously from 0, so the next free id
                # always equals the current number of entries.
                vocab[word] = len(vocab)
    return vocab

# Build the token -> id mapping from the training reviews and print its size
# (the count includes the '<padding>' and '<unknown>' entries).
Token_2_id= Vocabulary(Train_Dataset['review'])
print(len(Token_2_id))


3736


In [26]:
''' Convert token to id '''
def convert_token_2_id(tokens, vocab=None):
    """Map a sequence of tokens to their integer ids.

    Tokens missing from the vocabulary are mapped to the id stored under
    '<unknown>'.

    Args:
        tokens: iterable of token strings.
        vocab: optional token -> id dict to use. When omitted, the notebook's
            global `Token_2_id` mapping is used, which matches the previous
            single-argument behaviour.

    Returns:
        list of integer ids, one per input token, in the same order.

    Raises:
        KeyError: if the vocabulary has no '<unknown>' entry.
    """
    if vocab is None:
        # Fall back to the module-level vocabulary so existing calls keep working.
        vocab = Token_2_id
    # The fallback id does not change per token, so look it up once.
    unknown_id = vocab['<unknown>']
    return [vocab.get(token, unknown_id) for token in tokens]

# Demo: encode one training review with an extra word appended, to show how a
# token that is missing from the vocabulary falls back to the '<unknown>' id.
idx = 0
text = Train_Dataset.iloc[idx]
# The leading space keeps the appended word a separate token even when the
# review does not end in punctuation or whitespace.
review = text['review'] + ' hello'
# NOTE: this name shadows the `normalize` imported from `locale` in the first cell.
normalize = Normalize(review)
Tokenize = Tokenization(normalize)
input_id = convert_token_2_id(Tokenize)

print(len(Token_2_id))
print(Tokenize)
print(input_id)

3736
['but', 'the', 'staff', 'was', 'so', 'horrible', 'to', 'us', 'hello']
[2, 3, 4, 5, 6, 7, 8, 9, 1]
