<a href="https://colab.research.google.com/github/Bilal0031/githubProj/blob/master/SentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers


In [None]:
import numpy as np
from tqdm.auto import tqdm
import tensorflow as tf

In [None]:
import pandas as pd


In [None]:
# Load the training set from a tab-separated file into a DataFrame.
# NOTE(review): the path presumably points at a Google Drive mounted in Colab;
# the mount step is not shown in this notebook — confirm before running.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/train.tsv', sep='\t')

In [None]:
# Show the first rows of the DataFrame as the cell output.
df.head()

In [None]:
from transformers import BertTokenizer

In [None]:
# Load the tokenizer that belongs to the 'bert-base-cased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [None]:
# Pre-allocate two independent zero-filled arrays, one row per DataFrame row
# and 256 columns (the tokenizer's max_length): one for token ids, one for
# attention masks.
X_input_ids, X_attn_masks = (np.zeros((len(df), 256)) for _ in range(2))


In [None]:
def preprocessing_dataset(df, ids, masks, tokenizer):
    """Tokenize every phrase of ``df`` into the pre-allocated arrays.

    Args:
        df: DataFrame with a ``'Phrase'`` column holding the texts.
        ids: 2-D array with one row per phrase and 256 columns; row ``i`` is
            overwritten with the token ids of phrase ``i``.
        masks: Array of the same shape as ``ids``; row ``i`` is overwritten
            with the attention mask of phrase ``i``.
        tokenizer: Tokenizer exposing ``encode_plus``.

    Returns:
        The same ``ids`` and ``masks`` arrays (they are filled in place).
    """
    # Wrap the column itself rather than the enumerate object: enumerate has
    # no length, so tqdm(enumerate(...)) cannot show a total or an ETA.
    for i, text in enumerate(tqdm(df['Phrase'])):
        tokenized_text = tokenizer.encode_plus(
            text,
            max_length=256,
            truncation=True,
            padding='max_length',  # every row is padded to exactly 256 tokens
            add_special_tokens=True,
            return_tensors='tf'
        )
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks

In [None]:
# Tokenize every phrase in df; the pre-allocated arrays are filled in place
# and returned, so the names are simply rebound to the same arrays.
X_input_ids, X_attn_masks = preprocessing_dataset(df, X_input_ids, X_attn_masks, tokenizer)

In [None]:
# Allocate the label matrix: one row per example and 5 columns (matching the
# 5-unit softmax output of the model), initially all zeros.
labels = np.zeros((len(df), 5))

In [None]:
# Display the shape of the label matrix as the cell output.
labels.shape

In [None]:
# NOTE(review): this cell is a bare tuple literal and has no effect; it looks
# like the output of `labels.shape` pasted into a code cell — confirm and remove.
(156059, 5)

In [None]:
# One-hot encode the targets: in row i, set the column given by the i-th
# 'Sentiment' value to 1. The assignment is essential — without it the
# expression only reads the selected entries and `labels` stays all zeros.
labels[np.arange(len(df)), df['Sentiment'].values] = 1

In [None]:
# Build a tf.data dataset that yields one (input_ids, attention_mask, label)
# triple per example by slicing the three arrays along their first axis.
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))

In [None]:
# Create a dataset limited to at most one element; as the last expression of
# the cell the resulting dataset object is displayed (presumably a quick check
# of the pipeline's structure).
dataset.take(1)

In [None]:
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    """Regroup one dataset element into a ``(features, labels)`` pair.

    The features are returned as a dict keyed by ``'input_ids'`` and
    ``'attention_mask'``; the labels are passed through unchanged.
    """
    features = dict(input_ids=input_ids, attention_mask=attn_masks)
    return features, labels

In [None]:
# Convert every element to (features dict, labels); the dict keys match the
# names given to the model's Input layers.
dataset = dataset.map(SentimentDatasetMapFunction)

In [None]:
# Shuffle with a 10000-element buffer, then group into batches of 16 and drop
# the final partial batch.
# reshuffle_each_iteration=False keeps the shuffled order identical on every
# pass over the dataset. The train/validation split is made with take()/skip()
# on this dataset, so with the default (reshuffle on every iteration) the two
# subsets would be drawn from different orderings and validation examples
# would leak into training.
dataset = dataset.shuffle(10000, reshuffle_each_iteration=False).batch(16, drop_remainder=True)


In [None]:
# Fraction of the batches used for training; the remainder is held out.
p = 0.8
# Number of full batches of 16 examples, scaled by the training fraction and
# truncated to an integer batch count.
train_size = int(p * (len(df) // 16))

In [None]:
# Split the batched dataset: the first `train_size` batches are used for
# training, everything after them for validation.
training_dataset = dataset.take(train_size)
validation_dataset = dataset.skip(train_size)


In [None]:
from transformers import TFBertModel

In [None]:
# Load the pretrained 'bert-base-cased' checkpoint as a TensorFlow BERT model.
model = TFBertModel.from_pretrained('bert-base-cased')

In [None]:
# Symbolic int32 input for token ids (sequence length 256); the layer name
# matches the 'input_ids' key of the dataset's feature dict.
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')


In [None]:
# Symbolic int32 input for attention masks (sequence length 256); the layer
# name matches the 'attention_mask' key of the dataset's feature dict.
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')

In [None]:
# Run the BERT main layer on the symbolic inputs and keep element [1] of its
# outputs — presumably the pooled sequence representation (pooler_output);
# TODO confirm against the transformers version in use.
bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]

In [None]:
# Fully connected layer with 512 ReLU units on top of the BERT output.
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)

In [None]:
# Classification head: 5-way softmax producing one probability per class.
output_layer = tf.keras.layers.Dense(5, activation='softmax', name='output_layer')(intermediate_layer) 

In [None]:
# Assemble the end-to-end Keras model: two inputs (token ids, attention mask)
# mapped to the softmax output.
sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)

In [None]:
# Print a layer-by-layer summary of the assembled model.
sentiment_model.summary()

In [None]:
# Adam optimizer with a small learning rate (1e-5) and a decay of 1e-6.
# NOTE(review): the `decay` argument may be rejected by newer Keras optimizer
# implementations — confirm the TF/Keras version in use, or replace it with a
# learning-rate schedule.
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)


In [None]:
# Cross-entropy loss for one-hot encoded targets, matching the 5-column
# label matrix.
loss_func = tf.keras.losses.CategoricalCrossentropy()


In [None]:
# Categorical accuracy metric, reported under the name 'accuracy'.
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')


In [None]:
# Configure the model for training with the optimizer, loss and metric
# objects created in the preceding cells.
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])


In [None]:
# Train for 2 epochs on the training batches, evaluating on the validation
# batches; the object returned by fit() is kept in `model_training`.
model_training = sentiment_model.fit(
    training_dataset,
    validation_data=validation_dataset,
    epochs=2
)

In [None]:
# Load the tokenizer again for inference (same 'bert-base-cased' checkpoint
# that was used to tokenize the training data).
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [None]:
def prepare_data(input_text, tokenizer):
    """Tokenize a single text into the feature dict used for prediction.

    The text is encoded with special tokens, truncated/padded to exactly 256
    tokens, and returned as TensorFlow tensors cast to float64.

    Args:
        input_text: The raw text to encode.
        tokenizer: Tokenizer exposing ``encode_plus``.

    Returns:
        Dict with keys ``'input_ids'`` and ``'attention_mask'``.
    """
    encoding = tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=256,
        truncation=True,
        padding='max_length',
        return_tensors='tf',
    )
    ids_tensor = tf.cast(encoding.input_ids, tf.float64)
    mask_tensor = tf.cast(encoding.attention_mask, tf.float64)
    return {'input_ids': ids_tensor, 'attention_mask': mask_tensor}


In [None]:
def make_prediction(model, processed_data, classes=('Negative', 'A bit negative', 'Neutral', 'A bit positive', 'Positive')):
    """Return the label of the class the model scores highest for one input.

    Args:
        model: Object exposing ``predict(processed_data)`` that returns a
            batch of per-class scores (first axis is the batch).
        processed_data: Model-ready input for a single example, e.g. the dict
            produced by ``prepare_data``.
        classes: Sequence of labels indexed by class id. The default is an
            immutable tuple so it cannot be modified between calls.

    Returns:
        The entry of ``classes`` at the index of the highest score in the
        first row of the prediction.
    """
    # Use the model that was passed in; the previous version ignored the
    # parameter and always called the notebook-global `sentiment_model`.
    probs = model.predict(processed_data)[0]
    return classes[int(np.argmax(probs))]

In [None]:
# Interactive check: read one review from the user, convert it to model
# inputs, predict its sentiment class and print the resulting label.
input_text = input('Input a review here:')
processed_data = prepare_data(input_text, tokenizer)
result = make_prediction(sentiment_model, processed_data=processed_data)
print(f"Classification results: {result}")