In [98]:
import random
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import nltk
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [99]:
df = pd.read_csv('ReviewsEN.csv')

In [100]:
print("-1 :", df['sentiment'].value_counts()[-1])
print("0 :", df['sentiment'].value_counts()[0])
print("1 :", df['sentiment'].value_counts()[1])

-1 : 870
0 : 639
1 : 1518


In [101]:
# Replace values in pandas DataFrame.
df['sentiment'] = df['sentiment'].replace([1], 2)
df['sentiment'] = df['sentiment'].replace([0], 1)
df['sentiment'] = df['sentiment'].replace([-1], 0)

## Case Folding

In [102]:
# Apply lower function
df['reviews'] = df['reviews'].apply(str.lower)
print(df)

      sentiment                                            reviews
0             1  teacher are punctual but they should also give...
1             2                                               good
2             2  excellent lectures are delivered by teachers a...
3             2  teachers give us all the information required ...
4             2                                                yes
...         ...                                                ...
3022          2        lecturers provide clear enough explanations
3023          2            lecturer's assessment is very objective
3024          0              lecturers give very good explanations
3025          0        lecturers often provoke discussion in class
3026          2         lecturers provide material in a boring way

[3027 rows x 2 columns]


## Hyper Parameter

In [103]:
# Global Variables

EMBEDDING_DIM = 100
MAXLEN = 16
TRUNCATING = 'post'
PADDING = 'post'
OOV_TOKEN = "<OOV>"
MAX_EXAMPLES = len(df)
TRAINING_SPLIT = 0.8

## Remove Punctuation

In [104]:
import string

def remove_punctuation(text):
    # Remove punctuation marks
    translator = str.maketrans('', '', string.punctuation)
    text_without_punctuation = text.translate(translator)
    return text_without_punctuation

In [105]:
removedPunctuation_text = []
for i in range(len(df)):
  removedPunctuation_text.append(remove_punctuation(df['reviews'][i]))

df['reviews'] = removedPunctuation_text
df['reviews'].values

array(['teacher are punctual but they should also give us the some practical knowledge other than theortical',
       'good',
       'excellent lectures are delivered by teachers and all teachers are very punctual',
       ..., 'lecturers give very good explanations',
       'lecturers often provoke discussion in class',
       'lecturers provide material in a boring way'], dtype=object)

## Lemmatizing

In [106]:
import nltk
from nltk.stem import WordNetLemmatizer

nltk.download('wordnet')
nltk.download('punkt')

def lemmatize_text(text):
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = []
    tokens = nltk.word_tokenize(text)
    for token in tokens:
        lemma = lemmatizer.lemmatize(token)
        lemmatized_words.append(lemma)
    lemmatized_text = ' '.join(lemmatized_words)
    return lemmatized_text


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [107]:
temp =  []
for i in range(len(df)):
  temp.append(lemmatize_text(df['reviews'][i]))
df['reviews'] = temp

## Convert Numbers

In [108]:
import re

def remove_numbers(text):
    cleaned_text = re.sub(r'\d+', '', text)
    return cleaned_text

In [109]:
df['reviews'] = df['reviews'].apply(remove_numbers)
df

Unnamed: 0,sentiment,reviews
0,1,teacher are punctual but they should also give...
1,2,good
2,2,excellent lecture are delivered by teacher and...
3,2,teacher give u all the information required to...
4,2,yes
...,...,...
3022,2,lecturer provide clear enough explanation
3023,2,lecturer assessment is very objective
3024,0,lecturer give very good explanation
3025,0,lecturer often provoke discussion in class


## Random Sampling

In [110]:
import random

random.seed(42)

# Get the indices of the DataFrame
indices = df.index.tolist()

# Perform random sampling on the indices
selected_indices = random.sample(indices, MAX_EXAMPLES)

# Select the corresponding sentences and labels based on the sampled indices
sentences = df.loc[selected_indices, 'reviews']
labels = df.loc[selected_indices, 'sentiment']

print(f"There are {len(sentences)} sentences and {len(labels)} labels after random sampling\n")


There are 3027 sentences and 3027 labels after random sampling



In [111]:
print(selected_indices)
print(len(selected_indices))

[2619, 456, 102, 1126, 1003, 914, 571, 3016, 419, 2771, 2233, 356, 2418, 1728, 130, 122, 383, 895, 952, 2069, 2465, 108, 2298, 814, 2932, 2661, 2872, 2232, 1718, 902, 1839, 2413, 1139, 26, 653, 2859, 1731, 1393, 1138, 636, 881, 1378, 418, 379, 1556, 396, 1470, 1408, 2472, 1083, 177, 1881, 2196, 511, 1550, 322, 2261, 1200, 2574, 2533, 1481, 2364, 787, 2885, 284, 187, 2708, 933, 1185, 326, 953, 413, 2982, 2988, 1857, 2603, 1494, 666, 1516, 1455, 858, 2745, 1093, 2874, 2799, 2654, 292, 2495, 2600, 700, 2187, 1002, 669, 1893, 1554, 1105, 2621, 2818, 2281, 899, 2804, 1328, 229, 938, 131, 1292, 1643, 1096, 271, 864, 2323, 1288, 870, 2684, 2044, 1620, 2633, 1879, 585, 1084, 3020, 1010, 2299, 2207, 1076, 2394, 1754, 2390, 1635, 1482, 898, 566, 2087, 2021, 372, 192, 449, 626, 2570, 655, 2787, 1729, 2442, 260, 1576, 1563, 2440, 1917, 2167, 1029, 2266, 47, 2786, 469, 2792, 2199, 1092, 2625, 2989, 3025, 1202, 1780, 647, 1858, 13, 1078, 2050, 731, 2079, 435, 2561, 1222, 2617, 2858, 2494, 3003, 2889

In [112]:
sentences

2619    the professor did not make use of reallife exa...
456     it good but we dont find the book because thei...
102     interaction of some faculty wa good but not al...
1126    he is one of the best teacher i have ever had ...
1003    the lecturer wa organized and clear but they c...
                              ...                        
708     this course wa amazing i learned so much and i...
1172    she say that if you understand the example fro...
160     very knowledgeable some subject do not have de...
2328    the professor doe not encourage active partici...
887       dont be afraid to ask for help when you need it
Name: reviews, Length: 3027, dtype: object

In [113]:
labels

2619    0
456     1
102     1
1126    2
1003    1
       ..
708     2
1172    2
160     1
2328    0
887     1
Name: sentiment, Length: 3027, dtype: int64

# Training - Validation Split

In [114]:
def train_val_split(sentences, labels, training_split):
    ### START CODE HERE

    # Compute the number of sentences that will be used for training (should be an integer)
    train_size = int(len(sentences)*training_split)

    # Split the sentences and labels into train/validation splits
    train_sentences = sentences[:train_size]
    train_labels = labels[:train_size]

    test_sentences = sentences[train_size:]
    test_labels = labels[train_size:]

    ### END CODE HERE

    return train_sentences, test_sentences, train_labels, test_labels

In [115]:
train_sentences, test_sentences, train_labels, test_labels = train_val_split(sentences, labels, TRAINING_SPLIT)

print(f"There are {len(train_sentences)} sentences for training.\n")
print(f"There are {len(train_labels)} labels for training.\n")
print(f"There are {len(test_sentences)} sentences for validation.\n")
print(f"There are {len(test_labels)} labels for validation.")

There are 2724 sentences for training.

There are 2724 labels for training.

There are 303 sentences for validation.

There are 303 labels for validation.


In [162]:
test_sentences

2588    the professor had a rigid teaching style not a...
838     the subject material are adequate and meet the...
1294    the professor is a wonderful and extremely hel...
2664    the professor demonstrated effective communica...
943     the presenter did not use effective visuals or...
                              ...                        
708     this course wa amazing i learned so much and i...
1172    she say that if you understand the example fro...
160     very knowledgeable some subject do not have de...
2328    the professor doe not encourage active partici...
887       dont be afraid to ask for help when you need it
Name: reviews, Length: 303, dtype: object

# Tokenization & Stopwords - Sequences, Truncating, and Padding

In [116]:
pip install nltk

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


#### Stopwords

In [117]:
my_file = open("stopwords.txt", "r")

data = my_file.read()

stopwords_data = data.split("\n")
print(stopwords_data)
my_file.close()

['a', 'about', 'above', 'across', 'after', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'among', 'an', 'and', 'another', 'any', 'anybody', 'anyone', 'anything', 'anywhere', 'are', 'area', 'areas', 'around', 'as', 'ask', 'asked', 'asking', 'asks', 'at', 'away', 'b', 'back', 'backed', 'backing', 'backs', 'be', 'became', 'because', 'become', 'becomes', 'been', 'before', 'began', 'behind', 'being', 'beings', 'best', 'better', 'between', 'big', 'both', 'but', 'by', 'c', 'came', 'can', 'cannot', 'case', 'cases', 'certain', 'certainly', 'clear', 'clearly', 'come', 'could', 'd', 'did', 'differ', 'different', 'differently', 'do', 'does', 'done', 'down', 'down', 'downed', 'downing', 'downs', 'during', 'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends', 'enough', 'even', 'evenly', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'f', 'face', 'faces', 'fact', 'facts', 'far', 'felt', 'few', 'find', 'finds', 'first

In [118]:
def remove_stopwords(sentence, data):
    # List of stopwords

    stopwords = data + ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ]
    numbers_stopwords = ["1", "2", "3", "4","5","6","7","8","9","10",
                         "one", "two","three","four","five","098"]
    more_words = ["didn't", "don't", "dont", "didnt", "it", "doesnt", "doesn't", "hw","won't","lpu","weren't","mr","mcq","shes",
                  "shes","india","in","hes","shes","me", "dr", "nlandu", "ko","it","1st", "omr", "ha", "upto","ca", "soo", "cd", "ive","po","cse", "chem", "un","of",
                  "mte", "omr","mte's","ca's","ete's","jnv","ip","sir","its","wks","prob","python","java","lattc","ol","ived","elsewhere", "mother","wouldnt","car",
                  "si", "sat","we","home","hot","god","ice","money's","money","even","about","thats", "wks", "thurs", "months", "sir", "go", "jnv", "ip", "today", "today's", "linux", "github",
                  "lt", "ums", "superb", "at", "cgpa","ques", "brain's", "mcqs", "ve", "say", "pc", "viva", "after", "before", "draw", "asst", "only", "rich", "never", "went", "pcs", "gk", "one's",
                  "co", "duty", "gona", "attendnce","same", "that's", "hahahah", "ad's", "university's", "relly", "build", "cricket", "said", "hall", "profs", "guy's", "can", "along", "archieve", "bag",
                  "part", "master", "push", "or", "add", "were", "virginia","human", "bless", "clean", "count", "onlineopen", "ounce", "brushing", "zero", "mail", "fys", "lowell", "stets", "untill", "until",
                  "prep", "appears", "giulia", "yuk", "memo", "ton", "110q", "unit", "80","re","by","order","fob", "sit", "from","art", "org", "4d", "3d", "cinema", "iii", "cal", "both", "sundays", "todays", "ad",
                  "yoursel","yourself", "kiss", "it'll", "obayani's", "anal", "pgs", "csci", "hw", "more", "able", "lecturer", "lecturer's", "student", "stundet's", "it", "want", "you","he's", "she's"]
    more =  [
    'a', 'about', 'above', 'after', 'again', 'against', "ain't", 'all', 'am', 'an', 'and', 'any', 'are', 'aren\'t', 'as',
    'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'can\'t', 'cannot',
    'could', 'couldn\'t', 'did', 'didn\'t', 'do', 'does', 'doesn\'t', 'doing', 'don\'t', 'down', 'during', 'each', 'few',
    'for', 'from', 'further', 'had', 'hadn\'t', 'has', 'hasn\'t', 'have', 'haven\'t', 'having', 'he', 'he\'d', 'he\'ll',
    'he\'s', 'her', 'here', 'here\'s', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'how\'s', 'i', 'i\'d', 'i\'ll',
    'i\'m', 'i\'ve', 'if', 'in', 'into', 'is', 'isn\'t', 'it', 'it\'s', 'its', 'itself', 'let\'s', 'me', 'more', 'most',
    'mustn\'t', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our',
    'ours', 'ourselves', 'out', 'over', 'own', 'same', 'shan\'t', 'she', 'she\'d', 'she\'ll', 'she\'s', 'should',
    'shouldn\'t', 'so', 'some', 'such', 'than', 'that', 'that\'s', 'the', 'their', 'theirs', 'them', 'themselves',
    'then', 'there', 'there\'s', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re', 'they\'ve', 'this', 'those',
    'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasn\'t', 'we', 'we\'d', 'we\'ll', 'we\'re',
    'we\'ve', 'were', 'weren\'t', 'what', 'what\'s', 'when', 'when\'s', 'where', 'where\'s', 'which', 'while', 'who',
    'who\'s', 'whom', 'why', 'why\'s', 'with', 'won\'t', 'would', 'wouldn\'t', 'you', 'you\'d', 'you\'ll', 'you\'re',
    'you\'ve', 'your', 'yours', 'yourself', 'yourselves']

    final_stopwords = stopwords + numbers_stopwords + more_words + more

    words = sentence.split()
    tempWords = []
    for i in words:
        if i not in final_stopwords:
            tempWords.append(i)
            sentence = ' '.join(tempWords)


    return sentence

In [119]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stop_words_list = list(stop_words)
stopwords_data = stopwords_data + stop_words_list

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [120]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(stop_words='english')
response = vectorizer.fit_transform(df['reviews'])
print(type(response))

<class 'scipy.sparse._csr.csr_matrix'>


In [121]:
cKomen = []
for i in range(len(df)):
  cKomen.append(remove_stopwords(df['reviews'][i], stopwords_data))

df['reviews'] = cKomen

In [122]:
final_data = df.copy()
final_data['reviews'][2619]
# final_data['sentiment'][2619]

'professor reallife example study illustrate concept theory'

In [123]:
final_data.to_csv('final_data.csv')

In [124]:
# GRADED FUNCTION: fit_tokenizer
def fit_tokenizer(sentences, oov_token):
    ### START CODE HERE

    # Instantiate the Tokenizer class, passing in the correct values for oov_token
    tokenizer = Tokenizer(oov_token = OOV_TOKEN)

    # Fit the tokenizer to the training sentences
    tokenizer.fit_on_texts(sentences)

    ### END CODE HERE

    return tokenizer

In [125]:
# Test your function
tokenizer = fit_tokenizer(train_sentences, OOV_TOKEN)
word_index = tokenizer.word_index
VOCAB_SIZE = len(word_index)

print(f"Vocabulary contains {VOCAB_SIZE} words\n")
print("<OOV> token included in vocabulary" if "<OOV>" in word_index else "<OOV> token NOT included in vocabulary")
print(f"\nindex of word 'i' should be {word_index['i']}")

Vocabulary contains 3669 words

<OOV> token included in vocabulary

index of word 'i' should be 10


In [126]:
def seq_pad_and_trunc(sentences, tokenizer, padding, truncating, maxlen):

    sequences = tokenizer.texts_to_sequences(sentences)
    pad_trunc_sequences = pad_sequences(sequences, maxlen= MAXLEN, padding = PADDING, truncating = TRUNCATING)
    return pad_trunc_sequences

In [127]:
train_pad_trunc_seq = seq_pad_and_trunc(train_sentences, tokenizer, PADDING, TRUNCATING, MAXLEN)
val_pad_trunc_seq = seq_pad_and_trunc(test_sentences, tokenizer, PADDING, TRUNCATING, MAXLEN)

print(f"Padded and truncated training sequences have shape: {train_pad_trunc_seq.shape}\n")
print(f"Padded and truncated validation sequences have shape: {val_pad_trunc_seq.shape}")

Padded and truncated training sequences have shape: (2724, 16)

Padded and truncated validation sequences have shape: (303, 16)


In [128]:
train_labels = np.array(train_labels)
val_labels = np.array(test_labels)

# Using pre-defined Embeddings

In [129]:
# Define path to file containing the embeddings
GLOVE_FILE = 'glove.6B.100d.txt'

# Initialize an empty embeddings index dictionary
GLOVE_EMBEDDINGS = {}

# Read file and fill GLOVE_EMBEDDINGS with its contents
with open(GLOVE_FILE) as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        GLOVE_EMBEDDINGS[word] = coefs

# Represent the words in your vocabulary using the embeddings

In [130]:
# Initialize an empty numpy array with the appropriate size
EMBEDDINGS_MATRIX = np.zeros((VOCAB_SIZE+1, EMBEDDING_DIM))

# Iterate all of the words in the vocabulary and if the vector representation for
# each word exists within GloVe's representations, save it in the EMBEDDINGS_MATRIX array
for word, i in word_index.items():
    embedding_vector = GLOVE_EMBEDDINGS.get(word)
    if embedding_vector is not None:
        EMBEDDINGS_MATRIX[i] = embedding_vector

# Define a model that does not overfit

Model with 0.001 learning rate

In [131]:
# def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
#     model = tf.keras.Sequential([
#         # This is how you need to set the Embedding layer when using pre-trained embeddings
#         tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=maxlen, weights=[embeddings_matrix], trainable=False),
#         tf.keras.layers.Conv1D(32, 5, activation='relu'),
#         tf.keras.layers.GlobalMaxPooling1D(),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(32, activation='relu'),
#         tf.keras.layers.Dense(3, activation='softmax'),
#     ])

#     model.compile(loss='sparse_categorical_crossentropy',
#                   optimizer='adam',
#                   metrics=['accuracy'])
#     return model

Model with 0.002 learning rate

In [132]:
from tensorflow.keras import optimizers

def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=maxlen, weights=[embeddings_matrix], trainable=False),
        tf.keras.layers.Conv1D(64, 3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(3, activation='softmax'),
    ])
    optimizer = optimizers.Adam(learning_rate = 0.002)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model


Model with 0.0025 learning rate

In [133]:
# from tensorflow.keras import optimizers

# def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
#     model = tf.keras.Sequential([
#         tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=maxlen, weights=[embeddings_matrix], trainable=False),
#         tf.keras.layers.Conv1D(64, 3, activation='relu'),
#         tf.keras.layers.MaxPooling1D(pool_size=2),
#         tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64, return_sequences=True)),
#         tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
#         tf.keras.layers.Dense(64, activation='relu'),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Dense(3, activation='softmax'),
#     ])
#     optimizer = optimizers.Adam(learning_rate = 0.0025)
#     model.compile(loss='sparse_categorical_crossentropy',
#                   optimizer=optimizer,
#                   metrics=['accuracy'])
#     return model


In [134]:
model = create_model(VOCAB_SIZE, EMBEDDING_DIM, MAXLEN, EMBEDDINGS_MATRIX)

In [135]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 16, 100)           367000    
                                                                 
 conv1d_1 (Conv1D)           (None, 14, 64)            19264     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 7, 64)            0         
 1D)                                                             
                                                                 
 bidirectional_2 (Bidirectio  (None, 7, 128)           49920     
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (None, 64)               31104     
 nal)                                                            
                                                      

In [136]:
# Train the model and save the training history
history = model.fit(train_pad_trunc_seq, train_labels, epochs=200, validation_data=(val_pad_trunc_seq, val_labels))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [137]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# Make predictions on the validation set
val_predictions = model.predict(val_pad_trunc_seq)
val_predicted_labels = np.argmax(val_predictions, axis=1)

# Calculate precision, recall, and F1-score
precision = precision_score(val_labels, val_predicted_labels, average='weighted')
recall = recall_score(val_labels, val_predicted_labels, average='weighted')
f1 = f1_score(val_labels, val_predicted_labels, average='weighted')

print("Precision: ", precision)
print("Recall: ", recall)
print("F1-score: ", f1)


Precision:  0.7415801782158803
Recall:  0.7392739273927392
F1-score:  0.7395195516742685


Based on the evaluation metrics, our model performs relatively well with relatively high values of precision, recall, and F1-score

# Testing

In [138]:
# Assuming you have trained the model and obtained the history

# Random text for prediction
random_text = "good teaching"

# Tokenize the random text
random_text_sequence = tokenizer.texts_to_sequences([random_text])

# Pad and truncate the sequence
random_text_sequence = pad_sequences(random_text_sequence, maxlen=MAXLEN, padding=PADDING, truncating=TRUNCATING)

# Make prediction
prediction = model.predict(random_text_sequence)[0]

# Convert prediction to sentiment label
predicted_label = np.argmax(prediction)

# Map sentiment label to sentiment interpretation
sentiment = "Positive" if predicted_label == 2 else "Neutral" if predicted_label == 1 else "Negative"

# Print the prediction result
print(f"Text: {random_text}")
print(f"Predicted Sentiment: {sentiment}")


Text: good teaching
Predicted Sentiment: Positive


In [139]:
# Random text for prediction
random_text = "Bad at teaching"

# Tokenize the random text
random_text_sequence = tokenizer.texts_to_sequences([random_text])
random_text_sequence = pad_sequences(random_text_sequence, maxlen=MAXLEN, padding=PADDING, truncating=TRUNCATING)
random_text_sequence

array([[152,  61,  62,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0]], dtype=int32)

In [140]:
prediction = model.predict(random_text_sequence)[0]



In [141]:
prediction
# Convert prediction to sentiment label
predicted_label = np.argmax(prediction)

# Map sentiment label to sentiment interpretation
sentiment = "Positive" if predicted_label == 2 else "Neutral" if predicted_label == 1 else "Negative"

In [142]:
sentiment

'Negative'

In [172]:
# Assuming you have trained the model and obtained the history

# Random text for prediction
random_text = "The lectures for this subject were absolutely outstanding! The lecturer was highly knowledgeable and made the content engaging. I thoroughly enjoyed every session"

# Tokenize the random text
random_text_sequence = tokenizer.texts_to_sequences([random_text])

# Pad and truncate the sequence
random_text_sequence = pad_sequences(random_text_sequence, maxlen=MAXLEN, padding=PADDING, truncating=TRUNCATING)

# Make prediction
prediction = model.predict(random_text_sequence)[0]

# Convert prediction to sentiment label
predicted_label = np.argmax(prediction)

# Map sentiment label to sentiment interpretation
sentiment = "Positive" if predicted_label == 2 else "Neutral" if predicted_label == 1 else "Negative"

# Print the prediction result
print(f"Text: {random_text}")
print(f"Predicted Sentiment: {sentiment}")


Text: The lectures for this subject were absolutely outstanding! The lecturer was highly knowledgeable and made the content engaging. I thoroughly enjoyed every session
Predicted Sentiment: Positive


In [173]:
# Assuming you have trained the model and obtained the history

# Random text for prediction
random_text = " I found the lectures for this subject to be incredibly boring and uninspiring. The lecturer lacked enthusiasm and failed to effectively communicate the material. It was a struggle to stay engaged throughout the course"

# Tokenize the random text
random_text_sequence = tokenizer.texts_to_sequences([random_text])

# Pad and truncate the sequence
random_text_sequence = pad_sequences(random_text_sequence, maxlen=MAXLEN, padding=PADDING, truncating=TRUNCATING)

# Make prediction
prediction = model.predict(random_text_sequence)[0]

# Convert prediction to sentiment label
predicted_label = np.argmax(prediction)

# Map sentiment label to sentiment interpretation
sentiment = "Positive" if predicted_label == 2 else "Neutral" if predicted_label == 1 else "Negative"

# Print the prediction result
print(f"Text: {random_text}")
print(f"Predicted Sentiment: {sentiment}")


Text:  I found the lectures for this subject to be incredibly boring and uninspiring. The lecturer lacked enthusiasm and failed to effectively communicate the material. It was a struggle to stay engaged throughout the course
Predicted Sentiment: Negative


In [174]:
# Assuming you have trained the model and obtained the history

# Random text for prediction
random_text = "The lectures for this subject were average. The lecturer was competent in delivering the content, but it lacked the wow factor. It was neither exceptional nor disappointing"

# Tokenize the random text
random_text_sequence = tokenizer.texts_to_sequences([random_text])

# Pad and truncate the sequence
random_text_sequence = pad_sequences(random_text_sequence, maxlen=MAXLEN, padding=PADDING, truncating=TRUNCATING)

# Make prediction
prediction = model.predict(random_text_sequence)[0]

# Convert prediction to sentiment label
predicted_label = np.argmax(prediction)

# Map sentiment label to sentiment interpretation
sentiment = "Positive" if predicted_label == 2 else "Neutral" if predicted_label == 1 else "Negative"

# Print the prediction result
print(f"Text: {random_text}")
print(f"Predicted Sentiment: {sentiment}")


Text: The lectures for this subject were average. The lecturer was competent in delivering the content, but it lacked the wow factor. It was neither exceptional nor disappointing
Predicted Sentiment: Neutral


In [169]:
df['reviews'][887]
df['sentiment'][887]

1

# Deployment

In [146]:
import tensorflow as tf
from tensorflow import keras

In [147]:
 tf.saved_model.save(model, 'saved_model')
 model.save('model.h5')



In [149]:
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model')
tflite_model = converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

ConverterError: ignored