# Downloading Libraries


In [1]:
! pip install nltk
! pip install vaderSentiment
! pip install pytrends
! pip install textblob
! pip install wordcloud
! pip install gensim
! pip install seaborn
! pip install TextBlob
! pip install joblib



In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Download NLTK resources (if not already downloaded)
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('wordnet', quiet=True)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

# Importing Dataset


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
df = pd.read_csv('/content/drive/MyDrive/Datasets/Cleaned Data/reviews.csv')
df.head()

Unnamed: 0,listing_id,reviewer_name,comments,cleaned_comments,polarity,sentiment
0,4326511,Laura,Das Zimmer und das Bad waren sauber und komfor...,da zimmer und da bad waren sauber und komforta...,-0.7,negative
1,603032069621870277,Fatima,Bad service,bad servic,-0.7,negative
2,8629818,Andrew,"In Paul's absence, Lucy and Jack were amazing ...",paul absenc luci jack amaz host brthe secret g...,-0.4,negative
3,25012636,Kate,I used Nicholas’ room as a base whilst I was i...,use nicholas’ room base whilst properti busi f...,-0.8,negative
4,12725143,Yvon,You can’t go wrong with the townhouse. It has ...,can’t go wrong townhous amen need place spotle...,-0.5,negative


# Date Preprocessing

In [None]:
df.isnull().sum()

listing_id            0
reviewer_name        63
comments             63
cleaned_comments    209
polarity            163
sentiment           163
dtype: int64

In [None]:
# Dropping the null values
df.dropna(inplace=True)
df.isnull().sum()

listing_id          0
reviewer_name       0
comments            0
cleaned_comments    0
polarity            0
sentiment           0
dtype: int64

## **<center><u>Assessment: Experimentations (using Deep Learning)</u></center>**


In [None]:
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Tokenize and remove stop words
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word.isalpha()
              and word not in stop_words]
    return " ".join(tokens)

In [None]:
# Apply preprocessing to cleaned comments
df['processed_comments'] = df['cleaned_comments'].apply(preprocess_text)

In [None]:
# Tokenize text
tokenizer = Tokenizer(num_words=500, oov_token="<OOV>")
tokenizer.fit_on_texts(df['processed_comments'])
sequences = tokenizer.texts_to_sequences(df['processed_comments'])

In [None]:
# Pad sequences
max_length = max(len(x) for x in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

In [None]:
# Encode labels
labels = pd.get_dummies(df['sentiment']).values

In [None]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=42)

In [None]:
model = Sequential()
model.add(Embedding(5000, 128, input_length=max_length))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(labels.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
model.summary()



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 708, 128)          640000    
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_3 (Dense)             (None, 3)                 99        
                                                                 
Total params: 691587 (2.64 MB)
Trainable params: 691587 (2.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

history = model.fit(X_train, y_train, epochs=100, batch_size=128,
                    validation_split=0.2, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: early stopping


In [None]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100}%")

Test Accuracy: 32.71551728248596%
