### Sentiment Analysis

In [31]:
import os
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, LSTM, GRU, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Paths
train_pos_dir = "train/pos/"
train_neg_dir = "train/neg/"
test_pos_dir = "test/pos/"
test_neg_dir = "test/neg/"

# Function to load data
def load_data(directory):
    texts, labels = [], []
    for file in os.listdir(directory):
        with open(os.path.join(directory, file), 'r', encoding='utf-8') as f:
            texts.append(f.read())
        labels.append(1 if "pos" in directory else 0)
    return texts, labels

# Load all data
train_texts_pos, train_labels_pos = load_data(train_pos_dir)
train_texts_neg, train_labels_neg = load_data(train_neg_dir)
test_texts_pos, test_labels_pos = load_data(test_pos_dir)
test_texts_neg, test_labels_neg = load_data(test_neg_dir)

# Combine data
texts = train_texts_pos + train_texts_neg + test_texts_pos + test_texts_neg
labels = train_labels_pos + train_labels_neg + test_labels_pos + test_labels_neg

# Tokenization
tokenizer = Tokenizer(num_words=20000, oov_token="<UNK>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=300)

# Encode labels to one-hot
labels = to_categorical(labels, num_classes=2)

# Split data
X_train, X_temp, y_train, y_temp = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Model Parameters
vocab_size = 20000
embedding_dim = 128
max_length = 300
batch_size = 64
epochs = 10

# Define RNN Model
def build_rnn_model():
    model = Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_length),
        SimpleRNN(128, return_sequences=False),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Define LSTM Model
def build_lstm_model():
    model = Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_length),
        LSTM(128, return_sequences=False),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Define GRU Model
def build_gru_model():
    model = Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_length),
        GRU(128, return_sequences=False),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Build Models
rnn_model = build_rnn_model()
lstm_model = build_lstm_model()
gru_model = build_gru_model()

# Train RNN Model
rnn_history = rnn_model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)
# Evaluate Models
print("\nEvaluating RNN Model:")
rnn_eval = rnn_model.evaluate(X_test, y_test)
print(f"Test Loss: {rnn_eval[0]:.4f}, Test Accuracy: {rnn_eval[1]:.4f}")



Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 130ms/step - accuracy: 0.4989 - loss: 0.7269 - val_accuracy: 0.5016 - val_loss: 0.6964
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 130ms/step - accuracy: 0.5171 - loss: 0.6988 - val_accuracy: 0.6116 - val_loss: 0.6527
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 129ms/step - accuracy: 0.6604 - loss: 0.6173 - val_accuracy: 0.7098 - val_loss: 0.5891
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 127ms/step - accuracy: 0.7000 - loss: 0.5839 - val_accuracy: 0.7500 - val_loss: 0.5561
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 140ms/step - accuracy: 0.5593 - loss: 0.6785 - val_accuracy: 0.6702 - val_loss: 0.6011
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 127ms/step - accuracy: 0.6637 - loss: 0.6038 - val_accuracy: 0.6766 - val_loss: 0.6036
Epoch 7/10

In [32]:
# Train LSTM Model
lstm_history = lstm_model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)
# Evaluate Models
print("\nEvaluating LSTM Model:")
lstm_eval = lstm_model.evaluate(X_test, y_test)
print(f"Test Loss: {lstm_eval[0]:.4f}, Test Accuracy: {lstm_eval[1]:.4f}")


Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 382ms/step - accuracy: 0.7150 - loss: 0.5373 - val_accuracy: 0.8466 - val_loss: 0.3653
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 380ms/step - accuracy: 0.8523 - loss: 0.3464 - val_accuracy: 0.8754 - val_loss: 0.2935
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 424ms/step - accuracy: 0.9276 - loss: 0.1959 - val_accuracy: 0.8710 - val_loss: 0.3059
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 376ms/step - accuracy: 0.9568 - loss: 0.1237 - val_accuracy: 0.8906 - val_loss: 0.2989
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 384ms/step - accuracy: 0.9719 - loss: 0.0858 - val_accuracy: 0.8864 - val_loss: 0.3636
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 360ms/step - accuracy: 0.9740 - loss: 0.0748 - val_accuracy: 0.8798 - val_loss: 0.3419
Epoc

In [33]:
# Train GRU Model
gru_history = gru_model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)
# Evaluate Models
print("\nEvaluating GRU Model:")
gru_eval = gru_model.evaluate(X_test, y_test)
print(f"Test Loss: {gru_eval[0]:.4f}, Test Accuracy: {gru_eval[1]:.4f}")

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 392ms/step - accuracy: 0.6962 - loss: 0.5387 - val_accuracy: 0.8716 - val_loss: 0.3155
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 387ms/step - accuracy: 0.9202 - loss: 0.2138 - val_accuracy: 0.9050 - val_loss: 0.2397
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 384ms/step - accuracy: 0.9624 - loss: 0.1108 - val_accuracy: 0.9024 - val_loss: 0.2895
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 393ms/step - accuracy: 0.9821 - loss: 0.0556 - val_accuracy: 0.8970 - val_loss: 0.3303
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 397ms/step - accuracy: 0.9893 - loss: 0.0356 - val_accuracy: 0.8948 - val_loss: 0.4316
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 387ms/step - accuracy: 0.9943 - loss: 0.0206 - val_accuracy: 0.8934 - val_loss: 0.5092
Epoc