In [None]:
# Install the TensorFlow packages into the notebook runtime.
# NOTE(review): tensorflow-hub and tensorflow-text are not imported by any cell visible here — confirm they are still needed.
# NOTE(review): versions are unpinned, so a rerun may resolve to different releases than the recorded output (tensorflow-text 2.15.0).
!pip install tensorflow tensorflow-hub tensorflow-text

Collecting tensorflow-text
  Downloading tensorflow_text-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-text
Successfully installed tensorflow-text-2.15.0


# **Importing Libraries and Datasets**


In [None]:
# Importing the used libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping
from google.colab import drive


In [None]:
# Mount Google Drive into the Colab filesystem; force_remount=True asks Colab
# to mount again even when the drive is already mounted.
drive.mount(
    '/content/Files',
    force_remount=True,
)
# Folder on the mounted drive that holds the project's datasets and saved models
drive_path = '/content/Files/MyDrive/Deep Bonus Project/'

Mounted at /content/Files


# **Egyptian Sentiment Classification Model**

In [None]:
# Read the Egyptian sentiment dataset from the project folder on Drive
file_path = f'{drive_path}egyptian_dataset.csv'
data = pd.read_csv(file_path)

# Randomly reorder the rows (fixed seed => same order on every run) and renumber the index
data = (
    data
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Pull the text column and the label column out as arrays
# (labels are expected to be 0 or 1)
sentences, labels = data['sentence'].values, data['label'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
train_sentences, test_sentences, train_labels, test_labels = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=42,
)

# Tokenize the sentences
# Every word seen in the training sentences gets an integer index. The reserved
# '<OOV>' (out-of-vocabulary) token, which Keras places at index 1, stands in for
# any word that was not seen while fitting, e.g. a new word in the test set.
# Example vocabulary: {'<OOV>': 1, 'is': 2, 'cat': 3, 'beautiful': 4}
# "The cat is beautiful" -> [1, 3, 2, 4]   ("The" is unknown, so it maps to <OOV>)
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(train_sentences)  # the vocabulary is built from the training split only

# Map each sentence to its list of word indexes, using the same tokenizer for both splits
train_sequences, test_sequences = (
    tokenizer.texts_to_sequences(split) for split in (train_sentences, test_sentences)
)

# Pad every sequence with trailing zeros ('post') up to the longest training sequence
max_length = max(map(len, train_sequences))
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding='post')
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding='post')

# Build the RNN model as a linear stack of layers
egyptian_model = Sequential([
    # Embedding: turns each word index into a trainable 16-dimensional vector.
    #   input_dim    - vocabulary size (+1 because index 0 is used for padding)
    #   output_dim   - size of each word vector
    #   input_length - length of every padded input sequence
    Embedding(
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=16,
        input_length=max_length,
    ),
    # LSTM layer with 64 units that reads the embedded sequence
    LSTM(64),
    # Single sigmoid unit: its output is used for the binary (0/1) classification
    Dense(1, activation='sigmoid'),
])

# Compile the model for binary classification and track accuracy during training
egyptian_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Re-extract the sentence and label arrays from the loaded DataFrame.
# This repeats the extraction already done in the data-loading cell;
# labels are expected to be 0 or 1.
sentences, labels = data['sentence'].values, data['label'].values


In [None]:
# Stop training once the validation loss has not improved for 2 consecutive epochs
# and roll the model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Validate on a slice of the TRAINING data rather than on the test set: early
# stopping (with restore_best_weights) picks the epoch by validation loss, so
# validating on the test set would let it drive model selection and inflate the
# score reported below. validation_split holds out the last 10% of the training
# arrays for validation.
egyptian_model_history = egyptian_model.fit(
    train_padded,
    train_labels,
    epochs=8,  # upper bound; early stopping may end training sooner
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Evaluate once on the held-out test set, which played no part in training or epoch selection
test_loss, test_accuracy = egyptian_model.evaluate(test_padded, test_labels)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Test Accuracy: 91.34%


In [None]:
# Persist the trained model to the project folder on Drive (HDF5 file, per the .h5 extension)
egyptian_model.save(f'{drive_path}egyptian_model_8Epochs.h5')

  saving_api.save_model(


In [None]:
# Reload the saved model from Drive so the following cells use the stored copy
egyptian_model = load_model(f'{drive_path}egyptian_model_8Epochs.h5')

In [None]:
def predict_sentiment(input_sentence, model=None, text_tokenizer=None, sequence_length=None):
    """Classify one sentence as 'positive' or 'negative' with the Egyptian model.

    Args:
        input_sentence: Raw text of the sentence to classify.
        model: Trained Keras model to use. Defaults to the notebook-level
            `egyptian_model`.
        text_tokenizer: Fitted tokenizer used to encode the sentence. Defaults
            to the notebook-level `tokenizer`.
        sequence_length: Length the encoded sentence is padded/truncated to.
            Defaults to the notebook-level `max_length`.

    Returns:
        'positive' if the model output is above 0.5, otherwise 'negative'.

    Note:
        `tokenizer` and `max_length` are reassigned by the French section of
        this notebook. Once that section has run, the defaults no longer match
        the Egyptian model; pass the matching tokenizer and length explicitly
        (or rerun the Egyptian cells) to avoid encoding with the wrong vocabulary.
    """
    # Fall back to the notebook-level objects when no explicit ones are given
    if model is None:
        model = egyptian_model
    if text_tokenizer is None:
        text_tokenizer = tokenizer
    if sequence_length is None:
        sequence_length = max_length

    # Encode the sentence like the training data: word indexes, zero-padded at the end
    input_sequence = text_tokenizer.texts_to_sequences([input_sentence])
    padded_input = pad_sequences(input_sequence, maxlen=sequence_length, padding='post')

    # One input row in, so the score of interest is predictions[0][0]
    predictions = model.predict(padded_input)

    # Threshold the score at 0.5 (the label used to be misspelled 'postivie')
    return 'positive' if predictions[0][0] > 0.5 else 'negative'


In [None]:
# Example usage: read a sentence from the user, classify it, and show the label
input_sentence = input("Enter arabic sentence: ")
predicted_sentiment = predict_sentiment(input_sentence)
print("Predicted Sentiment:", predicted_sentiment)

Enter arabic sentence: البورصة المصرية في افضل احوالها
Predicted Sentiment: postivie


# **French Sentiment Classification Model**

In [None]:
# Read the French sentiment dataset from the project folder on Drive
file_path = f'{drive_path}french_dataset.csv'
data = pd.read_csv(file_path)

# Randomly reorder the rows (fixed seed => same order on every run) and renumber the index
data = (
    data
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Pull the text column and the label column out as arrays
# (labels are expected to be 0 or 1)
sentences, labels = data['sentence'].values, data['label'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
train_sentences, test_sentences, train_labels, test_labels = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=42,
)

# Tokenize the sentences: build the vocabulary from the training split only;
# words not seen while fitting are mapped to the reserved '<OOV>' token.
# NOTE: this rebinds the notebook-level `tokenizer` used by the earlier section.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(train_sentences)

# Map each sentence to its list of word indexes, using the same tokenizer for both splits
train_sequences, test_sequences = (
    tokenizer.texts_to_sequences(split) for split in (train_sentences, test_sentences)
)

# Pad every sequence with trailing zeros ('post') up to the longest training sequence
max_length = max(map(len, train_sequences))
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding='post')
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding='post')

# Build the RNN model as a linear stack of layers
french_model = Sequential([
    # Word index -> trainable 16-dimensional vector (+1 because index 0 is used for padding)
    Embedding(
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=16,
        input_length=max_length,
    ),
    # LSTM layer with 64 units that reads the embedded sequence
    LSTM(64),
    # Fully connected ReLU layer followed by 50% dropout
    Dense(64, activation='relu'),
    Dropout(0.5),
    # Single sigmoid unit: its output is used for the binary (0/1) classification
    Dense(1, activation='sigmoid'),
])

# Compile the model for binary classification and track accuracy during training
french_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Stop training once the validation loss has not improved for 3 consecutive epochs
# and roll the model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Validate on a slice of the TRAINING data rather than on the test set: early
# stopping (with restore_best_weights) picks the epoch by validation loss, so
# validating on the test set would let it drive model selection and inflate the
# score reported below. validation_split holds out the last 10% of the training
# arrays for validation.
french_model_history = french_model.fit(
    train_padded,
    train_labels,
    epochs=8,  # upper bound; early stopping may end training sooner
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Evaluate once on the held-out test set, which played no part in training or epoch selection
test_loss, test_accuracy = french_model.evaluate(test_padded, test_labels)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Test Accuracy: 70.75%


In [None]:
# Persist the trained model to the project folder on Drive (HDF5 file, per the .h5 extension)
french_model.save(f'{drive_path}french_model_8Epochs.h5')

  saving_api.save_model(


In [None]:
# Reload the saved model from Drive so the following cells use the stored copy
french_model = load_model(f'{drive_path}french_model_8Epochs.h5')

In [None]:
def predict_sentiment(input_sentence, model=None, text_tokenizer=None, sequence_length=None):
    """Classify one sentence as 'positive' or 'negative' with the French model.

    This definition replaces the `predict_sentiment` defined earlier in the
    notebook for the Egyptian model. To classify with a different model, pass
    `model`, `text_tokenizer` and `sequence_length` explicitly.

    Args:
        input_sentence: Raw text of the sentence to classify.
        model: Trained Keras model to use. Defaults to the notebook-level
            `french_model`.
        text_tokenizer: Fitted tokenizer used to encode the sentence. Defaults
            to the notebook-level `tokenizer`.
        sequence_length: Length the encoded sentence is padded/truncated to.
            Defaults to the notebook-level `max_length`.

    Returns:
        'positive' if the model output is above 0.5, otherwise 'negative'.
    """
    # Fall back to the notebook-level objects when no explicit ones are given
    if model is None:
        model = french_model
    if text_tokenizer is None:
        text_tokenizer = tokenizer
    if sequence_length is None:
        sequence_length = max_length

    # Encode the sentence like the training data: word indexes, zero-padded at the end
    input_sequence = text_tokenizer.texts_to_sequences([input_sentence])
    padded_input = pad_sequences(input_sequence, maxlen=sequence_length, padding='post')

    # One input row in, so the score of interest is predictions[0][0]
    predictions = model.predict(padded_input)

    # Threshold the score at 0.5 (the label used to be misspelled 'postivie')
    return 'positive' if predictions[0][0] > 0.5 else 'negative'


In [None]:
# Example usage: read a sentence from the user, classify it, and show the label
input_sentence = input("Enter french sentence: ")
predicted_sentiment = predict_sentiment(input_sentence)
print("Predicted Sentiment:", predicted_sentiment)

Enter french sentence: C'est comme si mon cœur se brisait de tristesse
Predicted Sentiment: negative
