In [1]:
import pandas as pd

In [2]:
df_final = pd.read_csv('Stemmed_df_tweets copy')

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenize text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_final['Text'])
sequences = tokenizer.texts_to_sequences(df_final['Text'])
word_index = tokenizer.word_index

# Pad sequences
maxlen = 100  # Choose the maximum length of sequences
data = pad_sequences(sequences, maxlen=maxlen)

# Encode labels
labels = df_final['Biased']

# Split the data into training and validation sets
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

# Build RNN model
model = Sequential()
model.add(Embedding(input_dim=len(word_index)+1, output_dim=100))
model.add(LSTM(128))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# Evaluate the model
loss, accuracy = model.evaluate(X_val, y_val)
print("Validation Accuracy:", accuracy)

Epoch 1/10
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 79ms/step - accuracy: 0.7719 - loss: 0.5449 - val_accuracy: 0.8337 - val_loss: 0.4016
Epoch 2/10
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 83ms/step - accuracy: 0.8876 - loss: 0.2892 - val_accuracy: 0.8248 - val_loss: 0.4449
Epoch 3/10
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 85ms/step - accuracy: 0.9399 - loss: 0.1664 - val_accuracy: 0.8158 - val_loss: 0.4832
Epoch 4/10
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 77ms/step - accuracy: 0.9701 - loss: 0.0809 - val_accuracy: 0.7931 - val_loss: 0.5945
Epoch 5/10
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 80ms/step - accuracy: 0.9860 - loss: 0.0449 - val_accuracy: 0.8008 - val_loss: 0.8983
Epoch 6/10
[1m209/209[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 75ms/step - accuracy: 0.9888 - loss: 0.0371 - val_accuracy: 0.7931 - val_loss: 1.2491
Epoch 7/10
[1m2

In [46]:
comment = "zionism facism"
comment_sequence = tokenizer.texts_to_sequences([comment])
comment_padded = pad_sequences(comment_sequence, maxlen=100)

# Predict the label
prediction = model.predict(comment_padded)
print(prediction)
# Interpret the prediction
if prediction > 0.9:
    print("The comment is biased.")
else:
    print("The comment is not biased.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[[0.9947776]]
The comment is biased.
