In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Load the dataset
url = "https://zenodo.org/record/3609356/files/crowdsourced.csv?download=1"
df = pd.read_csv(url)

# Split the dataset into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Preprocessing: Tokenize and pad the sequences
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(train_df["Text"])

X_train = tokenizer.texts_to_sequences(train_df["Text"])
X_test = tokenizer.texts_to_sequences(test_df["Text"])

X_train = pad_sequences(X_train, maxlen=256, padding="post")
X_test = pad_sequences(X_test, maxlen=256, padding="post")

y_train = train_df["Verdict"]
y_test = test_df["Verdict"]

# Define the Keras LSTM model
def create_lstm_model():
    model = Sequential()
    model.add(Embedding(input_dim=10000, output_dim=128, input_length=256))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(64))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation="sigmoid"))

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

# Instantiate and train the Keras LSTM classifier
lstm_classifier = KerasClassifier(build_fn=create_lstm_model, epochs=5, batch_size=64)
lstm_classifier.fit(X_train, y_train)

# Test the classifier
y_pred = lstm_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

print("Classification Report:")
print(classification_report(y_test, y_pred))


  lstm_classifier = KerasClassifier(build_fn=create_lstm_model, epochs=5, batch_size=64)
2023-04-18 08:40:25.646729: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-18 08:40:25.658893: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 11.15%
Classification Report:
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00      2926
           0       0.11      1.00      0.20       502
           1       0.00      0.00      0.00      1073

    accuracy                           0.11      4501
   macro avg       0.04      0.33      0.07      4501
weighted avg       0.01      0.11      0.02      4501



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
