In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Read the header-less, latin-1 encoded CSV and label its six columns
data = pd.read_csv(
    'data.csv',
    header=None,
    encoding='latin1',
).set_axis(['target', 'ids', 'date', 'flag', 'user', 'text'], axis=1)

# Preprocess the data
def preprocess_data(data, num_words=5000, max_length=100):
    """Convert the raw frame into padded token sequences and binary labels.

    The input frame is not modified. Writing the binarised labels back into
    ``data['target']`` would make a second call return all zeros, because the
    already-converted values never equal 4; building a new Series keeps the
    cell safe to re-run.

    Args:
        data: DataFrame with a ``'target'`` column and a ``'text'`` column.
            Target values equal to 4 become 1; every other value becomes 0.
        num_words: Vocabulary cap handed to the Keras ``Tokenizer``.
        max_length: Length that every token sequence is padded or cut to.

    Returns:
        A tuple ``(padded_sequences, target, tokenizer)``: the output of
        ``pad_sequences``, the binarised label Series (same index as
        ``data``), and the fitted ``Tokenizer``.
    """
    # Binarise into a fresh Series instead of overwriting the caller's column.
    target = (data['target'] == 4).astype(int)

    # NOTE(review): the tokenizer is fitted on every row passed in, including
    # rows that the caller may later place in a test split — consider fitting
    # on training text only.
    tokenizer = Tokenizer(num_words=num_words)
    tokenizer.fit_on_texts(data['text'])
    sequences = tokenizer.texts_to_sequences(data['text'])

    # Zero-pad at the end of short sequences up to `max_length`.
    padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

    return padded_sequences, target, tokenizer

# Turn the text into padded sequences and the labels into 0/1
X, y, tokenizer = preprocess_data(data)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [4]:
def create_lstm_cnn_model(vocab_size, embedding_dim, max_length):
    """Build and compile a binary classifier: Embedding -> LSTM -> Conv1D -> Dense.

    The sequence length is declared with an explicit ``Input`` layer instead
    of the Embedding layer's ``input_length`` argument, which is deprecated
    in Keras 3. Declaring the input up front also builds the model
    immediately, so ``summary()`` can report shapes and parameter counts
    before any data has been seen.

    Args:
        vocab_size: Size of the embedding table (number of distinct token ids).
        embedding_dim: Width of each embedding vector.
        max_length: Number of token ids in every input sequence.

    Returns:
        A compiled ``tf.keras.Sequential`` model with one sigmoid output,
        binary cross-entropy loss, the Adam optimizer and an accuracy metric.
    """
    model = tf.keras.Sequential([
        # Flatten further down needs a static sequence length, so fix it here.
        tf.keras.Input(shape=(max_length,), dtype='int32'),
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        # return_sequences=True keeps the time axis for the convolution.
        tf.keras.layers.LSTM(32, return_sequences=True),
        tf.keras.layers.Conv1D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Hyperparameters: vocabulary size, embedding width, sequence length
vocab_size, embedding_dim, max_length = 5000, 32, 100

# Build the model and print its layer summary
lstm_cnn_model = create_lstm_cnn_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    max_length=max_length,
)
lstm_cnn_model.summary()



In [7]:
# Train LSTM-CNN model.
# `epochs=10` is an upper bound: training stops once val_loss has failed to
# improve for 2 consecutive epochs, and the best epoch's weights are restored
# so the evaluation below does not use an over-fitted final epoch.
# NOTE(review): this cell's execution count (7) comes after the Bidirectional
# cell (6), so the logged run presumably trained that variant — confirm with
# Restart & Run All.
lstm_cnn_history = lstm_cnn_model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True,
        )
    ],
)

# Evaluate LSTM-CNN model on the held-out test split
lstm_cnn_loss, lstm_cnn_accuracy = lstm_cnn_model.evaluate(X_test, y_test)
print(f'LSTM-CNN Model Accuracy: {lstm_cnn_accuracy * 100:.2f}%')

Epoch 1/10
8000/8000 ━━━━━━━━━━━━━━━━━━━━ 407s 50ms/step - accuracy: 0.7778 - loss: 0.4617 - val_accuracy: 0.8109 - val_loss: 0.4134
Epoch 2/10
8000/8000 ━━━━━━━━━━━━━━━━━━━━ 421s 53ms/step - accuracy: 0.8165 - loss: 0.4013 - val_accuracy: 0.8170 - val_loss: 0.4022
Epoch 3/10
8000/8000 ━━━━━━━━━━━━━━━━━━━━ 459s 57ms/step - accuracy: 0.8255 - loss: 0.3854 - val_accuracy: 0.8193 - val_loss: 0.3973
Epoch 4/10
8000/8000 ━━━━━━━━━━━━━━━━━━━━ 472s 59ms/step - accuracy: 0.8310 - loss: 0.3748 - val_accuracy: 0.8206 - val_loss: 0.3948
Epoch 5/10
8000/8000 ━━━━━━━━━━━━━━━━━━━━ 497s 62ms/step - accuracy: 0.8359 - loss: 0.3651 - val_accuracy: 0.8214 - val_loss: 0.3951
Epoch 6/10
8000/8000 ━━━━━━━━━━━━━━━━━━━━ 498s 62ms/step - accuracy: 0.8411 - loss: 0.3562 - val_accuracy: 0.8214 - val_loss: 0.397

In [6]:
import tensorflow as tf

def create_lstm_cnn_model(vocab_size, embedding_dim, max_length):
    """Build and compile a classifier: Embedding -> BiLSTM -> Conv1D -> Dense.

    This definition reuses the name of the earlier ``create_lstm_cnn_model``
    in this notebook and replaces it once the cell runs; the difference is
    that the LSTM is wrapped in ``Bidirectional``.

    The sequence length is declared with an explicit ``Input`` layer instead
    of the Embedding layer's ``input_length`` argument, which is deprecated
    in Keras 3. Declaring the input up front also builds the model
    immediately, so ``summary()`` can report shapes and parameter counts
    before any data has been seen.

    Args:
        vocab_size: Size of the embedding table (number of distinct token ids).
        embedding_dim: Width of each embedding vector.
        max_length: Number of token ids in every input sequence.

    Returns:
        A compiled ``tf.keras.Sequential`` model with one sigmoid output,
        binary cross-entropy loss, the Adam optimizer and an accuracy metric.
    """
    model = tf.keras.Sequential([
        # Flatten further down needs a static sequence length, so fix it here.
        tf.keras.Input(shape=(max_length,), dtype='int32'),
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        # Reads the sequence in both directions; return_sequences=True keeps
        # the time axis for the convolution.
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
        tf.keras.layers.Conv1D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Hyperparameters: vocabulary size, embedding width, sequence length
vocab_size, embedding_dim, max_length = 5000, 32, 100

# Rebind `lstm_cnn_model` to a model built by the Bidirectional definition in
# this cell, then print its layer summary
lstm_cnn_model = create_lstm_cnn_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    max_length=max_length,
)
lstm_cnn_model.summary()
