In [30]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/MyDrive/Truth_Seeker/revised_cleaned_master_data.csv')

In [15]:
X= df['lemmatized_text'].astype(str)
y= df['label']

In [16]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(y)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, encoded_labels, test_size=0.2, random_state=42)

In [40]:
max_len = 200

tokenizer = Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [23]:
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

In [28]:
model = Sequential([
    Embedding(input_dim=1000, output_dim=128, input_length=max_len),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(X), activation='softmax')
])

In [39]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [37]:
history = model.fit(X_train_pad, y_train,validation_data=(X_test_pad, y_test),epochs=5,batch_size=64,verbose=2)

Epoch 1/5
956/956 - 13s - 13ms/step - accuracy: 0.7390 - loss: 0.7177 - val_accuracy: 0.8898 - val_loss: 0.2646
Epoch 2/5
956/956 - 6s - 6ms/step - accuracy: 0.9041 - loss: 0.2478 - val_accuracy: 0.9026 - val_loss: 0.2455
Epoch 3/5
956/956 - 10s - 11ms/step - accuracy: 0.9319 - loss: 0.1842 - val_accuracy: 0.9008 - val_loss: 0.2435
Epoch 4/5
956/956 - 6s - 6ms/step - accuracy: 0.9593 - loss: 0.1198 - val_accuracy: 0.8918 - val_loss: 0.3233
Epoch 5/5
956/956 - 6s - 6ms/step - accuracy: 0.9812 - loss: 0.0622 - val_accuracy: 0.8989 - val_loss: 0.3200


In [38]:
loss, accuracy = model.evaluate(X_test_pad, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100}")

Test Accuracy: 89.8908019065857
