In [None]:
# Text Classification with TensorFlow (Kaggle Dataset)

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Kaggle setup for downloading dataset
!pip install kaggle
!mkdir -p ~/.kaggle
!echo '{"username":"your_kaggle_username","key":"your_kaggle_api_key"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

# Download the Fake and Real News Dataset from Kaggle
!kaggle datasets download -d clmentbisaillon/fake-and-real-news-dataset
!unzip fake-and-real-news-dataset.zip

# Load both CSV files and tag every row with the class it came from:
# rows of True.csv are labelled 'real', rows of Fake.csv are labelled 'fake'.
df_real = pd.read_csv('True.csv').assign(label='real')
df_fake = pd.read_csv('Fake.csv').assign(label='fake')

# Stack the two frames (real rows first) and renumber the index from 0.
df = pd.concat([df_real, df_fake], ignore_index=True)

# Display sample data
print("Dataset Sample:", df.head(), sep="\n")

# Preprocess the data
# pandas reads empty CSV cells as NaN (a float). Coerce such cells to empty
# strings so every sample later handed to the tokenizer is a real `str`.
texts = df['text'].fillna('').astype(str).values
labels = df['label'].values

# Encode labels: the 'real' / 'fake' strings become integer class ids.
# The id -> name mapping is kept in `label_encoder.classes_`
# (presumably fake=0, real=1 since classes are sorted - verify if it matters).
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split data into training and testing sets: 20% of the rows are held out,
# and the fixed random_state keeps the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Text-vectorisation hyperparameters
max_words = 10000   # vocabulary size limit passed to the tokenizer (num_words)
max_length = 100    # fixed number of token ids per sample after padding

# Tokenize text data: the vocabulary is fitted on the training texts only;
# words outside it are mapped to the "<OOV>" token.
tokenizer = keras.preprocessing.text.Tokenizer(oov_token="<OOV>", num_words=max_words)
tokenizer.fit_on_texts(X_train)

# Turn every text of both splits into a list of integer token ids.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split) for split in (X_train, X_test)
)

# Pad sequences to ensure uniform input size: shorter samples are padded at
# the end ('post'); the same settings are applied to both splits.
pad_kwargs = dict(maxlen=max_length, padding='post')
X_train_pad, X_test_pad = (
    keras.preprocessing.sequence.pad_sequences(seqs, **pad_kwargs)
    for seqs in (X_train_seq, X_test_seq)
)

# Define the model: embedding -> average over the sequence -> small dense
# head with a single sigmoid unit for the binary real/fake decision.
model = keras.Sequential([
    # Explicit input spec (one row of `max_length` integer token ids). This
    # replaces Embedding's `input_length` argument, which is deprecated in
    # Keras 3, while still fixing the expected sequence length up front.
    keras.Input(shape=(max_length,), dtype='int32'),
    # Learn a 16-dimensional vector for each of the `max_words` token ids.
    keras.layers.Embedding(max_words, 16),
    # Collapse the sequence axis by averaging the token vectors.
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model: binary cross-entropy matches the single sigmoid output
# and the 0/1 integer labels.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
# Validation data is carved out of the training set (10% of it, via
# validation_split) instead of reusing the test set, so the test split stays
# untouched until the final evaluation below and remains a true hold-out.
print("Training the model...")
history = model.fit(X_train_pad, y_train, epochs=10, validation_split=0.1, batch_size=32)

# Evaluate the model once on the held-out test split.
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Save the trained model
# NOTE(review): only the network is saved here; `tokenizer` and
# `label_encoder` are not persisted, so they must be rebuilt (or saved
# separately) before this file can be used for inference elsewhere.
model.save('text_classification_model.h5')

print("Project completed successfully!")
