**Project#4:**Real vs. Fake News Classifier Develop a model to differentiate between real and fake news articles. This
project introduces you to the challenges of working with textual data, including dealing with large
vocabularies, and teaches you how to build and fine-tune models for text classification.

In [1]:
pip install tensorflow pandas scikit-learn




# Importing Libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.optimizers import Adam


# Upload Dataset

In [9]:

df = pd.read_csv('/content/news_articles.csv/news_articles.csv')


# Preprocessing

In [10]:
df = df[['text', 'label']].dropna()  # Keep only 'text' and 'label' columns and drop missing values

In [11]:
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])  # Real -> 1, Fake -> 0


In [12]:
# Tokenization and padding
max_words = 20000  # Vocabulary size
max_len = 200      # Maximum length of input sequences
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(df['text'].values)

In [13]:
# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(df['text'].values)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')


# Train Test split

In [15]:
X = padded_sequences
y = df['label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Build Model

In [16]:
model = Sequential([
    Embedding(max_words, 128, input_length=max_len),  # Embedding layer
    Bidirectional(LSTM(64, return_sequences=True)),   # Bi-LSTM layer
    Dropout(0.5),
    Bidirectional(LSTM(32)),  # Second Bi-LSTM layer
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])



# Compiling

In [17]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])


In [18]:
model.summary()


# Training Model

In [19]:
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1
)

Epoch 1/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 559ms/step - accuracy: 0.6073 - loss: 0.6767 - val_accuracy: 0.6195 - val_loss: 0.5969
Epoch 2/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 530ms/step - accuracy: 0.8290 - loss: 0.4112 - val_accuracy: 0.7317 - val_loss: 0.5279
Epoch 3/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 566ms/step - accuracy: 0.9680 - loss: 0.1501 - val_accuracy: 0.7585 - val_loss: 0.6490
Epoch 4/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 531ms/step - accuracy: 0.9806 - loss: 0.0730 - val_accuracy: 0.7512 - val_loss: 0.9113
Epoch 5/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 520ms/step - accuracy: 0.9915 - loss: 0.0376 - val_accuracy: 0.7439 - val_loss: 0.9338
Epoch 6/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 555ms/step - accuracy: 0.9937 - loss: 0.0329 - val_accuracy: 0.7488 - val_loss: 1.0461
Epoch 7/10
[1m52/52[

# Evaluating model

In [20]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step - accuracy: 0.7408 - loss: 1.0725
Test Accuracy: 0.74


# Testing with new data

In [23]:
def predict_news(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')
    pred = model.predict(padded)
    return "Real" if pred[0] > 0.5 else "Fake"


# Example usage with user input
example_text = input("Enter a news article text to predict whether it's Real or Fake: ")
print(f'The news is: {predict_news(example_text)}')

#Predict following text:
#"Scientists discovered a way to make humans invisible using a secret potion from an undisclosed location.
#This breakthrough is being hidden by world leaders to control society."


Enter a news article text to predict whether it's Real or Fake: "Scientists discovered a way to make humans invisible using a secret potion from an undisclosed location. This breakthrough is being hidden by world leaders to control society."
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
The news is: Fake
