# Task-1: Fake news detection on the WELFake dataset


In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# CSV loaded further down is read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Importing Libraries

In [3]:
import numpy as np
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import re
import tensorflow as tf
# Download the NLTK resources used below:
#   stopwords          -> stopwords.words('english')
#   punkt / punkt_tab  -> word_tokenize (newer NLTK releases load 'punkt_tab';
#                         older ones load 'punkt', so both are requested)
#   wordnet            -> WordNetLemmatizer
# nltk.download reports an unavailable package in its output rather than
# raising, so requesting 'punkt_tab' is harmless where it does not exist.
for resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# Load the dataset

In [4]:
# Read the WELFake CSV from the mounted Drive folder into a DataFrame.
dataset = pd.read_csv(
    "/content/drive/MyDrive/AI MASTER CLASS/DATASET/WELFake_Dataset.csv"
)

In [5]:
# Preview the first five rows to eyeball the columns and label values.
dataset.head(n=5)

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1
1,1,,Did they post their votes for Hillary already?,1
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",1
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1


In [6]:
# (rows, columns) of the frame as loaded, before any cleaning.
dataset.shape

(72134, 4)

In [7]:
# Per-column count of missing values.
dataset.isna().sum()

Unnamed: 0      0
title         558
text           39
label           0
dtype: int64

In [61]:
# Discard every row that has a missing value in any column.
dataset = dataset.dropna()

In [62]:
# Keep only the first 50,000 rows (by position) to bound preprocessing and
# training cost. .copy() detaches the result from the parent frame so that
# adding the 'processed_text' column later writes to an independent DataFrame
# instead of a slice (which triggers pandas' SettingWithCopyWarning).
dataset = dataset.iloc[:50000].copy()

# Data preprocessing



In [63]:
# Shared helpers for preprocess_text, built once and reused for every document.
# A set is used for the stop words so membership tests are hash lookups.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


In [64]:
def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Processing steps, in order:
      1. Replace every character that is not an ASCII letter with a space.
      2. Lowercase the result.
      3. Tokenize with NLTK's ``word_tokenize``.
      4. Drop tokens present in the module-level ``stop_words`` set and
         lemmatize the rest with the module-level ``lemmatizer``.
      5. Join the surviving tokens with single spaces.

    Args:
        text: Raw text to clean. ``None`` and float NaN (the missing-value
            marker pandas uses in object columns) are treated as empty text
            instead of raising ``TypeError`` inside ``re.sub``.

    Returns:
        The cleaned text as one string; ``''`` when the input is missing or
        nothing is left after removing non-letter characters.
    """
    # Missing values: NaN is the only float for which ``x != x`` is true.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    # Remove special characters and digits, then convert to lowercase.
    text = re.sub('[^a-zA-Z]', ' ', text).lower()

    # Nothing but whitespace left: skip tokenization, the result is empty.
    if not text.strip():
        return ''

    # Tokenize, remove stop words and lemmatize what remains.
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]

    # Join tokens back into a single string.
    return ' '.join(tokens)

# Apply preprocessing to the 'text' column

In [65]:
# Clean every article body element-wise and store the result in a new column.
dataset['processed_text'] = dataset['text'].map(preprocess_text)

# Split the dataset into training and testing sets

In [66]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    dataset['processed_text'],
    dataset['label'],
    test_size=0.2,
    random_state=42,
)

# Vectorize the text using TF-IDF


In [67]:
# max_df=0.7 ignores terms that appear in more than 70% of training documents.
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# Learn vocabulary and IDF weights from the training split only, then reuse
# the fitted vectorizer on the test split so no test information leaks in.
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

# Build the TensorFlow model

In [68]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(42)

# Binary classifier over the TF-IDF features: one hidden ReLU layer with 64
# units followed by a single sigmoid unit that outputs a probability.
# The input width equals the number of TF-IDF columns (vocabulary size).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(tfidf_train.shape[1],)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model

In [69]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model

In [70]:
# .toarray() converts the TF-IDF matrix to a dense array before it is handed
# to Keras; this materialises every (document, term) cell in memory.
model.fit(
    x=tfidf_train.toarray(),
    y=y_train,
    batch_size=32,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4a9ff7d2a0>

# Make predictions on the test set


In [71]:
# Sigmoid outputs for the held-out documents.
y_pred_prob = model.predict(tfidf_test.toarray())
# Threshold at 0.5: strictly greater becomes class 1, everything else class 0.
y_pred = np.where(y_pred_prob > 0.5, 1, 0)



# Calculate accuracy

In [72]:
# Bring labels and predictions to plain integer NumPy arrays before scoring.
y_test = np.asarray(y_test).astype(int)
y_pred = np.asarray(y_pred)

# Fraction of test documents whose predicted class matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.902


# Get user input and predict

In [84]:
# Read one piece of text from the user and push it through the same
# preprocessing and fitted TF-IDF vectorizer that the training data used.
user_input = input('Enter a text to check if it is fake news: ')
processed_input = preprocess_text(user_input)
tfidf_input = tfidf_vectorizer.transform([processed_input])

# Threshold the sigmoid output exactly as was done for the test set.
probability = model.predict(tfidf_input.toarray())
prediction = (probability > 0.5).astype(int)

# Print the prediction: class 0 is reported as REAL, anything else as FAKE.
if prediction[0] != 0:
    print("The input is predicted to be FAKE news.")
else:
    print("The input is predicted to be REAL news.")


Enter a text to check if it is fake news: "COVID-19 Vaccines Show Promising Results in Clinical Trials"
The input is predicted to be REAL news.


Example fake-news inputs to try:

"Scientists confirm that the Earth is flat and have been hiding this information for years."

"New study claims that eating chocolate cake every day can help you lose weight."

"Breaking: Alien invasion imminent, according to anonymous government sources."

Example real-news inputs to try:


"COVID-19 Vaccines Show Promising Results in Clinical Trials"

"Economic growth accelerates, creating abundant job prospects."

"Economic growth reaches record levels, boosting job opportunities."