In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Load data
data = pd.read_csv("news_data_final.csv")

# Preprocess and split data
x = data["text"]
y = data["class"]

# Seed the split so the held-out set (and therefore every metric reported on
# it) is the same on each run, and stratify on the label so the train and test
# parts keep the same class ratio. Note that only the split is seeded here;
# Keras weight initialisation and dropout remain unseeded.
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SEED, stratify=y
)

# Tokenize and pad sequences
max_features = 10000  # vocabulary size shared by the tokenizer and the embedding
maxlen = 128          # every sequence is padded/truncated to this many tokens

# Fit the vocabulary on the training texts only, so nothing from the test
# split leaks into preprocessing.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(x_train)
x_train_seq = tokenizer.texts_to_sequences(x_train)
x_test_seq = tokenizer.texts_to_sequences(x_test)
x_train_padded = pad_sequences(x_train_seq, maxlen=maxlen)
x_test_padded = pad_sequences(x_test_seq, maxlen=maxlen)

# Build the LSTM model: embedding -> single LSTM layer -> 2-way softmax
embedding_dim = 100
hidden_dim = 128

model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=maxlen))
model.add(LSTM(hidden_dim, dropout=0.5, recurrent_dropout=0.5))
model.add(Dense(2, activation="softmax"))

# Compile and train the model
# The sparse loss takes the labels as given in y_train, with no one-hot step.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

# Hold out 10% of the training data for validation during fitting.
num_epochs = 3
model.fit(x_train_padded, y_train, epochs=num_epochs, validation_split=0.1)




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 128, 100)          1000000   
                                                                 
 lstm (LSTM)                 (None, 128)               117248    
                                                                 
 dense (Dense)               (None, 2)                 258       
                                                                 
Total params: 1,117,506
Trainable params: 1,117,506
Non-trainable params: 0
_________________________________________________________________
Epoch 1/3


2023-05-10 17:40:39.995328: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2916fb580>

In [2]:
# Score the trained model on the held-out test split.
probabilities = model.predict(x_test_padded)

# The predicted class is the index of the largest value along the last axis.
y_pred = probabilities.argmax(axis=-1)

# Per-class breakdown first, then the single headline accuracy number.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(
    f"Accuracy: {accuracy:.4f}",
    f"Classification Report:\n{report}",
    sep="\n",
)

Accuracy: 0.9667
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97      5396
           1       0.97      0.95      0.96      4851

    accuracy                           0.97     10247
   macro avg       0.97      0.97      0.97     10247
weighted avg       0.97      0.97      0.97     10247



In [8]:
def predict_news(raw_text, model, tokenizer, maxlen=128):
    """Predict the class of a single piece of raw text.

    Args:
        raw_text: The text to classify.
        model: Object exposing ``predict(batch)``; its result must support
            ``argmax(axis=-1)``.
        tokenizer: Object exposing ``texts_to_sequences(list_of_texts)``.
        maxlen: Sequence length passed to ``pad_sequences``.

    Returns:
        The result of ``argmax(axis=-1)`` on the model output for a batch
        containing only ``raw_text`` (one predicted class index per batch row).
    """
    # The tokenizer works on batches, so wrap the single text in a list.
    token_ids = tokenizer.texts_to_sequences([raw_text])
    model_input = pad_sequences(token_ids, maxlen=maxlen)

    # Pick the highest-scoring class along the last axis of the model output.
    class_scores = model.predict(model_input)
    return class_scores.argmax(axis=-1)

# Optional: restore a previously saved model instead of using the in-memory one
# from tensorflow.keras.models import load_model
# model = load_model("keras_lstm_fakenews_detector.h5")

# Example usage: read one text from standard input and classify it
raw_text = input()
prediction = predict_news(raw_text, model, tokenizer)

# This notebook reports class 0 as fake and any other class as real.
message = (
    "The news is likely to be fake."
    if prediction == 0
    else "The news is likely to be real."
)
print(message)


Pakistan's former prime minister Imran Khan has been arrested at the High Court in the capital, Islamabad. Mr Khan was appearing in court on charges of corruption, which he says are politically motivated. Footage showed dozens of paramilitary forces in armoured vehicles detaining the 70-year-old after he entered the court compound, then driving him away. Unrest broke out in several cities following his arrest, with protesters blocking key roads. Mr Khan was ousted as PM in April last year and has been campaigning for early elections since then.
The news is likely to be real.


In [3]:
# Write the trained model to a .h5 file so it can be reloaded later
# without retraining.
model_path = "keras_lstm_fakenews_detector.h5"
model.save(model_path)

In [4]:
import sys
# Show the path of the Python interpreter that is running this kernel.
interpreter_path = sys.executable
print(interpreter_path)



/Users/mac/anaconda3/bin/python


In [14]:
import pickle

# Persist the fitted tokenizer to disk.
with open("tokenizer.pkl", "wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Persist the accuracy score. Despite the file name class_R.pkl, the pickled
# object is the `accuracy` value, not a classification report.
with open("class_R.pkl", "wb") as accuracy_file:
    pickle.dump(accuracy, accuracy_file)


In [None]:
# Run app.py with the Streamlit CLI via an IPython shell escape.
# NOTE(review): this cell has no execution count, which suggests the command
# keeps running (serving the app) until the kernel is interrupted; confirm.
!streamlit run app.py


You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.168.1.132:8501
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
2023-05-10 18:19:44.380798: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_pers

In [1]:
import pandas as pd
# Re-read the dataset and show its (rows, columns) shape as the cell output.
csv_path = "news_data_final.csv"
data = pd.read_csv(csv_path)
data.shape

(51233, 2)