**Beginner Task 3**

In [None]:
# Colab-only step: prompt for the two dataset files (Fake.csv and True.csv)
# that the preprocessing cell reads from the working directory.
from google.colab import files
uploaded = files.upload()

Saving Fake.csv to Fake.csv
Saving True.csv to True.csv


In [None]:
pip install pandas numpy scikit-learn nltk flask



In [None]:
import pandas as pd
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Fetch the NLTK resources used below: the English stop-word list and the
# WordNet data backing the lemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')

true = pd.read_csv("True.csv")
fake = pd.read_csv("Fake.csv")

# Label convention used throughout the notebook: 1 = real, 0 = fake.
true["label"] = 1
fake["label"] = 0

# ignore_index gives every row a unique index label instead of repeating
# 0..n-1 once per source file.
df = pd.concat([true, fake], ignore_index=True)

# A missing title or body would turn the whole concatenation into NaN, and
# clean() would then fail on `.lower()`; treat missing parts as empty text.
df["text"] = df["title"].fillna("") + " " + df["text"].fillna("")

# Shared, module-level resources consumed by clean().
stop_words = set(stopwords.words("english"))
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Translation table that deletes every character in string.punctuation.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def clean(text):
    """Normalize one document before vectorization.

    The text is lower-cased, stripped of ``string.punctuation`` characters,
    split on whitespace, filtered against the English stop-word set, and each
    remaining token is lemmatized and then stemmed.

    Args:
        text: Raw document text. Non-string values (for example NaN coming
            from a missing CSV field) are treated as an empty document.

    Returns:
        The processed tokens joined by single spaces; an empty string when
        no token survives.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower().translate(_PUNCT_TABLE)
    return " ".join(
        stemmer.stem(lemmatizer.lemmatize(word))
        for word in text.split()
        if word not in stop_words
    )

# Run the cleaning step over every document, then persist only the two
# columns the training cells read back.
cleaned_text = df["text"].apply(clean)
df["text"] = cleaned_text
export_columns = ["text", "label"]
df[export_columns].to_csv("cleaned_news.csv", index=False)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


**Train Model Naive Bayes**

In [None]:
import pandas as pd, pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

df = pd.read_csv("cleaned_news.csv")
# Documents whose cleaned text is empty are read back from CSV as NaN, which
# TfidfVectorizer rejects; restore them to empty strings.
df["text"] = df["text"].fillna("")

# Fixed seed + stratification make the reported accuracy reproducible and
# keep the real/fake ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"], df["label"], test_size=0.2, random_state=42, stratify=df["label"]
)

# Fit the vocabulary on the training split only, then reuse it for the test split.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = MultinomialNB()
model.fit(X_train_vec, y_train)

print("Naive Bayes Accuracy:", model.score(X_test_vec, y_test))

# Context managers guarantee the artifacts are flushed and closed on disk.
with open("naive_bayes_model.pkl", "wb") as f:
    pickle.dump(model, f)
with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)


Naive Bayes Accuracy: 0.9443207126948775


**Train Model Random Forest**

In [None]:
import pandas as pd, pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

df = pd.read_csv("cleaned_news.csv")
# Documents whose cleaned text is empty are read back from CSV as NaN, which
# TfidfVectorizer rejects; restore them to empty strings.
df["text"] = df["text"].fillna("")

# Fixed seed + stratification make the reported accuracy reproducible and
# keep the real/fake ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"], df["label"], test_size=0.2, random_state=42, stratify=df["label"]
)

# This vectorizer is capped at 5000 features, so it is NOT interchangeable
# with the Naive Bayes one; it is saved under its own file name below.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_vec, y_train)

print("Random Forest Accuracy:", model.score(X_test_vec, y_test))

# Context managers guarantee the artifacts are flushed and closed on disk.
with open("random_forest_model.pkl", "wb") as f:
    pickle.dump(model, f)
with open("vectorizer_rf.pkl", "wb") as f:
    pickle.dump(vectorizer, f)


Random Forest Accuracy: 0.998218262806236


**Train Model LSTM**

In [None]:
import pandas as pd, pickle
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

df = pd.read_csv("cleaned_news.csv")
# Documents whose cleaned text is empty are read back from CSV as NaN; the
# tokenizer expects strings, so restore them to empty strings.
X = df["text"].fillna("").values
y = df["label"].values

# Fixed seed + stratification keep the split reproducible and balanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Single source of truth for the vocabulary size shared by the tokenizer
# and the embedding layer.
vocab_size = 5000

# Fit the word index on the training split only.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Every sequence is padded/truncated to this length; the web cell must use
# the same value at prediction time.
max_len = 300
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# `input_length` is omitted: it is deprecated on Embedding and the layer
# takes the sequence length from the data it is fed.
model = Sequential([
    Embedding(vocab_size, 64),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# The validation data here is the test split itself, so the per-epoch
# val_accuracy is the same measurement as the final evaluation below.
model.fit(X_train_pad, y_train, epochs=3, batch_size=64, validation_data=(X_test_pad, y_test))

loss, acc = model.evaluate(X_test_pad, y_test)
print("LSTM Accuracy:", acc)

# Persist the network and the fitted tokenizer for the web cell.
model.save("lstm_model.h5")
with open("lstm_tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)




Epoch 1/3
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m278s[0m 485ms/step - accuracy: 0.9145 - loss: 0.2382 - val_accuracy: 0.9872 - val_loss: 0.0463
Epoch 2/3
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m279s[0m 496ms/step - accuracy: 0.9824 - loss: 0.0571 - val_accuracy: 0.9827 - val_loss: 0.0487
Epoch 3/3
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 503ms/step - accuracy: 0.9856 - loss: 0.0463 - val_accuracy: 0.9875 - val_loss: 0.0445
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 69ms/step - accuracy: 0.9864 - loss: 0.0471




LSTM Accuracy: 0.9875278472900391


In [None]:
pip install preprocess




**Web Development**

In [None]:
import pickle
import string

import nltk
from flask import Flask, request, render_template_string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Same Keras entry points as the LSTM training cell.
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Must equal the `max_len` used when the LSTM was trained.
MAX_LEN = 300

nltk.download("stopwords", quiet=True)
nltk.download("wordnet", quiet=True)

_stop_words = set(stopwords.words("english"))
_stemmer = PorterStemmer()
_lemmatizer = WordNetLemmatizer()
_punct_table = str.maketrans("", "", string.punctuation)


def preprocess(text):
    """Apply the same normalization the models were trained on.

    Mirrors the training-time cleaning step: lower-case, delete
    ``string.punctuation`` characters, split on whitespace, drop English
    stop words, then lemmatize and stem each remaining token.

    Args:
        text: Raw article text submitted through the form.

    Returns:
        The processed tokens joined by single spaces.
    """
    text = text.lower().translate(_punct_table)
    return " ".join(
        _stemmer.stem(_lemmatizer.lemmatize(word))
        for word in text.split()
        if word not in _stop_words
    )


def _load_pickle(path):
    """Load one pickled artifact and close the file afterwards."""
    # NOTE: unpickling executes arbitrary code; only load files produced by
    # the training cells of this notebook.
    with open(path, "rb") as fh:
        return pickle.load(fh)


app = Flask(__name__)

# File names match what the training cells actually write to disk.
nb_model = _load_pickle("naive_bayes_model.pkl")
rf_model = _load_pickle("random_forest_model.pkl")
lstm_model = load_model("lstm_model.h5")
# Each classical model needs the vectorizer it was fitted with: the Random
# Forest one is capped at 5000 features and differs from the Naive Bayes one.
vectorizer = _load_pickle("vectorizer.pkl")
rf_vectorizer = _load_pickle("vectorizer_rf.pkl")
tokenizer = _load_pickle("lstm_tokenizer.pkl")

html = """
<!DOCTYPE html>
<html>
<head>
    <title>Fake News Detector</title>
    <style>
        body { font-family: Arial; background: #f4f4f4; padding: 20px; }
        .container { max-width: 600px; background: #fff; margin: auto; padding: 20px; border-radius: 10px; }
        textarea { width: 100%; height: 150px; }
        select, input[type=submit] { width: 100%; padding: 10px; margin-top: 10px; }
        .result { font-size: 20px; margin-top: 20px; }
    </style>
</head>
<body>
    <div class="container">
        <h2>Fake News Detection</h2>
        <form method="post">
            <textarea name="news" placeholder="Enter news article here..." required></textarea>
            <select name="model">
                <option value="nb">Naïve Bayes</option>
                <option value="rf">Random Forest</option>
                <option value="lstm">LSTM</option>
            </select>
            <input type="submit" value="Check">
        </form>
        {% if result %}
        <div class="result"><strong>Prediction:</strong> {{ result }}</div>
        {% endif %}
    </div>
</body>
</html>
"""

@app.route("/", methods=["GET", "POST"])
def index():
    """Render the form and, on POST, classify the submitted article.

    The form supplies the article text (``news``) and the model to use
    (``model``: ``nb``, ``rf``, anything else falls through to the LSTM).
    A prediction of 1 is reported as real news, anything else as fake.
    """
    result = None
    if request.method == "POST":
        text = request.form["news"]
        model_choice = request.form["model"]
        processed = preprocess(text)

        if model_choice == "nb":
            prediction = nb_model.predict(vectorizer.transform([processed]))[0]
        elif model_choice == "rf":
            prediction = rf_model.predict(rf_vectorizer.transform([processed]))[0]
        else:
            seq = tokenizer.texts_to_sequences([processed])
            padded = pad_sequences(seq, maxlen=MAX_LEN)
            # Sigmoid output above 0.5 is treated as the "real" class.
            prediction = int(lstm_model.predict(padded)[0][0] > 0.5)

        result = "REAL News ✅" if prediction == 1 else "FAKE News ❌"
    return render_template_string(html, result=result)

if __name__ == "__main__":
    # Debug mode turns on the auto-reloader (which re-executes the script and
    # does not work from a notebook kernel) and the interactive debugger
    # (which allows code execution from the browser), so it is left off.
    app.run(debug=False)


FileNotFoundError: [Errno 2] No such file or directory: 'naive_bayes.pkl'