In [None]:
# Required packages for this notebook: web API (flask, flask-cors), tunnelling
# (pyngrok), article/PDF fetching (newspaper3k, pymupdf, requests), and the
# model/explanation stack (tensorflow, nltk, lime).
# lxml is pinned to 4.9.3 and installed together with lxml_html_clean.
!pip install flask flask-cors pyngrok newspaper3k lime tensorflow nltk pymupdf lxml==4.9.3 lxml_html_clean requests

In [None]:
# Register the ngrok auth token so pyngrok can open a tunnel later on.
# NOTE(review): this token is committed in plain text. Treat it as leaked:
# rotate it and supply the replacement from a secret store or environment
# variable instead of the notebook source.
!ngrok config add-authtoken "2vMiRmqM75KyG7V5xKNGOelIZ78_EkWAHZkZRzLB4mqRroVt"

In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
from newspaper import Article
from lime.lime_text import LimeTextExplainer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import re
import nltk
import numpy as np
import requests
import fitz
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from pyngrok import ngrok
import smtplib
from email.mime.text import MIMEText

#NLTK setup: fetch the tokenizer data and the stop-word corpus, in this order
for _resource in ('punkt', 'stopwords', 'punkt_tab'):
    nltk.download(_resource)

#Flask setup
# `app` is the module-level application object that the route decorators
# below register their handlers on.
app = Flask(__name__)
# Enable CORS on the app using flask-cors' default settings.
CORS(app)

#Preprocessing function
def preprocess_text(text):
    """Normalize raw text into a space-joined string of stemmed tokens.

    The text is lowercased, stripped of URLs, HTML-like tags and every
    character other than a-z and whitespace, then tokenized. English stop
    words are dropped and the remaining tokens are Porter-stemmed.
    """
    cleaned = text.lower()
    # Order matters: URLs and tags must go before the catch-all character
    # filter, which would otherwise destroy the punctuation they match on.
    for pattern in (r'https?://\S+|www\.\S+', r'<.*?>', r'[^a-z\s]'):
        cleaned = re.sub(pattern, '', cleaned)

    words = word_tokenize(cleaned)
    english_stops = set(stopwords.words('english'))
    stem = PorterStemmer().stem
    return " ".join(stem(word) for word in words if word not in english_stops)

#Load model and tokenizer
# Both artifacts are read once at import time from the current working
# directory and shared by every request handler below.
model = load_model("lstm_model.h5")
# NOTE(review): pickle.load executes arbitrary code embedded in the file.
# Only load a tokenizer.pkl that comes from a trusted source.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Sequence length passed to pad_sequences wherever text is fed to the model.
# Presumably matches the length used at training time -- TODO confirm.
MAXLEN = 200

#LIME explanation function
def explain_with_lime(model, tokenizer, raw_text, maxlen=MAXLEN):
    """Return the words LIME ranks as most influential for *raw_text*.

    LIME is asked for the top 10 features; any of those that are English
    stop words are filtered out, so fewer than 10 words may be returned.

    Args:
        model: Classifier exposing ``predict``; its output is used as the
            probability of the second class ("real").
        tokenizer: Object exposing ``texts_to_sequences``.
        raw_text: Unprocessed text to explain.
        maxlen: Length every sequence is padded/truncated to.

    Returns:
        List of words in the order LIME reports them.
    """
    lime_explainer = LimeTextExplainer(class_names=["fake", "real"])
    english_stops = set(stopwords.words('english'))

    def predict_proba(texts):
        # LIME passes raw perturbed strings, so each one goes through the
        # same preprocessing as a normal prediction.
        cleaned = list(map(preprocess_text, texts))
        batch = pad_sequences(tokenizer.texts_to_sequences(cleaned), maxlen=maxlen)
        positive = model.predict(batch, verbose=0)
        # Two columns, [1 - p, p]; assumes predict returns shape (n, 1).
        return np.hstack([1 - positive, positive])

    explanation = lime_explainer.explain_instance(
        raw_text, predict_proba, num_features=10
    )

    keywords = []
    for word, _weight in explanation.as_list():
        if word.lower() not in english_stops:
            keywords.append(word)
    return keywords


#Extract text from PDF
def extract_text_from_pdf(url):
    """Download the PDF at *url* and return the concatenated text of its pages.

    Args:
        url: Address of the PDF document to fetch.

    Returns:
        The text of every page, joined in page order.

    Raises:
        ValueError: If the server answers with a status other than 200.
        requests.RequestException: On network errors, including a timeout.
        Exception: Whatever PyMuPDF raises when the payload is not a
            readable PDF.
    """
    # NOTE(review): the URL is supplied by the caller and fetched as-is, so
    # this can be pointed at internal addresses. Consider an allow-list.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/122.0.0.0 Safari/537.36"
        )
    }
    # Without a timeout a stalled remote server would block this worker
    # indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise ValueError("Failed to download PDF.")

    # Parse straight from memory: a fixed "temp.pdf" on disk would be shared
    # (and overwritten) by concurrent requests and was never cleaned up.
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(stream=response.content, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

#Main API endpoint
@app.route("/analyze", methods=["POST"])
def analyze():
    """Classify the article or PDF behind a URL and explain the prediction.

    Expects a JSON object ``{"url": "<http(s) address>"}``.

    Returns:
        200 with ``probability`` (raw model output), ``credibility``
        ("High"/"Low") and ``keywords`` (from LIME) on success;
        400 with ``error`` when the body or URL is invalid, or when the
        content cannot be fetched or yields no text;
        500 with ``error`` on any other failure.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so bad input gets the 400 below rather than
        # being turned into a 500 by the outer handler.
        payload = request.get_json(silent=True)
        url = payload.get("url") if isinstance(payload, dict) else None
        # Require a real scheme prefix; a bare "http" prefix also matched
        # strings that are not URLs, and non-string values used to raise.
        if not isinstance(url, str) or not url.startswith(("http://", "https://")):
            return jsonify({"error": "Invalid or missing URL"}), 400

        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/122.0.0.0 Safari/537.36"
            )
        }

        if url.lower().endswith(".pdf"):
            try:
                raw_text = extract_text_from_pdf(url)
            except Exception as e:
                return jsonify({"error": f"PDF extraction failed: {str(e)}"}), 400
        else:
            article = Article(url, request_headers=headers)
            try:
                article.download()
                article.parse()
                raw_text = article.text
            except Exception as e:
                return jsonify({"error": f"Failed to fetch article: {str(e)}"}), 400

        if not raw_text.strip():
            return jsonify({"error": "Could not extract article text."}), 400

        processed = preprocess_text(raw_text)
        sequence = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(sequence, maxlen=MAXLEN)
        # Single input, single output unit: take the scalar out of the batch.
        prob = model.predict(padded, verbose=0)[0][0]
        # NOTE(review): explain_with_lime labels the model output as the
        # probability of "real", yet here a value below 0.5 maps to "High".
        # Confirm which class the model's output actually represents.
        credibility = "High" if prob < 0.5 else "Low"
        keywords = explain_with_lime(model, tokenizer, raw_text)

        return jsonify({
            # Cast to a builtin float so the value is JSON serializable.
            "probability": float(prob),
            "credibility": credibility,
            "keywords": keywords
        })

    except Exception as e:
        # Last-resort boundary: report the failure instead of crashing the
        # request with an HTML error page.
        print(f"Error occurred: {e}")
        return jsonify({"error": str(e)}), 500

#Feedback route sends feedback to my email
@app.route("/send-feedback", methods=["POST"])
def send_feedback():
    """Email the feedback text from the JSON request body to the project inbox.

    Expects a JSON object with an optional ``"feedback"`` field.

    Returns:
        200 with ``message`` when the email was sent;
        400 with ``message`` when the body is not a JSON object;
        500 with ``message`` when SMTP delivery fails.
    """
    # Imported locally so this handler does not rely on a file-level import.
    import os

    # silent=True returns None for a missing or malformed body instead of
    # raising; a None or non-object body previously crashed on .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"message": "Invalid or missing JSON body"}), 400
    feedback = data.get("feedback", "")

    sender_email = "ds440capstone@gmail.com"
    receiver_email = "ds440capstone@gmail.com"
    # NOTE(review): the fallback below is a credential committed in source.
    # Rotate it, set FEEDBACK_APP_PASSWORD in the environment, and then
    # delete the literal.
    app_password = os.environ.get("FEEDBACK_APP_PASSWORD", "mvmaqmzwknkwgstb")

    subject = "New Fake News Detector Feedback"
    body = f"Feedback submitted:\n\n{feedback}"

    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = sender_email
    msg["To"] = receiver_email

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
            server.login(sender_email, app_password)
            server.sendmail(sender_email, receiver_email, msg.as_string())
        return jsonify({"message": "Feedback sent!"}), 200
    except Exception as e:
        # Delivery is best-effort: log the cause and tell the client it
        # failed rather than propagating the SMTP/network error.
        print("Email failed:", e)
        return jsonify({"message": "Failed to send feedback"}), 500

#Start server with ngrok
# Open a tunnel to the local port the Flask app listens on.
public_url = ngrok.connect(5000)
# Current pyngrok returns an NgrokTunnel object whose string form is a
# descriptive repr, not a bare URL; read its `public_url` attribute. Older
# versions returned the URL string itself, so fall back to the value as-is.
tunnel_url = getattr(public_url, "public_url", public_url)
print(f"Your public API URL: {tunnel_url}/analyze")
# Blocks until the server is stopped.
app.run(port=5000)
