### Basic Setup

In [110]:
import logging
import os
import pickle
import sys

import telegram
from GoogleNews import GoogleNews
from newspaper import Article
from newspaper import ArticleException
from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove, Update
from telegram.ext import (
    Updater,
    CommandHandler,
    MessageHandler,
    Filters,
    ConversationHandler,
    CallbackContext,
)
# Telegram bot token. The TELEGRAM_BOT_TOKEN environment variable takes
# precedence so the secret does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working, but it is a
# credential committed to source -- revoke it and drop the fallback once the
# environment variable is configured.
TOKEN = (
    os.environ.get("TELEGRAM_BOT_TOKEN")
    or "1692364106:AAEgUHFsQVNhCq8S8FVT4bO75nbXq3Thq60"
)
# Path of the pickled model file handed to FakeNewsDetector.
FILE_LOCATION = "svm_model.sav"
# Conversation states returned by the bot handlers.
# RERUN is not referenced anywhere else in the visible code.
RERUN, WAIT, PREDICT, SUGGEST = range(4)

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

logger = logging.getLogger(__name__)

### Load Model

In [111]:
class FakeNewsDetector:
    """Classify news articles by URL and suggest reliable alternatives.

    Combines three pieces: a pickled model loaded from disk, the ``newspaper``
    ``Article`` downloader/parser, and a ``GoogleNews`` search client.
    Prediction codes used throughout: 0 = reliable, 1 = unreliable,
    -1 = the article could not be retrieved.
    """

    # How many of the article's leading keywords are joined into the search
    # query used by suggest_by_article.
    QUERY_KEYWORD_COUNT = 3

    def __init__(self, file_location):
        """Load the pre-saved model and create the GoogleNews client.

        Args:
            file_location: Path of the pickled model file.
        """
        # NOTE(review): pickle.load can execute arbitrary code stored in the
        # file; only load model files that come from a trusted source.
        # The context manager closes the file handle once the model is loaded.
        with open(file_location, 'rb') as model_file:
            self.model = pickle.load(model_file)
        self.googlenews = GoogleNews(lang='en', encode='utf-8')

    def get_article(self, url):
        """Download and parse the article at ``url``.

        Returns:
            The parsed ``Article`` object.

        Errors from the download/parse step propagate to the caller; the
        callers in this class treat ``ArticleException`` as "not retrievable".
        """
        article = Article(url)
        article.download()
        article.parse()
        return article

    def get_google_results(self, sentence):
        """Search GoogleNews for ``sentence`` and return the result links.

        The client's accumulated results are cleared afterwards so the next
        search starts from an empty result set.
        """
        self.googlenews.search(sentence)
        results = self.googlenews.get_links()
        self.googlenews.clear()
        return results

    def suggest_by_keywords(self, keywords, threshold=3):
        """Return up to ``threshold`` search results that are predicted reliable.

        Args:
            keywords: Search query passed to GoogleNews.
            threshold: Maximum number of URLs to return. Values of 0 or less
                yield an empty list.

        Returns:
            List of URLs, in search-result order, whose prediction is 0.
        """
        results = self.get_google_results(keywords)
        logger.info(f"SUGGESTED KEYWORDS : {keywords}")
        suggestions = []
        for news_url in results:
            # Check the cap before doing any work so a non-positive threshold
            # stops immediately instead of never matching the stop condition.
            if len(suggestions) >= threshold:
                break
            logger.info(f"SUGGESTED NEWS URL : {news_url}")
            # Only an explicit 0 counts as reliable; 1 (unreliable) and
            # -1 (could not be retrieved) are both skipped.
            if self.predict(news_url) == 0:
                suggestions.append(news_url)

        return suggestions

    def suggest_by_article(self, url, threshold=5):
        """Suggest reliable articles related to the article at ``url``.

        The article's leading keywords (``QUERY_KEYWORD_COUNT`` of them) are
        joined into a search query and handed to ``suggest_by_keywords``.

        Args:
            url: URL of the article to find alternatives for.
            threshold: Maximum number of URLs to return.

        Returns:
            List of suggested URLs; empty when the article cannot be
            retrieved or yields no keywords.
        """
        try:
            article = self.get_article(url)
            logger.info(f"SUGGESTED URL : {url}")
            # Keyword extraction is done by the article's nlp() step.
            article.nlp()
        except ArticleException:
            # Mirror predict(): an unretrievable article is not a crash,
            # there is simply nothing to base suggestions on.
            logger.warning(f"Could not retrieve article for suggestions : {url}")
            return []

        sentence = ' '.join(article.keywords[:self.QUERY_KEYWORD_COUNT])
        if not sentence:
            # No keywords means no meaningful query to search for.
            return []
        # Forward the caller's threshold (it used to be silently ignored).
        return self.suggest_by_keywords(sentence, threshold)

    def predict(self, url):
        """Predict the reliability of the article at ``url``.

        Returns:
            1 if the model flags the article as unreliable, 0 if reliable,
            -1 if the article could not be retrieved.
        """
        try:
            article = self.get_article(url)
        except ArticleException:
            return -1
        data = article.text
        logger.info(url)
        # The model is called with a single-element batch, so the verdict for
        # this article is the first element of the returned sequence.
        result = self.model.predict([data])
        return 1 if result[0] else 0

In [112]:
# Build the detector once at module level; constructing it unpickles the model
# at FILE_LOCATION, and the bot handlers reach it through this global name.
ml_model = FakeNewsDetector(FILE_LOCATION)

### Handler functions for the Telegram bot

In [113]:
def initialise(update, context):
    """Handle /start: greet the user and show the two-option keyboard.

    Returns:
        The WAIT state, so the next message is treated as the user's choice.
    """
    logger.info("State : INITIALISE")

    greeting_lines = [
        "Hi! I'm a FakeNews Bot!",
        "Send /cancel to stop talking to me.",
        "Here are the things I can do currently :",
        "1. Predict if a news URL is true/fake",
        "2. Suggest an article based on keywords",
        "What do you want me to do?",
    ]
    # A single keyboard row holding both choices; it disappears after one use.
    choices = [['Verify News', 'Suggest News']]
    keyboard = ReplyKeyboardMarkup(choices, one_time_keyboard=True)

    update.message.reply_text("\n".join(greeting_lines), reply_markup=keyboard)
    return WAIT

def wait(update, context):
    """Read the user's menu choice and ask for the matching input.

    Returns:
        PREDICT when the user picked 'Verify News', otherwise SUGGEST.
    """
    logger.info("State : WAIT")
    message = update.message
    sender = message.from_user
    logger.info(f"Result of choice {sender.first_name} : {message.text}")

    # Guard clause for the verification path; everything else asks for keywords.
    if message.text == 'Verify News':
        prompt, next_state = 'What is the article URL?', PREDICT
    else:
        prompt = (
            "What keywords are you looking for?\n"
            "Keep your keywords space-separated for higher accuracy."
        )
        next_state = SUGGEST

    # Either way the choice keyboard is removed along with the prompt.
    message.reply_text(prompt, reply_markup=ReplyKeyboardRemove())
    return next_state
    
def help(update, context):
    """Reply with a pointer to the /start command."""
    hint = "Run /start to get started!"
    update.message.reply_text(hint)
    
def predict(update, context):
    """Classify the article URL sent by the user and report the verdict.

    The message text is taken as the article URL. Depending on the detector's
    answer the user is told the article looks reliable (0), that it could not
    be read (-1), or that it looks unreliable (anything else), in which case
    alternative sources are looked up and sent one per message.

    Returns:
        ConversationHandler.END, which finishes the conversation.
    """
    logger.info("State : PREDICT")
    message = update.message
    # Use the bot attached to this callback rather than the module-level
    # updater, which is only created in a later cell.
    bot = context.bot

    def show_typing():
        # The typing indicator masks the delay of downloads and predictions.
        bot.send_chat_action(chat_id=message.chat_id, action=telegram.ChatAction.TYPING)

    # Surrounding whitespace is never part of a URL.
    article_url = str(message.text).strip()
    show_typing()
    tag = ml_model.predict(article_url)

    if tag == 0:
        message.reply_text("Article seems legit~")
    elif tag == -1:
        # The article could not be downloaded/parsed: that says nothing about
        # its reliability, and retrying the download for suggestions would
        # only fail again.
        message.reply_text(
            "Sorry, I could not read that article. "
            "Please check the URL and send /start to try again."
        )
    else:
        message.reply_text("Article seems unreliable. Let me find more reliable sources for you~")
        show_typing()
        suggested_article_urls = ml_model.suggest_by_article(article_url)
        if len(suggested_article_urls) == 0:
            message.reply_text("There does not seem to be any reliable sources to recommend :(")
        else:
            message.reply_text("Here are the top alternative sources")
            for url in suggested_article_urls:
                show_typing()
                message.reply_text(url)

    return ConversationHandler.END
    
def suggest(update, context):
    """Suggest reliable articles for the keywords sent by the user.

    The message text is used as the search query. Each suggested URL is sent
    as its own message; when nothing reliable is found the user is told so
    instead of receiving a header with no links under it.

    Returns:
        ConversationHandler.END, which finishes the conversation.
    """
    logger.info("State : SUGGEST")
    message = update.message
    # Use the bot attached to this callback rather than the module-level
    # updater, which is only created in a later cell.
    bot = context.bot

    def show_typing():
        # The typing indicator masks the delay of searching and predicting.
        bot.send_chat_action(chat_id=message.chat_id, action=telegram.ChatAction.TYPING)

    keywords = str(message.text)
    show_typing()
    results = ml_model.suggest_by_keywords(keywords)

    if not results:
        message.reply_text("There does not seem to be any reliable sources to recommend :(")
        return ConversationHandler.END

    message.reply_text("Here are the top 3 reliable articles")
    for url in results:
        show_typing()
        message.reply_text(url)
    return ConversationHandler.END

def echo(update, context):
    """Send the user's own message text straight back."""
    incoming = update.message
    incoming.reply_text(incoming.text)

def error(update, context):
    """Log, at WARNING level, the update and the error stored on the context."""
    failed_update = str(update)
    cause = str(context.error)
    logger.warning(f"Update {failed_update} caused error {cause}")
    
def cancel(update, context) -> int:
    """Handle /cancel: say goodbye, drop the keyboard, end the conversation.

    Returns:
        ConversationHandler.END.
    """
    message = update.message
    sender = message.from_user
    logger.info("User %s canceled the conversation.", sender.first_name)

    farewell = 'Bye! I hope we can talk again some day.'
    message.reply_text(farewell, reply_markup=ReplyKeyboardRemove())
    return ConversationHandler.END


In [None]:

# Create the bot and wire up its handlers, then poll until interrupted.
updater = Updater(TOKEN, use_context=True)

# Get the dispatcher to register handlers
dp = updater.dispatcher

# Filters.text on its own also accepts command messages. State handlers are
# tried before the fallbacks, so without excluding commands a /cancel sent
# while waiting for a URL or keywords would be treated as that input.
free_text = Filters.text & ~Filters.command

conv_handler = ConversationHandler(
    entry_points=[CommandHandler('start', initialise)],
    states={
        # Only the two keyboard choices are accepted here.
        WAIT: [MessageHandler(Filters.regex('^(Verify News|Suggest News)$'), wait)],
        PREDICT: [MessageHandler(free_text, predict)],
        SUGGEST: [MessageHandler(free_text, suggest)],
    },
    fallbacks=[CommandHandler('cancel', cancel)],
)

dp.add_handler(conv_handler)
dp.add_handler(CommandHandler('help', help))
# Register the error logger; it was defined but never attached.
dp.add_error_handler(error)

# start_polling begins fetching updates; idle() blocks this cell until the
# process is interrupted.
updater.start_polling()
updater.idle()

2021-04-09 10:33:05,263 - apscheduler.scheduler - INFO - Scheduler started
2021-04-09 10:33:08,605 - __main__ - INFO - State : INITIALISE
2021-04-09 10:33:55,787 - __main__ - INFO - State : WAIT
2021-04-09 10:33:55,790 - __main__ - INFO - Result of choice Yu : Verify News
2021-04-09 10:33:57,938 - __main__ - INFO - State : PREDICT
2021-04-09 10:33:58,590 - __main__ - INFO - https://www.tnp.sg/news/singapore/sm-teo-will-be-acting-prime-minister-when-pm-lee-absent
2021-04-09 10:33:59,702 - __main__ - INFO - SUGGESTED URL : https://www.tnp.sg/news/singapore/sm-teo-will-be-acting-prime-minister-when-pm-lee-absent
2021-04-09 10:34:00,100 - __main__ - INFO - SUGGESTED KEYWORDS : prime teo 4g
2021-04-09 10:34:00,102 - __main__ - INFO - SUGGESTED NEWS URL : https://ca.finance.yahoo.com/news/teo-chee-hean-acting-prime-minister-pm-lee-absence-135948668.html
2021-04-09 10:34:00,738 - __main__ - INFO - https://ca.finance.yahoo.com/news/teo-chee-hean-acting-prime-minister-pm-lee-absence-135948668.h