<h1>Flair Prediction Using Saved Models</h1>
<h3>Import Trained Models and Predict</h3>

<h2>Import Python Modules and Dependencies</h2>

In [None]:
# Modules
import pickle
import praw
import requests 
import numpy as np
import pandas as pd
import json
import time
import string
import sys

# NLP PreProcessors
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

<h2>Helper Functions to Pre-Process Data</h2>

In [None]:
def cleanText(inputText):
    """Normalise free text into a space-joined string of stemmed tokens.

    Processing steps, in order:
      1. Drop every non-ASCII character and lower-case the text.
      2. Split on whitespace.
      3. Strip all punctuation from each token except ``#``, ``+`` and ``_``.
      4. Remove NLTK English stop words.
      5. Stem each remaining token with the Porter stemmer.

    Args:
        inputText: The text to clean. A float (how pandas represents a
            missing value) is treated as empty text.

    Returns:
        The cleaned tokens joined by single spaces; ``''`` when the input is
        a float or contains no tokens.
    """
    # isinstance (rather than an exact type comparison) also recognises float
    # subclasses such as numpy.float64, which would otherwise reach .encode()
    # below and raise AttributeError.
    if isinstance(inputText, float):
        return ''

    tokens = inputText.encode('ascii', 'ignore').decode('utf-8').lower().split()
    if not tokens:
        # Nothing to clean: skip loading the stop-word list and the stemmer.
        return ''

    # '#', '+' and '_' are kept on purpose (e.g. "c#", "c++", snake_case).
    specialChars = string.punctuation.replace('#', '').replace('+', '').replace('_', '')
    table = str.maketrans('', '', specialChars)
    stop_words = set(stopwords.words('english'))
    porter = PorterStemmer()

    # Stop words are matched AFTER punctuation stripping, as in the original
    # pipeline, so the output stays identical to what the saved vectorizers
    # were fitted on.
    stripped = (token.translate(table) for token in tokens)
    return ' '.join(porter.stem(word) for word in stripped if word not in stop_words)

def splitUrl(inputText):
    """Flatten a URL, domain or permalink into space-separated words.

    The text is lower-cased and split on ``/``; empty segments and the
    ``http:`` / ``https:`` scheme markers are discarded. The first remaining
    segment (the host, for a full URL) is further split on ``.`` with the
    ``www`` and ``com`` labels removed.

    Args:
        inputText: URL-like string, e.g. ``'https://www.reddit.com/r/india/'``.

    Returns:
        The remaining pieces joined by single spaces, e.g.
        ``'reddit r india'``. Returns ``''`` when nothing is left after
        filtering (empty string, only slashes, or only a scheme), where the
        previous implementation raised ``IndexError``.
    """
    schemes = ('https:', 'http:')
    segments = [
        segment
        for segment in inputText.lower().split('/')
        if segment and segment not in schemes
    ]
    if not segments:
        return ''

    ignoredLabels = ('com', 'www')
    host = ' '.join(
        label for label in segments[0].split('.') if label not in ignoredLabels
    )
    return ' '.join([host] + segments[1:])

def classifyTime(inputText):
    """Bucket a Unix timestamp into a part-of-day label.

    The timestamp is converted with ``time.localtime``, so the hour (and
    therefore the label) depends on the time zone of the machine running
    the code.

    Args:
        inputText: Epoch seconds; anything ``int()`` accepts.

    Returns:
        ``'Morning'`` for hours 6-11, ``'Noon'`` for 12-16, ``'Evening'`` for
        17-20, and ``'Night'`` for every other hour.
    """
    hour_of_day = time.localtime(int(inputText)).tm_hour
    if 6 <= hour_of_day < 12:
        return 'Morning'
    if 12 <= hour_of_day < 17:
        return 'Noon'
    if 17 <= hour_of_day < 21:
        return 'Evening'
    return 'Night'

<h3>Extract relevant data from the given input link</h3>

In [None]:
def getPostDetails(redditUrl,dataDictionary):
    """Fetch one r/india submission and append its fields to ``dataDictionary``.

    The submission is resolved through the module-level PRAW client
    ``reddit``; its metadata is then looked up on the Pushshift API by
    submission id, and the bodies of its top-level comments are read
    through PRAW.

    Args:
        redditUrl: URL of the Reddit submission.
        dataDictionary: Dict of lists, mutated in place. Exactly one value is
            appended to each of these keys: ``author_fullname``,
            ``created_utc``, ``domain``, ``is_crosspostable``,
            ``is_reddit_media_domain``, ``post_hint``, ``num_comments``,
            ``permalink``, ``score``, ``selftext``, ``title``, ``url`` and
            ``comments``.

    Returns:
        ``True`` when a row was appended. ``False`` (with ``dataDictionary``
        untouched) when the URL cannot be resolved, the Pushshift request
        fails or returns malformed JSON, or Pushshift does not return exactly
        one record for the submission.

    Note:
        Errors raised by PRAW while loading the comment tree are not caught
        here and propagate to the caller.
    """
    try:
        post = reddit.submission(url=redditUrl)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit. The precise PRAW exception classes are not imported in
        # this file, so Exception is the narrowest catch available here.
        return False

    try:
        # A timeout keeps a stalled Pushshift endpoint from hanging the caller
        # forever; ``params`` lets requests build and escape the query string.
        response = requests.get(
            'https://api.pushshift.io/reddit/submission/search/',
            params={'subreddit': 'india', 'ids': str(post.id)},
            timeout=10,
        )
        response.raise_for_status()
        req = json.loads(response.text)
    except (requests.RequestException, ValueError):
        # ValueError covers json.JSONDecodeError for non-JSON response bodies.
        return False

    records = req.get('data') if isinstance(req, dict) else None
    if not isinstance(records, list) or len(records) != 1:
        return False
    submission = records[0]
    if not isinstance(submission, dict):
        return False

    # limit=0 drops the "load more comments" placeholders without fetching them.
    post.comments.replace_more(limit=0)
    # Only top-level comments are used; each body is followed by one space.
    commentText = ''.join(
        str(comment.body) + ' '
        for comment in post.comments.list()
        if comment.is_root
    )

    # Text fields are stringified; ``.get`` is used so the Pushshift record is
    # not modified while reading defaults.
    textDefaults = (
        ('author_fullname', 'null'),
        ('domain', 'null'),
        ('post_hint', 'null'),
        ('permalink', 'null'),
        ('selftext', 'null'),
        ('title', 'null'),
        ('url', 'null'),
    )
    # These fields are stored with whatever type Pushshift returned.
    rawDefaults = (
        ('created_utc', 0),
        ('is_crosspostable', 'false'),
        ('is_reddit_media_domain', 'false'),
        ('num_comments', 0),
        ('score', 0),
    )
    for key, default in textDefaults:
        dataDictionary[key].append(str(submission.get(key, default)))
    for key, default in rawDefaults:
        dataDictionary[key].append(submission.get(key, default))
    dataDictionary['comments'].append(str(commentText))
    return True

<h3>Make Predictions</h3>

In [None]:
# Reddit API client used by getPostDetails. 'CLIENT_ID' / 'CLIENT_SECRET' are
# placeholders and must be replaced with real application credentials.
reddit = praw.Reddit(client_id='CLIENT_ID',
                     client_secret='CLIENT_SECRET',
                     user_agent='default')

# Column-oriented buffer: getPostDetails appends one value per key per post.
dataDictionary = {'author_fullname': [],
                  'created_utc' : [],
                  'domain' : [],
                  'is_crosspostable' : [],
                  'is_reddit_media_domain' : [],
                  'post_hint' : [],
                  'num_comments' : [],
                  'permalink' : [],
                  'score' : [],
                  'selftext' : [],
                  'title' : [],
                  'url' : [],
                  'comments' : []
                }

res = getPostDetails('https://www.reddit.com/r/india/comments/cfw/',dataDictionary)
if not res:
#     Handle Error
    print('Test')

else:
    pandasFrame = pd.DataFrame(dataDictionary)

    # Apply the text / URL / time pre-processing helpers column by column.
    pandasFrame['created_utc'] = pandasFrame['created_utc'].apply(classifyTime)
    pandasFrame['domain'] = pandasFrame['domain'].apply(splitUrl)
    pandasFrame['post_hint'] = pandasFrame['post_hint'].apply(cleanText)
    pandasFrame['permalink'] = pandasFrame['permalink'].apply(splitUrl)
    pandasFrame['selftext'] = pandasFrame['selftext'].apply(cleanText)
    pandasFrame['title'] = pandasFrame['title'].apply(cleanText)
    pandasFrame['url'] = pandasFrame['url'].apply(splitUrl)
    pandasFrame['comments'] = pandasFrame['comments'].apply(cleanText)
    # Collapse whitespace-only cells to NaN, then turn every NaN into ''.
    pandasFrame = pandasFrame.replace(r'^\s*$', np.nan, regex=True)
    pandasFrame = pandasFrame.replace(np.nan, '')

    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only load pickle files that were produced by a trusted training run.
    # ``with`` closes each file handle; they were previously left open.
    with open("trainedModel.pickle","rb") as pickle_in_Model:
        model = pickle.load(pickle_in_Model)
    with open("vectorizer.pickle","rb") as pickle_in_Vectorizer:
        tfidVectorizer = pickle.load(pickle_in_Vectorizer)
    with open("labels.pickle","rb") as pickle_in_Labels:
        labels = pickle.load(pickle_in_Labels)

    # Every object-dtype column is replaced by the output of the vectorizer
    # stored under the same column name.
    textColumns = [column for column in pandasFrame.columns
                   if pandasFrame[column].dtype == 'object']
    encodedFrames = []
    for column in textColumns:
        vectorizer = tfidVectorizer[column]
        # NOTE(review): presumably these are scikit-learn vectorizers. Newer
        # scikit-learn releases expose get_feature_names_out() in place of
        # get_feature_names(); prefer it when present, else fall back.
        featureNames = (getattr(vectorizer, 'get_feature_names_out', None)
                        or vectorizer.get_feature_names)
        encodedFrames.append(
            pd.DataFrame(vectorizer.transform(pandasFrame[column]).todense(),
                         columns=featureNames()))

    # Concatenate once instead of growing a frame inside the loop.
    newDF = pd.concat(encodedFrames, axis=1) if encodedFrames else pd.DataFrame()
    pandasFrame = pandasFrame.drop(columns=textColumns)

    #  Indexing Problem Resolution
    pandasFrame.reset_index(drop=True, inplace=True)
    newDF.reset_index(drop=True, inplace=True)

    pandasFrame = pd.concat([pandasFrame, newDF], axis=1)

    prediction = model.predict_proba(pandasFrame)
    # Indices of the three highest-probability classes per row, best first.
    best_3 = np.flip(np.argsort(prediction, axis=1)[:,-3:] ,1)
    prediction = [labels[x] for x in best_3]
    print(prediction)