### Import Libraries

In [1]:
# importing required libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
# import joblib
from joblib import dump
# import joblib
from joblib import load

from wordcloud import WordCloud
import re
from nltk.corpus import stopwords
# To sort dictionary values
import operator 
## Preprocessing
import pandas as pd
# Show full cell contents (tweets are long). None is the supported spelling for
# "no truncation"; the legacy value -1 is deprecated and rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
import os


### Connect to Twitter

In [2]:
import tweepy
import config
    
# Read the four Twitter API credentials from the local `config` module
# (imported above) instead of hardcoding them in the notebook.
consumer_key= config.consumer_key
consumer_secret= config.consumer_secret
access_token=config.access_token
access_token_secret =config.access_token_secret

# Connect to Twitter through the API.
# `api` is the shared client used by the trend and tweet-search helpers below.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret) 
# wait_on_rate_limit=True is passed so tweepy waits instead of failing when rate limited
api = tweepy.API(auth,wait_on_rate_limit=True) 


### Get Twitter Trends

In [3]:
def get_trends_by_location(loc_id,count):
    '''Get trending topics for a location.

    Args:
        loc_id: location identifier passed to ``api.trends_place``.
        count: maximum number of trends to return.

    Returns:
        DataFrame with columns ``Trends``, ``Volume`` and ``Language``.
        ``Language`` is NaN for a trend whose language could not be detected.
        An empty DataFrame (same columns) is returned when the trends cannot
        be fetched at all.
    '''
    import iso639
    import numpy as np
    from langdetect import detect

    columns = ['Trends', 'Volume', 'Language']

    # Fetch first and on its own: if this call fails there is nothing to fall
    # back on, so return an empty (but correctly shaped) frame.
    try:
        trend_items = api.trends_place(loc_id)[0]['trends'][:count]
    except Exception as e:
        print("An exception occurred: ", e)
        return pd.DataFrame(columns=columns)

    rows = []
    for trending in trend_items:
        # Language detection is best-effort per trend, so one undetectable
        # name does not wipe out the language of every other trend.
        try:
            language = iso639.to_name(detect(trending['name']))
        except Exception as e:
            print("An exception occurred: ", e)
            language = np.nan
        rows.append([trending['name'], trending['tweet_volume'], language])

    return pd.DataFrame(rows, columns=columns)
    

### Get Worldwide Twitter Trends

In [4]:
# Fetch up to 20 trends for location id 1 (used in this notebook for the
# worldwide trends) and preview the first 10 rows.
df_world_trends = get_trends_by_location(1, 20)
df_world_trends.head(10)

Unnamed: 0,Trends,Volume,Language
0,#AskCuppyAnything,12405.0,English
1,Carrefour,407859.0,English
2,Taysom Hill,17839.0,Swedish
3,#StreamLifeGoesOn,152933.0,English
4,#VidasNegrasImportam,87607.0,Portuguese
5,#TransDayOfRemembrance,37338.0,French
6,#TeröristSelo,15239.0,German
7,Mourão,,Portuguese
8,Geraldo,26063.0,German
9,Edward Norton,11207.0,English


### Get Translated Tweets

In [5]:
def get_translation(text):
    '''Translate a piece of text with googletrans.

    Returns the translated string, or the literal 'NA' when the
    translation attempt raises for any reason.
    '''
    from googletrans import Translator  # local import, as in the rest of this notebook
    try:
        # A fresh Translator is created per call; translate() is called with
        # only the text, so googletrans' default target language applies.
        return Translator().translate(text).text
    except Exception:
        # Best-effort: callers get a placeholder instead of an exception.
        return 'NA'

In [6]:
# Translate every trend name; get_translation returns 'NA' when a translation fails.
df_world_trends["Translated_Trends"] = [get_translation(val) for val in df_world_trends.Trends]
# Preview the original name next to its detected language and translation.
df_world_trends[["Trends","Language","Translated_Trends"]].head(2)

Unnamed: 0,Trends,Language,Translated_Trends
0,#AskCuppyAnything,English,#AskCuppyAnything
1,Carrefour,English,Carrefour


### Get Tweets for a Hashtag

In [7]:
def get_related_tweets(search_keyword, count=50):
    '''Collect English tweets matching one or more search keywords.

    Args:
        search_keyword: a single keyword/hashtag (str) or an iterable of them.
        count: maximum number of tweets to fetch per keyword (default 50).

    Returns:
        DataFrame with a single ``tweets`` column (non-ASCII characters such
        as emojis removed), or None if the search raised an exception.
    '''
    try:
        # A bare string must be treated as ONE keyword; iterating it directly
        # would search for each character separately.
        if isinstance(search_keyword, str):
            keywords = [search_keyword]
        else:
            keywords = list(search_keyword)

        records = []
        for keyword in keywords:
            # Exclude retweets and media tweets. Search operators must be
            # separated from the keyword (and from each other) by spaces.
            search_tag = keyword + " -filter:retweets -filter:media"

            print('Searching tweets for: ', search_tag)

            fetched_tweets = tweepy.Cursor(api.search,
                                q=search_tag,
                                lang="en").items(count)
            records.extend([tweet.text] for tweet in fetched_tweets)

        # Build the frame once, after all keywords were searched, instead of
        # growing it with DataFrame.append (removed in pandas 2.0).
        df_tweets = pd.DataFrame(records, columns=['tweets'])
        # Clean emojis and other non-ASCII characters from the tweets
        df_tweets['tweets'] = df_tweets['tweets'].str.replace(r'[^\x00-\x7F]+', '', regex=True)
        return df_tweets
    except Exception as e:
        print('Encountered Exception:', e)
        return None

### Create a Random Forest  Model

In [11]:
# importing required libraries
import pandas as pd
import matplotlib.pyplot as plt
import re

from nltk.corpus import stopwords
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS, TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import pickle


## ---------------------------------
## Read Data
## ---------------------------------

# Load the labelled tweets and keep only the columns that are not listed
# below; everything named here is dropped right after reading.
df = pd.read_csv("twitter_sentiments.csv").drop(
    columns=[
        "SentimentSource",
        "Unnamed: 4",
        "Unnamed: 5",
        "Unnamed: 6",
        "ItemID",
    ]
)

df.head(2)

Unnamed: 0,Sentiment,SentimentText
0,0,is so sad for my APL friend.............
1,0,I missed the New Moon trailer...


In [None]:
## ---------------------------------
## Data Preprocessing
## ---------------------------------
# Every pattern is passed with regex=True explicitly: pandas 2.0 changed the
# default of Series.str.replace to regex=False, which would silently turn
# these patterns into literal (never matching) strings.
df['clean_tweet'] = (
    df['SentimentText']
    .astype(str)
    # convert to lower case
    .str.lower()
    # Remove URLs first: once punctuation is stripped "https://" no longer
    # exists in the text and a URL pattern can never match.
    .str.replace(r'https?://\S+', ' ', regex=True)
    # Remove punctuation
    .str.replace(r'[^\w\s]', ' ', regex=True)
    # Remove numbers
    .str.replace(r'\d+', '', regex=True)
    # Collapse repeated spaces
    .str.replace(r' +', ' ', regex=True)
    # Remove leading/trailing spaces
    .str.strip()
)

# Remove stop words (NLTK English list plus a few project-specific tokens)
stop = stopwords.words('english')
stop.extend(["racism","alllivesmatter","amp","https","co","like","people","black","white"])
stop_lookup = set(stop)  # set membership instead of scanning the list per word
df['clean_tweet'] = df['clean_tweet'].apply(
    lambda text: " ".join(word for word in text.split() if word not in stop_lookup)
)


df.head(2)

In [None]:
## ---------------------------------
## Prepare Random Forest Model
## Define Pipeline Stages
## ---------------------------------

# Fixed seed so that Restart & Run All reproduces the same forest.
RANDOM_STATE = 42

# TF-IDF features (top 1000 terms) feeding a 100-tree random forest.
# stop_words='english' selects scikit-learn's built-in ENGLISH_STOP_WORDS list;
# passing the frozenset itself is rejected by recent scikit-learn versions,
# which only accept 'english', a list, or None.
pipeline = Pipeline(steps= [('tfidf', TfidfVectorizer(lowercase=True,
                                                      max_features=1000,
                                                      stop_words='english')),
                            ('model', RandomForestClassifier(n_estimators=100,
                                                             random_state=RANDOM_STATE))])

# Fit the pipeline once on the full cleaned data set (there is no train/test
# split in this notebook).
pipeline.fit(df.clean_tweet, df.Sentiment)

# Expose the fitted stages under their previous names instead of training a
# second, identical vectorizer + forest on the same data.
tfidf_vectorizer = pipeline.named_steps['tfidf']
model_RF = pipeline.named_steps['model']
tweets_idf = tfidf_vectorizer.transform(df.clean_tweet)

### Dump the Pipeline Model

In [None]:
# import joblib
from joblib import dump

# Persist the fitted pipeline (compressed) to "text_classification.joblib";
# predict_emotion loads the model back from this same file name.
dump(pipeline, filename="text_classification.joblib", compress= True)

### Predict Emotion behind tweets

In [None]:
def predict_emotion(tweets):
    '''Predict the sentiment label of each tweet with the saved pipeline.

    Args:
        tweets: DataFrame with a ``tweets`` text column, or None (which is
            what get_related_tweets returns when the search failed).

    Returns:
        The same DataFrame with an added ``Prediction`` column, or None when
        no input was supplied or the prediction raised an exception.
    '''
    from joblib import load

    # The upstream fetcher returns None on failure; report that explicitly
    # instead of tripping over a TypeError further down.
    if tweets is None:
        print("Exception in predict_emotion: ", "no tweets to predict on")
        return None

    try:
        # Nothing to predict: keep the frame shape consistent and skip the
        # model, which cannot predict on zero samples.
        if tweets.empty:
            tweets['Prediction'] = []
            return tweets

        # Load the saved pipeline model.
        # NOTE(review): the pipeline was fitted on cleaned text, while callers
        # in this notebook pass raw tweets here and clean afterwards — confirm
        # this ordering is intended.
        pipeline = load("text_classification.joblib")
        # Get the prediction
        tweets['Prediction'] = pipeline.predict(tweets['tweets'])
        return tweets
    except Exception as e:
        print("Exception in predict_emotion: ", e)
        return None

### Clean the Tweets

In [None]:
def data_cleaning(df_tweets):
    '''Clean the tweet text for display.

    The ``tweets`` column is lower-cased, stripped of URLs, punctuation,
    digits and stop words, and then replaced by a ``Tweets`` column holding
    the cleaned text. The frame is modified in place and also returned.

    Args:
        df_tweets: DataFrame with a ``tweets`` text column.

    Returns:
        The same DataFrame, with ``tweets`` replaced by ``Tweets``.
    '''
    # regex=True is explicit everywhere: pandas 2.0 changed the default of
    # Series.str.replace to regex=False, which would treat these patterns as
    # literal strings and clean nothing.
    text = df_tweets['tweets'].astype(str).str.lower()
    # Remove URLs before punctuation; afterwards "https://" no longer exists
    # in the text and a URL pattern could never match.
    text = text.str.replace(r'https?://\S+', ' ', regex=True)
    # Remove punctuation
    text = text.str.replace(r'[^\w\s]', ' ', regex=True)
    # Remove numbers
    text = text.str.replace(r'\d+', '', regex=True)

    # Remove stop words. split()/join also collapses repeated whitespace and
    # trims both ends, so no separate space-squeezing or strip step is needed.
    stop = set(stopwords.words('english'))
    stop.update(["amp","https","co","rt","new","let","also","still","one","people","gt"])
    df_tweets['clean_text'] = text.apply(
        lambda tweet: " ".join(word for word in tweet.split() if word not in stop)
    )

    # Remove the raw text column
    del df_tweets['tweets']
    # Rename the clean_text column as Tweets
    df_tweets.rename(columns = {'clean_text':'Tweets'}, inplace = True)
    return df_tweets

### Test the function for a hashtag

In [None]:
# End-to-end check of the three helpers defined above for one search term.
search_keyword = 'King Von'
# Get tweets for a hashtag
df_tweets = get_related_tweets(search_keyword)
# Predict Emotion for the tweets (runs on the raw text, before cleaning)
df_tweets = predict_emotion(df_tweets)
# Clean the tweets (replaces the 'tweets' column with the cleaned 'Tweets' column)
df_tweets = data_cleaning(df_tweets)
df_tweets.head()

In [None]:
from matplotlib.pyplot import *

# Count tweets per predicted class. Sorting by label keeps the bar order
# stable instead of depending on which class happens to be more frequent.
prediction_counts = df_tweets.Prediction.value_counts().sort_index()

# Label the bars themselves. A legend on a single bar series only shows one
# entry and does not say which bar is which.
# NOTE(review): assumes Sentiment 0 = negative and 1 = positive in the
# training CSV — confirm against the data set description.
ax = prediction_counts.rename(index={0: "Negative", 1: "Positive"}).plot(kind = "bar")
ax.set(title="Predicted sentiment", xlabel="Sentiment", ylabel="Number of tweets");

### Connect webpage and Model

In [None]:
# importing the required libraries
from flask import Flask, render_template, request, redirect, url_for, Response
from flask_table import Table, Col
from joblib import load
from flask import send_file
#import base64
from io import BytesIO
#import io
#import random
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import random, threading, webbrowser


####------ function to get trending tweets ----####
def gettrends():
    """Return the top worldwide trends with their language and translation.

    Fetches 10 trends for location id 1, adds a ``Translated_Trends`` column
    and returns only the ``Trends``, ``Language`` and ``Translated_Trends``
    columns.
    """
    trends = get_trends_by_location(1, 10)
    # Translate each trend name in order
    trends["Translated_Trends"] = list(map(get_translation, trends.Trends))
    return trends[["Trends","Language","Translated_Trends"]]

####------ function to get results for a particular text query----####
def requestResults(search_keyword):
    """Run the full pipeline for one search query.

    Fetches the matching tweets, predicts their sentiment, then cleans the
    text, and returns the resulting DataFrame.
    """
    # fetch -> predict -> clean, each step feeding the next
    fetched = get_related_tweets(search_keyword)
    predicted = predict_emotion(fetched)
    return data_cleaning(predicted)

####------ function to create a plot ----####
def create_plot(results=None):
    """Build a bar chart of the number of tweets per predicted class.

    Args:
        results: DataFrame with a ``Prediction`` column. When omitted, the
            notebook-level ``df_tweets`` frame is used.

    Returns:
        matplotlib.figure.Figure holding the chart (empty axes when there are
        no predictions).
    """
    data = df_tweets if results is None else results
    counts = data.Prediction.value_counts()

    # Draw on this figure's own axes. Going through pyplot's implicit current
    # figure would leave `fig` empty and share state between requests.
    fig = Figure()
    ax = fig.add_subplot(1, 1, 1)
    if not counts.empty:
        ax.bar([str(label) for label in counts.index], counts.values)
    ax.set_xlabel("Prediction")
    ax.set_ylabel("Number of tweets")
    return fig


# PNG bytes of the chart rendered by the most recent /success request; served
# by /plot_png. Module-level state is enough for this single-user app.
_latest_plot_png = None


# start flask
app = Flask(__name__)

####------ render default webpage ----####
@app.route('/')
def home():
    """Render the landing page."""
    return render_template('home.html')

# when a POST is received on '/', show the trends page
@app.route('/', methods=['POST', 'GET'])
def get_trends():
    """Render the current trends table for a POST on '/'."""
    if request.method == 'POST':
        trends = gettrends()
        return render_template('trends.html', table=trends.to_html())
    # A view must always return a response; fall back to the landing page.
    return render_template('home.html')


# when a POST is received, redirect to the results url for the search term
@app.route('/get_data', methods=['POST', 'GET'])
def get_data():
    """Redirect a submitted search term to its results page."""
    if request.method == 'POST':
        name = request.form.get('search', '').strip()
        # An empty term cannot be used to build /success/<name>.
        if name:
            return redirect(url_for('success', name=name))
    # GET requests and empty searches go back to the landing page instead of
    # returning None (which Flask turns into a server error).
    return redirect(url_for('home'))

# send the most recently rendered plot as a png file
@app.route('/plot_png')
def plot_png():
    """Serve the chart produced by the latest /success request."""
    if _latest_plot_png is None:
        return Response("No plot available yet", status=404, mimetype='text/plain')
    return send_file(BytesIO(_latest_plot_png), mimetype='image/png')

####------ show the results page for a search term ----####
@app.route('/success/<name>')
def success(name):
    """Fetch, classify and display tweets for ``name``."""
    global _latest_plot_png

    results = requestResults(name)

    # Render the chart for THESE results with the Agg canvas (no pyplot) and
    # keep the bytes so the /plot_png route can serve them.
    buffer = BytesIO()
    FigureCanvas(create_plot(results)).print_png(buffer)
    _latest_plot_png = buffer.getvalue()

    # Pass the image URL (not a Response object) to the template.
    return render_template('results.html', img=url_for('plot_png'), table=results.to_html())


# Start the development server when this code is run as the main program.
if __name__ == '__main__':
    # Disabled alternative kept from the original: run on a random port
    # between 5000 and 5999 and print the resulting URL.
    #port = 5000 + random.randint(0, 999)
    #print(port)
    #url = "http://127.0.0.1:{0}".format(port)
    #print(url)
    #app.run(use_reloader=False, debug=True, port=port)
    # use_reloader=False turns off Flask's auto-reloader; debug=True enables
    # debug mode and is meant for local development only.
    app.run(use_reloader=False, debug=True)
    