In [None]:
##### Packages Used #####
import dash
import dash.dependencies as dd
import dash_core_components as dcc
import dash_html_components as html
import re
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.express as px
import datetime
import plotly.graph_objects as go
from wordcloud import WordCloud, STOPWORDS , ImageColorGenerator
import nltk
from textblob import TextBlob
import numpy
import warnings
from io import BytesIO
import base64
import os

##### Import Twitter Data #####
twit_vac_data = pd.read_csv('https://github.com/AlexHumfrey/Python_Coursework/raw/main/vaccination_all_tweets.csv')

##### Pre-processing Twitter Data/Cleaning Text #####
# Patterns are raw strings (a plain '\s' triggers an "invalid escape sequence"
# warning) and are compiled once instead of once per pass over the column.
_HANDLE_RE = re.compile(r'@[^\s]+')              # Twitter handles
_HASHTAG_RE = re.compile(r'\B#\S+')              # Hashtags
_URL_RE = re.compile(r'http\S+')                 # URLs ('http\S+' also matches https links)
_WORD_RE = re.compile(r'\w+')                    # Runs of word characters
_SINGLE_CHAR_RE = re.compile(r'\s+[a-zA-Z]\s+')  # Isolated single letters
_WHITESPACE_RE = re.compile(r'\s+')              # Any run of whitespace


def _clean_tweet(text):
    """Return `text` stripped of handles, hashtags, URLs, punctuation and lone letters."""
    for noise in (_HANDLE_RE, _HASHTAG_RE, _URL_RE):
        text = noise.sub('', text)
    # Keeping only word characters drops every special character.
    text = ' '.join(_WORD_RE.findall(text))
    # Replace a lone letter with a space, not '', so the words on either side
    # of it are not glued together ("is a test" must not become "istest").
    text = _SINGLE_CHAR_RE.sub(' ', text)
    return _WHITESPACE_RE.sub(' ', text)


# NOTE: twit_vac is an alias of twit_vac_data, not a copy, so every column
# added or converted below is visible through both names.
twit_vac = twit_vac_data
twit_vac['text'] = twit_vac['text'].apply(_clean_tweet)
tweet_All = ' '.join(twit_vac['text'])  # All cleaned tweets as one string for the word cloud

##### Sentiment Analysis of Twitter Data Using TextBlob #####
# Analyse each tweet once: TextBlob's `sentiment` result carries both the
# polarity and the subjectivity score, so there is no need to build a second
# TextBlob per tweet just to read the other attribute.
tweet_sentiments = [TextBlob(tweet).sentiment for tweet in twit_vac['text']]
twit_vac['polarity'] = [score.polarity for score in tweet_sentiments]
twit_vac['subjectivity'] = [score.subjectivity for score in tweet_sentiments]

# Convert the 'date' strings to pandas datetimes so they can be grouped by day
# and compared against date literals later on.
twit_vac['date'] = pd.to_datetime(twit_vac['date'])

##### Frequency of Positive, Negative and Neutral Tweets #####
# Work on an explicit copy and assign through .loc. This replaces chained
# assignment on a slice, which needed every warning silenced and does not
# update the frame at all under pandas copy-on-write. It also removes the
# warnings.resetwarnings() call, which discarded the interpreter's default
# warning filters for the rest of the session.
sentiment_results = twit_vac[['date', 'text', 'polarity']].copy()

# Build all three masks from the raw scores before anything is overwritten.
raw_polarity = sentiment_results['polarity']
is_negative = raw_polarity <= -0.25
is_positive = raw_polarity >= 0.25
is_neutral = (raw_polarity < 0.25) & (raw_polarity > -0.25)

sentiment_results.loc[is_negative, 'polarity'] = -1
sentiment_results.loc[is_positive, 'polarity'] = 1
sentiment_results.loc[is_neutral, 'polarity'] = 0

# Map the bucket codes straight to labels rather than converting to text and
# running order-dependent string replacements ('-1.0' had to go before '1.0').
polarity = sentiment_results['polarity'].map({-1.0: 'Negative', 1.0: 'Positive', 0.0: 'Neutral'})
counts = polarity.value_counts()  # Number of tweets per sentiment label

##### Histogram Plot #####
# Histogram of the sentiment labels, coloured by label; the axis label for
# 'value' is blanked out.
histogram_options = dict(
    title="Histogram of Sentiment Analysis Results",
    labels={'value': ''},
    color="value",
    height=600,
    width=600,
)
fig1 = px.histogram(polarity, **histogram_options)

##### Sentiment Analysis Results/Plots #####
# Tweets per calendar day. The Grouper's `convention` argument is omitted
# because it only applies to period-indexed data and 'date' is a datetime column.
tpd = twit_vac.groupby(pd.Grouper(key='date', freq='D')).size().reset_index()
tpd.columns = ['Date', 'Daily Tweet Count']
fig2_1 = go.Scatter(x=tpd['Date'], y=tpd['Daily Tweet Count'], name="Tweet Frequency")

# Daily mean of the numeric columns (only 'polarity' is plotted here).
# numeric_only=True is required on pandas >= 2.0, where averaging the text
# columns raises TypeError instead of silently dropping them.
mtp = twit_vac.groupby(pd.Grouper(key='date', freq='D')).mean(numeric_only=True).reset_index()
fig2_2 = go.Scatter(x=mtp['date'], y=mtp['polarity'], name="Mean Tweet Polarity", yaxis="y2")

data = [fig2_1, fig2_2]

# Axis title fonts are given as title=dict(text=..., font=...). This replaces
# the deprecated `titlefont` shorthand, which recent Plotly releases no longer
# accept.
_fig2_title_font = dict(family='Arial, sans-serif', size=12)
layout = go.Layout(
    title="Covid19: Daily Tweet Frequency Count & Mean Tweet Polarity, December 2020 - April 2021",
    legend=dict(traceorder="reversed"),
    xaxis=dict(title=''),
    # The two y-axes are stacked: frequency in the lower part, polarity above it.
    yaxis=dict(title=dict(text='Tweet Frequency', font=_fig2_title_font), domain=[0, 0.45]),
    yaxis2=dict(title=dict(text='Mean Tweet Polarity', font=_fig2_title_font), domain=[0.55, 1]),
    template="plotly_white",
    margin=dict(r=20, t=100, b=20, l=50),
)

fig2 = go.Figure(data=data, layout=layout)
fig2.update_xaxes(rangeslider_visible=True, rangeselector=dict(buttons=[
    dict(count=1, label="Past 12 Months", step="year", stepmode="backward"),
    dict(count=1, label="2021-Present", step="year", stepmode="todate"),
    dict(label="All", step="all"),
]))

##### Timeseries Plots of Retweet and Favourite Activity #####
# The make_subplots() figure that used to be created here was dead code: it was
# overwritten by the go.Figure below before ever being used, so it is dropped.
fig3_1 = go.Scatter(x=twit_vac_data['date'], y=twit_vac_data['retweets'], name="Retweets")
fig3_2 = go.Scatter(x=twit_vac_data['date'], y=twit_vac_data['favorites'], name="Favourites", yaxis='y2')
data = [fig3_1, fig3_2]

# Axis title fonts are given as title=dict(text=..., font=...). This replaces
# the deprecated `titlefont` shorthand, which recent Plotly releases no longer
# accept.
_fig3_title_font = dict(family='Arial, sans-serif', size=12)
layout = go.Layout(
    title="Covid19: Retweets and Favourites Timeseries, December 2020 - April 2021",
    legend=dict(traceorder="reversed"),
    xaxis=dict(title=''),
    # The two y-axes are stacked: retweets in the lower part, favourites above it.
    yaxis=dict(title=dict(text='Retweets', font=_fig3_title_font), domain=[0, 0.45]),
    yaxis2=dict(title=dict(text='Favourites', font=_fig3_title_font), domain=[0.55, 1]),
    template="plotly_white",
    margin=dict(r=20, t=100, b=20, l=50),
)
fig3 = go.Figure(data=data, layout=layout)
fig3.update_xaxes(rangeslider_visible=True, rangeselector=dict(buttons=[
    dict(count=1, label="Past 12 Months", step="year", stepmode="backward"),
    dict(count=1, label="2021-Present", step="year", stepmode="todate"),
    dict(label="All", step="all"),
]))

##### Shared x axis #####
# Four traces, each on its own y-axis (y, y2, y3, y4), all anchored to the
# single x-axis so they share one range slider.
fig_1 = go.Scatter(x=tpd['Date'], y=tpd['Daily Tweet Count'], name="Daily Tweet Frequency")
fig_2 = go.Scatter(x=mtp['date'], y=mtp['polarity'], name="Daily Mean Tweet Polarity", yaxis='y2')
fig_3 = go.Scatter(x=twit_vac_data['date'], y=twit_vac_data['retweets'], name="Retweets", yaxis='y3')
fig_4 = go.Scatter(x=twit_vac_data['date'], y=twit_vac_data['favorites'], name="Favourites", yaxis='y4')

data = [fig_1, fig_2, fig_3, fig_4]

# Axis title fonts are given as title=dict(text=..., font=...). This replaces
# the deprecated `titlefont` shorthand, which recent Plotly releases no longer
# accept.
_combined_title_font = dict(family='Arial, sans-serif', size=10)
layout = go.Layout(
    legend=dict(traceorder="reversed"),
    height=900,
    width=1800,
    title="Covid19: Analysis of Tweets, December 2020 - April 2021",
    xaxis=dict(
        title='',
        rangeslider_visible=True,
        rangeselector=dict(buttons=[dict(label="Reset", step="all")]),
    ),
    # The `domain` values stack the four panels bottom-to-top with small gaps.
    yaxis=dict(title=dict(text='Tweet Frequency', font=_combined_title_font),
               anchor="x", domain=[0, 0.2]),
    yaxis2=dict(title=dict(text='Mean Tweet Polarity', font=_combined_title_font),
                anchor="x", domain=[0.25, 0.45]),
    yaxis3=dict(title=dict(text='Retweets', font=_combined_title_font),
                anchor="x", domain=[0.5, 0.7]),
    yaxis4=dict(title=dict(text='Favourites', font=_combined_title_font),
                anchor="x", domain=[0.75, 1]),
    template="plotly_white",
    margin=dict(r=20, t=100, b=20, l=50),
)
fig = go.Figure(data=data, layout=layout)




##### Full Wordcloud #####
# Use a private copy of the stop words. Updating the shared STOPWORDS set in
# place leaks into every later WordCloud in the session, so re-running the
# notebook would build this cloud with stop words added further down.
full_stopwords = STOPWORDS.union(['done', 'amp'])  # Add custom stop words
wordcloud = WordCloud(max_font_size=50, max_words=100, background_color='white',
                      colormap='Set2', random_state=2, stopwords=full_stopwords).generate(tweet_All)
# figsize is in inches: the previous (400, 400) asked for a 400-inch canvas,
# almost certainly a typo for the 40 used by the other word cloud.
plt.figure(figsize=(40, 40))  # Set figure size
plt.imshow(wordcloud, interpolation='bilinear')  # Display image
plt.axis("off")  # No axis details
plt.savefig('wordcloud1.png')  # Save wordcloud as .png to be read into dash
plt.close()  # Free the figure; only the saved file is used from here on

##### March/April Wordcloud #####
# Strongly negative tweets (polarity below -0.5) dated after 1 March 2021.
mar_apr_tweets = twit_vac[(twit_vac['date'] > '2021-03-01') & (twit_vac['polarity'] < -0.5)]
neg_tweets = mar_apr_tweets.nsmallest(10, ['polarity'])  # Ten lowest-polarity tweets, kept for inspection
pd.options.display.max_colwidth = 100  # Show more of each tweet when frames are displayed
tweet_marapr = ' '.join(mar_apr_tweets['text'])

# Use a private copy of the stop words. Updating the shared STOPWORDS set in
# place would change every other WordCloud built in the same session.
marapr_stopwords = STOPWORDS.union(['done', 'amp', 'vaccine', 'Ontario', 'Canada', 'Doug', 'Ford'])
wordcloud = WordCloud(max_font_size=50, max_words=100, background_color='black',
                      colormap='Set2', random_state=2, stopwords=marapr_stopwords).generate(tweet_marapr)
plt.figure(figsize=(40, 40))  # Set figure size
plt.imshow(wordcloud, interpolation='bilinear')  # Display image
plt.axis("off")  # No axis details
plt.savefig('wordcloud2.png')  # Save wordcloud as .png to be read into dash
plt.close()  # Free the figure; only the saved file is used from here on

##### Initialise Dash App #####
# The working directory doubles as the Dash assets folder, so the PNG files
# saved there can be served as assets.
# NOTE(review): this exposes every file in the working directory as an asset;
# a dedicated assets sub-folder would be tighter.
assets_path = os.getcwd()
app = dash.Dash(__name__, assets_folder=assets_path)

wc1 = 'wordcloud1.png'
# Read the image inside a context manager so the file handle is closed
# (it previously leaked and raised an "unclosed file" ResourceWarning).
with open(wc1, 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read())  # Embedded below as a base64 data URI


##### Dash App Layout #####
app.layout = html.Div(children=[
    html.H1(children='BSP417 Dashboard'), html.Div(children='''Coursework analytics dashboard.'''),
    dcc.Graph(id='Histogram of Sentiment Analysis Results', figure=fig1),
    dcc.Graph(id='Combined Plot', figure=fig),
    #dcc.Graph(id='Covid19: Daily Tweet Frequency Count & Mean Tweet Polarity, December 2020 - April 2021',figure=fig2),
    #dcc.Graph(id='Timeseries',figure=fig3)
    # Asset images must be addressed through the assets route. A bare
    # 'wordcloud1' or 'wordcloud2.png' is resolved against the page URL and
    # never reaches the image file, so the images stayed broken.
    html.Img(src=app.get_asset_url('wordcloud1.png'), height=600, width=800),
    html.Img(src=app.get_asset_url('wordcloud2.png'), height=600, width=800),
    # Alternative approach: embed the image bytes directly as a data URI.
    html.Img(src='data:image/png;base64,{}'.format(encoded_image.decode()), height=600, width=800),
])

if __name__ == '__main__':
    app.run_server(debug=False)


invalid escape sequence \s


invalid escape sequence \s


invalid escape sequence \s



Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off



unclosed file <_io.BufferedReader name='wordcloud1.png'>

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [21/Apr/2021 15:01:16] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:17] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:19] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:20] "[37mGET /wordcloud1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:20] "[37mGET /wordcloud2.png HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:52] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:52] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:55] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:55] "[37mGET /wordcloud1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Apr/2021 15:01:55] "[37mGET /wordcloud2.png HTTP/1.1[0m" 200 -
