In [7]:
import pandas as pd
import altair as alt
import numpy as np

In [8]:
# Altair setup: select the 'default' renderer and the 'json' data transformer.
alt.renderers.enable('default')
alt.data_transformers.enable('json')

# Load the tweets from the CSV file in the working directory.
tweets = pd.read_csv('tweets.csv')

# Per-tweet count of characters that are NOT word characters, whitespace, one
# of the listed punctuation marks, or a regional-indicator symbol
# (U+1F1E6-U+1F1FF). The notebook treats this as the tweet's emoji count.
tweet_text = tweets['text']
leftover_chars = tweet_text.str.findall(r'[^\w\s.,"@\'?/#!$%\^&\*;:{}=\-_`~()\U0001F1E6-\U0001F1FF]')
tweets['emojis'] = leftover_chars.str.len()

# Emoji tracked individually; each one becomes a column holding the number of
# times it occurs in the tweet text. (Repeated entries simply recompute the
# same column.)
boxer_emojis = ['☘️','🇮🇪','🍀','💸','🤑','💰','💵','😴','😂','🤣','🥊','👊','👏','🇮🇪','💪','🔥','😭','💰']
for symbol in boxer_emojis:
    tweets[symbol] = tweet_text.str.count(symbol)

# Per-tweet totals for each camp's emoji. `.add` is element-wise addition, so
# a missing count in any component leaves the total missing as well.
tweets['irish_pride'] = tweets['☘️'].add(tweets['🇮🇪']).add(tweets['🍀'])
tweets['money_team'] = (
    tweets['💸']
    .add(tweets['🤑'])
    .add(tweets['💰'])
    .add(tweets['💵'])
)

In [12]:
def _rolling_team_average(per_second, column, label, window='4Min'):
    """Return a tidy frame with the time-based rolling mean of one team column.

    Parameters
    ----------
    per_second : DataFrame indexed by timestamp, containing ``column``.
    column : name of the per-second emoji total to smooth.
    label : value written to the ``team`` column (used as the legend entry).
    window : time offset passed to ``Series.rolling``.

    Returns
    -------
    DataFrame with columns ``datetime``, ``tweet_count`` and ``team``.
    """
    return (
        per_second[column]
        .rolling(window)
        .mean()
        .reset_index()
        .rename(columns={column: 'tweet_count'})
        .assign(team=label)
    )


# Parse the `created_at` text into timestamps and index the tweets by them so
# the frame can be resampled and rolled over time.
tweets['datetime'] = pd.to_datetime(tweets['created_at'])
tweets = tweets.set_index('datetime')

money_emojis = ['💸', '🤑', '💰', '💵']
irish_emojis = ['☘️', '🍀', '🇮🇪']

# Per-second totals. Only numeric/boolean columns are aggregated: summing the
# whole frame would also "sum" the raw text columns, which on pandas >= 2.0
# (numeric_only defaults to False) concatenates strings or raises on mixed
# str/NaN values.
teams = (
    tweets
    .select_dtypes(include=['number', 'bool'])
    .resample('1s')
    .sum()
)

# Keep only the seconds in which at least one team emoji was tweeted.
teams = teams[teams[money_emojis + irish_emojis].gt(0).any(axis=1)]

# Mayweather ("money team") emoji rolling average.
mdf = _rolling_team_average(teams, 'money_team', '💸🤑💰💵')

# McGregor ("Irish pride") emoji rolling average.
idf = _rolling_team_average(teams, 'irish_pride', '☘️🍀🇮🇪')

# Both teams stacked in one long-form frame.
ndf = pd.concat([mdf, idf])

In [13]:
# Text annotations for the visualization: one row per note, giving the
# timestamp, the y-position ('count') and the label text.
annotation_columns = ['date', 'count', 'note']
annotations = [
    ['2017-08-27 00:15:00', 4, 'Fight begins'],
    ['2017-08-27 00:22:00', 5, 'McGregor does OK \nin the early rounds'],
    ['2017-08-27 00:53:00', 4, 'Mayweather takes \nover and wins by \nTKO'],
]
a_df = pd.DataFrame(annotations, columns=annotation_columns)

# Line end-points for the visualization: two points per 'class' label.
line_points = [
    ['2017-08-27 00:15:00', 3.75, 'A'],
    ['2017-08-27 00:15:00', 2.25, 'A'],
    ['2017-08-27 00:24:00', 4.3, 'B'],
    ['2017-08-27 00:30:00', 3.8, 'B'],
]
plot_df = pd.DataFrame(line_points, columns=['date', 'y', 'class'])

In [16]:
alt.themes.enable('fivethirtyeight')


def _axis(title=''):
    """Return the four-tick axis used by every layer, with an optional title."""
    return alt.Axis(tickCount=4, title=title)


def _team_line(data, label, colour, y_title=''):
    """Build one team's rolling-average line layer.

    ``data`` must provide ``datetime``, ``tweet_count`` and ``team`` columns.
    The colour scale has a single-entry domain (``label`` -> ``colour``) so
    that each team gets its own legend entry once the layers' colour scales
    are resolved independently.
    """
    return alt.Chart(data).mark_line().encode(
        x=alt.X('datetime:T', axis=_axis()),
        y=alt.Y('tweet_count:Q', axis=_axis(y_title)),
        color=alt.Color(
            'team:N',
            scale=alt.Scale(domain=[label], range=[colour]),
            legend=alt.Legend(title='', orient='top', symbolType='stroke', labelFontSize=25),
        ),
    )


# plot mcgregor emojis
irish_line = _team_line(idf, '☘️🍀🇮🇪', 'gold')

# plot mayweather emojis (this layer carries the y-axis title)
money_line = _team_line(mdf, '💸🤑💰💵', 'green', y_title='Four-minute rolling average')

# plot lines: `detail` groups the points by class, so each class is drawn as
# its own black line rather than one connected path
plot_lines = alt.Chart(plot_df).mark_line(color='black').encode(
    x=alt.X('date:T', axis=_axis()),
    y=alt.Y('y:Q', axis=_axis()),
    detail='class:N',
)

# plot annotations: '\n' inside a note starts a new line of text
plot_annotation = alt.Chart(a_df).mark_text(size=14, lineBreak='\n').encode(
    x=alt.X('date:T', axis=_axis()),
    y=alt.Y('count:Q', axis=_axis()),
    text='note:N',
)

# combine visualizations; the size is set on the layered chart itself rather
# than on a single layer, so it does not depend on how layer-level sizes are
# merged
chart = (
    (money_line + irish_line + plot_lines + plot_annotation)
    .resolve_scale(color='independent')
    .properties(
        width=500,
        height=300,
        title={
            "text": "Irish Pride VS The Money Team",
            "fontSize": 25,
            "subtitle": ["Four-minute rolling average of the number of uses of selected emoji in",
                         "sampled tweets during Mayweather-McGregor fight"],
            "subtitleFontSize": 16,
        },
    )
)

In [17]:
# Display the combined chart as this cell's output.
chart