# Group
- **Álvaro Ferreira Pires de Paiva** - 2016039162
- **Gustavo Rodarte Joaquim** - 2016048081
- **Kaio Max Marques de Medeiros** - 2014025560

In [46]:
### Library necessary to run this IPython Notebook
!pip install facebook-sdk
!pip install tqdm
!pip install requests
!pip install arrow





















# Treatment of data

In [47]:
import pandas as pd
import facebook
import requests
import arrow
from tqdm import tqdm

In [62]:
# Read env variables
ENV = pd.read_json('.env', orient='records').iloc[0]

In [63]:
graph = facebook.GraphAPI(access_token = ENV['ACCESS_TOKEN'], version = '2.7')

In [64]:
arw = arrow.utcnow()
current_month = arw.month

def extract_feed_data(feed, rows = []):
    should_finish = False

    for post in tqdm(feed['data'], 'Extracting current month data...'):
        post_month = arrow.get(post['created_time']).month
        if(post_month != current_month):
            should_finish = True
            break
        
        likes = graph.get_object(str(post['id']) + '/likes?limit=0&summary=true')
        reactions = graph.get_object(str(post['id']) + '/reactions?limit=0&summary=true')
        comments = graph.get_object(str(post['id']) + '/comments?limit=0&summary=true')

        row_data = {
            'id': post['id'],
            'created_time': post['created_time'],
            'date': arrow.get(post['created_time']).format('DD-MM-YYYY'),
            'permalink_url': post['permalink_url'],
            'type': post['type'],
            'status_type': post['status_type'],
            'message': post['message'] if 'message' in post else '-',
            'total_likes': likes['summary']['total_count'],
            'total_reactions': reactions['summary']['total_count'],
            'total_comments': comments['summary']['total_count'],
            'total_shares': post['shares']['count'] if 'shares' in post else 0
        }
        rows.append(row_data)

    if(should_finish):
        print('Month data complete!')
        return rows
    else:
        next_feed = requests.get(feed['paging']['next']).json()
        return extract_feed_data(next_feed, rows)

In [65]:
def get_page_feed_data(page):
    # Extract feed
    page_data = pd.DataFrame(extract_feed_data(page['feed']))

    # Sort page_data by 'created_time'
    page_data.sort_values(['created_time'], ascending = True, inplace = True)
    page_data.index = range(1, len(page_data) + 1)

    # Add page name
    page_name = pd.Series(page['name'], index = page_data.index, name = 'page')
    page_data = page_data.join(page_name)
    
    return page_data

In [66]:
def get_page_object(page_id):
    return graph.get_object(
        id = page_id,
        fields='name, about, category, fan_count, feed.limit(100){created_time, permalink_url, type, status_type, message, shares}'
    )

In [67]:
# Get pages objects
page_one = get_page_object(ENV['PAGE_IDS'][0])
page_two = get_page_object(ENV['PAGE_IDS'][1])

# Get pages feed data
page_feed_data_one = get_page_feed_data(page_one)
page_feed_data_two = get_page_feed_data(page_two)

# Concat pages data
pages_feed_data = pd.concat([page_feed_data_one, page_feed_data_two], ignore_index = True)


Extracting current month data...:   0%|          | 0/100 [00:00<?, ?it/s][A
Extracting current month data...:   1%|          | 1/100 [00:01<02:18,  1.40s/it][A
Extracting current month data...:   2%|▏         | 2/100 [00:02<02:17,  1.40s/it][A
Extracting current month data...:  68%|██████▊   | 68/100 [01:34<00:44,  1.40s/it]
Extracting current month data...:   0%|          | 0/100 [00:00<?, ?it/s][A

Month data complete!



Extracting current month data...:   1%|          | 1/100 [00:01<02:17,  1.39s/it][A
Extracting current month data...:   2%|▏         | 2/100 [00:02<02:17,  1.41s/it][A
Extracting current month data...:   3%|▎         | 3/100 [00:04<02:16,  1.41s/it][A
Extracting current month data...:   4%|▍         | 4/100 [00:05<02:15,  1.42s/it][A
Extracting current month data...:   5%|▌         | 5/100 [00:07<02:14,  1.41s/it][A
Extracting current month data...:  89%|████████▉ | 89/100 [02:06<00:15,  1.42s/it]

Month data complete!


In [68]:
pages_feed_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225 entries, 0 to 224
Data columns (total 12 columns):
created_time       225 non-null object
date               225 non-null object
id                 225 non-null object
message            225 non-null object
permalink_url      225 non-null object
status_type        225 non-null object
total_comments     225 non-null int64
total_likes        225 non-null int64
total_reactions    225 non-null int64
total_shares       225 non-null int64
type               225 non-null object
page               225 non-null object
dtypes: int64(4), object(8)
memory usage: 21.2+ KB


In [69]:
pages_feed_data.tail()

Unnamed: 0,created_time,date,id,message,permalink_url,status_type,total_comments,total_likes,total_reactions,total_shares,type,page
220,2017-04-22T19:27:16+0000,22-04-2017,144112092312277_1420919194631554,"Pessoal, boa tarde. Depois de 4 meses de muito...",https://www.facebook.com/jdoriajr/videos/14209...,added_video,3572,58560,61726,1855,video,Lula
221,2017-04-23T00:16:55+0000,23-04-2017,267949976607343_1295831767152487,Lula fez palestras para a empresa OAS no exter...,https://www.facebook.com/Lula/posts/1295831767...,shared_story,1090,8493,9139,1690,link,Lula
222,2017-04-23T14:23:28+0000,23-04-2017,267949976607343_1296479237087740,-,https://www.facebook.com/Lula/posts/1296479237...,shared_story,707,7555,8641,0,photo,Lula
223,2017-04-23T17:06:04+0000,23-04-2017,144112092312277_1421948674528606,"Pessoal, a Revista Poder desse mês traz uma ma...",https://www.facebook.com/jdoriajr/posts/142194...,added_photos,390,10925,11388,518,photo,Lula
224,2017-04-23T18:56:43+0000,23-04-2017,267949976607343_1296686450400352,Na última quarta-feira (19) os advogados de Lu...,https://www.facebook.com/Lula/posts/1296686450...,shared_story,216,1105,1210,369,link,Lula


# Graphics

In [70]:
from bokeh.models import HoverTool
from bokeh.charts import Scatter, Bar
from bokeh.layouts import row, gridplot
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, ColumnDataSource
import string

In [77]:
# PAGE 1
totals1 = page_feed_data_one[['total_comments', 'total_reactions', 'total_shares', 'total_likes']].groupby(page_feed_data_one['date']).sum()
dates1 = page_feed_data_one['date'].unique().tolist()
indexs1 = []

for date in page_feed_data_one['date'].unique().tolist():
    indexs1.append(int(date.split('-')[0]))

s1_comments = ColumnDataSource(
        data=dict(
            x=indexs1,
            y=totals1['total_comments'],
            dates=dates1
        )
    )
s1_reactions = ColumnDataSource(
        data=dict(
            x=indexs1,
            y=totals1['total_reactions'],
            dates=dates1
        )
    )
s1_shares = ColumnDataSource(
        data=dict(
            x=indexs1,
            y=totals1['total_shares'],
            dates=dates1
        )
    )

s1_likes = ColumnDataSource(
        data=dict(
            x=indexs1,
            y=totals1['total_likes'],
            dates=dates1
        )
    )

hover = HoverTool(
    tooltips="""
    <div>
        <p><b>Data:</b> @dates</p>
        <p><b>Total:</b> @y</p>
    </div>
    """
)

title1 = '%s - %s até %s' % (page_one['name'], dates1[0], dates1[-1])
p1 = figure(plot_width=900, plot_height=300, tools=[hover], title=title1)
p1.line('x', 'y', line_width=3, source=s1_comments, legend='Total Comments', color='orange')
p1.line('x', 'y', line_width=3, source=s1_reactions, legend='Total Reactions', color='blue')
p1.line('x', 'y', line_width=3, source=s1_shares, legend='Total Shares', color='green')
p1.line('x', 'y', line_width=3, source=s1likes, legend='Total Likes', color='red'

# PAGE 2
totals2 = page_feed_data_two[['total_comments', 'total_reactions', 'total_shares', 'total_likes']].groupby(page_feed_data_two['date']).sum()
dates2 = page_feed_data_two['date'].unique().tolist()
indexs2 = []

for date in page_feed_data_two['date'].unique().tolist():
    indexs2.append(int(date.split('-')[0]))

s2_comments = ColumnDataSource(
        data=dict(
            x=indexs2,
            y=totals2['total_comments'],
            dates=dates2
        )
    )
s2_reactions = ColumnDataSource(
        data=dict(
            x=indexs2,
            y=totals2['total_reactions'],
            dates=dates2
        )
    )
s2_shares = ColumnDataSource(
        data=dict(
            x=indexs2,
            y=totals2['total_shares'],
            dates=dates2
        )
    )

s2_likes = ColumnDataSource(
        data=dict(
            x=indexs2,
            y=totals2['total_likes'],
            dates=dates2
        )
    )

hover = HoverTool(
    tooltips="""
    <div>
        <p><b>Data:</b> @dates</p>
        <p><b>Total:</b> @y</p>
    </div>
    """
)

title2 = '%s - %s até %s' % (page_two['name'], dates2[0], dates2[-1])
p2 = figure(plot_width=900, plot_height=300, tools=[hover], title=title2)
p2.line('x', 'y', line_width=3, source=s2_comments, legend='Total Comments', color='orange')
p2.line('x', 'y', line_width=3, source=s2_reactions, legend='Total Reactions', color='blue')
p2.line('x', 'y', line_width=3, source=s2_shares, legend='Total Shares', color='green')
p2.line('x', 'y', line_width=3, source=s2_likes, legend='Total Likes', color='red'

grid = gridplot([p1, p2], ncols=1)

output_notebook()
show(grid)

SyntaxError: invalid syntax (<ipython-input-77-d3fe8f4ba263>, line 56)

In [72]:
p1_reactions = Scatter(
        page_feed_data_one,
        x='date',
        y='total_reactions', 
        xlabel='Date',
        ylabel='Total Reactions', 
        color='type',
        title='Date vs Total Reactions - ' + page_one['name']
)

p2_reactions = Scatter(
        page_feed_data_two,
        x='date',
        y='total_reactions', 
        xlabel='Date',
        ylabel='Total Reactions', 
        color='type',
        title='Date vs Total Reactions - ' + page_two['name']
)

p1_shares = Scatter(
        page_feed_data_one,
        x='date',
        y='total_shares', 
        xlabel='Date',
        ylabel='Total Shares', 
        color='type',
        title='Date vs Total Shares - ' + page_one['name']
)

p2_shares = Scatter(
        page_feed_data_two,
        x='date',
        y='total_shares', 
        xlabel='Date',
        ylabel='Total Shares', 
        color='type',
        title='Date vs Total Shares - ' + page_two['name']
)


grid = gridplot([p1_reactions, p2_reactions, 
                p1_shares, p2_shares], ncols=2, sizing_mode='scale_width')


p2_reactions.x_range = p1_reactions.x_range

# Link the y_range of p2 to p1: p2.y_range
p2_reactions.y_range = p1_reactions.y_range

# Link the x_range of p3 to p1: p3.x_range
p1_shares.x_range = p1_reactions.x_range

p1_shares.y_range = p1_reactions.y_range
# Link the y_range of p4 to p1: p4.y_range
p2_shares.y_range = p1_reactions.y_range

p2_shares.x_range = p1_reactions.x_range

output_notebook()
show(grid)

In [73]:
p_reactions = Bar(
    pages_feed_data,
    label='date',
    values='total_reactions',
    agg='sum',
    stack='page',
    xlabel='Date',
    ylabel='Total Reactions',
    title="Sum Post Total Reactions by Date, stacked by Page",
    legend='top_right'
)

p_comments = Bar(
    pages_feed_data,
    label='date',
    values='total_comments',
    agg='sum',
    stack='page',
    xlabel='Date',
    ylabel='Total Com',
    title="Sum Post Total Comments by Date, stacked by Page",
    legend='top_right'
)

p_likes = Bar(
    pages_feed_data,
    label='date',
    values='total_likes',
    agg='sum',
    stack='page',
    xlabel='Date',
    ylabel='Total Com',
    title="Sum Post Total Comments by Date, stacked by Page",
    legend='top_right'
)

p_shares = Bar(
    pages_feed_data,
    label='date',
    values='total_shares',
    agg='sum',
    stack='page',
    xlabel='Date',
    ylabel='Total Com',
    title="Sum Post Total Comments by Date, stacked by Page",
    legend='top_right'
)

output_notebook()

grid = gridplot([p_likes, p_reactions, 
                p_shares, p_comments], ncols=2, sizing_mode='scale_width')

show(grid)

# References 
- https://facebook-sdk.readthedocs.io/en/latest/api.html
- https://developers.facebook.com/docs/graph-api/reference/v2.7