In [1]:
### Library necessary to run this IPython Notebook
!pip install facebook-sdk
!pip install tqdm
!pip install requests
!pip install arrow



In [2]:
import pandas as pd
import facebook
import requests
import arrow
from tqdm import tqdm

In [3]:
# Read env variables
ENV = pd.read_json('.env', orient='records').iloc[0]

In [4]:
graph = facebook.GraphAPI(access_token = ENV['ACCESS_TOKEN'], version = '2.7')

In [5]:
arw = arrow.utcnow()
current_month = arw.month

def extract_feed_data(feed, rows = []):
    should_finish = False

    for post in tqdm(feed['data'], 'Extracting current month data...'):
        post_month = arrow.get(post['created_time']).month
        if(post_month != current_month):
            should_finish = True
            break
        
        likes = graph.get_object(str(post['id']) + '/likes?limit=0&summary=true')
        reactions = graph.get_object(str(post['id']) + '/reactions?limit=0&summary=true')
        comments = graph.get_object(str(post['id']) + '/comments?limit=0&summary=true')

        row_data = {
            'id': post['id'],
            'created_time': post['created_time'],
            'date': arrow.get(post['created_time']).format('DD-MM-YYYY'),
            'permalink_url': post['permalink_url'],
            'type': post['type'],
            'status_type': post['status_type'],
            'message': post['message'] if 'message' in post else '-',
            'total_likes': likes['summary']['total_count'],
            'total_reactions': reactions['summary']['total_count'],
            'total_comments': comments['summary']['total_count'],
            'total_shares': post['shares']['count'] if 'shares' in post else 0
        }
        rows.append(row_data)

    if(should_finish):
        print('Month data complete!')
        return rows
    else:
        next_feed = requests.get(feed['paging']['next']).json()
        return extract_feed_data(next_feed, rows)

In [6]:
def get_page_feed_data(page):
    # Extract feed
    page_data = pd.DataFrame(extract_feed_data(page['feed']))

    # Sort page_data by 'created_time'
    page_data.sort_values(['created_time'], ascending = True, inplace = True)
    page_data.index = range(1, len(page_data) + 1)

    # Add page name
    page_name = pd.Series(page['name'], index = page_data.index, name = 'page')
    page_data = page_data.join(page_name)
    
    return page_data

In [7]:
def get_page_object(page_id):
    return graph.get_object(
        id = page_id,
        fields='name, about, category, fan_count, feed.limit(100){created_time, permalink_url, type, status_type, message, shares}'
    )

In [8]:
# Get pages objects
page_one = get_page_object(ENV['PAGE_IDS'][0])
page_two = get_page_object(ENV['PAGE_IDS'][1])

# Get pages feed data
page_feed_data_one = get_page_feed_data(page_one)
page_feed_data_two = get_page_feed_data(page_two)

# Concat pages data
pages_feed_data = pd.concat([page_feed_data_one, page_feed_data_two], ignore_index = True)

Extracting current month data...:  79%|███████▉  | 79/100 [01:54<00:29,  1.43s/it]
Extracting current month data...:   0%|          | 0/100 [00:00<?, ?it/s][A

Month data complete!



Extracting current month data...:   1%|          | 1/100 [00:01<02:15,  1.36s/it][A
Extracting current month data...:   2%|▏         | 2/100 [00:02<02:13,  1.36s/it][A
Extracting current month data...:   3%|▎         | 3/100 [00:04<02:11,  1.35s/it][A
Extracting current month data...:   4%|▍         | 4/100 [00:05<02:08,  1.34s/it][A
Extracting current month data...: 100%|██████████| 100/100 [02:18<00:00,  1.57s/it]
Extracting current month data...:  15%|█▌        | 15/100 [00:21<01:57,  1.38s/it]

Month data complete!


In [9]:
pages_feed_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273 entries, 0 to 272
Data columns (total 12 columns):
created_time       273 non-null object
date               273 non-null object
id                 273 non-null object
message            273 non-null object
permalink_url      273 non-null object
status_type        273 non-null object
total_comments     273 non-null int64
total_likes        273 non-null int64
total_reactions    273 non-null int64
total_shares       273 non-null int64
type               273 non-null object
page               273 non-null object
dtypes: int64(4), object(8)
memory usage: 25.7+ KB


In [10]:
pages_feed_data.tail()

Unnamed: 0,created_time,date,id,message,permalink_url,status_type,total_comments,total_likes,total_reactions,total_shares,type,page
268,2017-04-22T21:00:00+0000,22-04-2017,173450952675280_1405661482787548,"""Informática Cidadã"" é um projeto da UnP volta...",https://www.facebook.com/universidadepotiguar/...,shared_story,0,50,53,3,link,UnP – Universidade Potiguar
269,2017-04-23T00:00:00+0000,23-04-2017,173450952675280_1405661599454203,Se você deseja melhorar as suas habilidades de...,https://www.facebook.com/universidadepotiguar/...,added_photos,1,29,34,11,photo,UnP – Universidade Potiguar
270,2017-04-23T12:00:00+0000,23-04-2017,173450952675280_1405663359454027,Hoje é o #DiaNacionalDaEducaçãoDeSurdos. A gen...,https://www.facebook.com/universidadepotiguar/...,added_photos,1,35,37,4,photo,UnP – Universidade Potiguar
271,2017-04-23T15:00:00+0000,23-04-2017,173450952675280_1405664782787218,Hoje é o #DiaMundialDoLivro e queremos fazer u...,https://www.facebook.com/universidadepotiguar/...,added_photos,4,9,9,0,photo,UnP – Universidade Potiguar
272,2017-04-23T18:00:00+0000,23-04-2017,173450952675280_1405668476120182,Você já pode se programar! O maior evento acad...,https://www.facebook.com/universidadepotiguar/...,added_photos,0,1,1,3,photo,UnP – Universidade Potiguar


In [16]:
from bokeh.charts import Scatter, Bar
from bokeh.io import output_notebook, show
from bokeh.layouts import row

In [17]:
page_one = Scatter(
        page_feed_data_one,
        x='date',
        y='total_reactions', 
        xlabel='Date',
        ylabel='Total Reactions', 
        color='type',
        title='Date vs Total Reactions - ' + page_one['name']
)

page_two = Scatter(
        page_feed_data_two,
        x='date',
        y='total_reactions', 
        xlabel='Date',
        ylabel='Total Reactions', 
        color='type',
        title='Date vs Total Reactions - ' + page_two['name']
)

layout = row(page_one, page_two, sizing_mode='scale_width')
output_notebook()
show(layout)

In [14]:
p = Bar(
    pages_feed_data,
    label='date',
    values='total_reactions',
    agg='sum',
    stack='page',
    xlabel='Date',
    ylabel='Total Reactions',
    title="Sum Post Total Reactions by Date, stacked by Page",
    legend='top_right'
)
output_notebook()
show(p)

In [15]:
# References

# https://facebook-sdk.readthedocs.io/en/latest/api.html
# https://developers.facebook.com/docs/graph-api/reference/v2.7