In [2]:
### Library necessary to run this IPython Notebook
!pip install facebook-sdk
!pip install tqdm
!pip install requests
!pip install arrow



In [3]:
import pandas as pd
import facebook
import requests
import arrow
from tqdm import tqdm

In [27]:
# Load the notebook settings (ACCESS_TOKEN, PAGE_ID) from the local '.env'
# file, which is parsed as JSON records; the first record is kept as ENV.
ENV = (
    pd.read_json('.env', orient='records')
    .iloc[0]
)

In [28]:
# Graph API client authenticated with the token from ENV, pinned to API version 2.7.
graph = facebook.GraphAPI(
    access_token=ENV['ACCESS_TOKEN'],
    version='2.7',
)

In [29]:
# Fetch the page's profile fields together with the first 25 feed posts
# (only the post fields consumed by the extraction step are requested).
page = graph.get_object(
    id=ENV['PAGE_ID'],
    fields=(
        'name, about, category, fan_count, '
        'feed.limit(25){created_time, permalink_url, type, status_type, message, shares}'
    ),
)

In [48]:
# Reference "now" in UTC, captured when this cell runs; posts are compared
# against its month and year to decide whether they belong to the current month.
arw = arrow.utcnow()
current_month = arw.month
current_year = arw.year


def extract_feed_data(feed, rows=None):
    """Collect one row of engagement data per post published in the current month.

    Walks the feed page by page. For every post, the like, reaction and
    comment totals are fetched from the Graph API and combined with the post's
    own fields into a dict. Iteration stops at the first post whose creation
    month/year differs from the reference date, or when the feed has no
    further page.

    Parameters
    ----------
    feed : dict
        A feed page as returned by the Graph API: a ``'data'`` list of posts
        and, optionally, a ``'paging'`` dict with a ``'next'`` URL.
    rows : list of dict, optional
        Existing list to append to. A new list is created when omitted.

    Returns
    -------
    list of dict
        ``rows`` with one dict appended per extracted post.

    Raises
    ------
    requests.HTTPError
        If fetching a follow-up feed page returns an HTTP error status.
    """
    # A default of ``[]`` would be created once and shared by every call, so
    # re-running the extraction would keep appending to the previous results.
    if rows is None:
        rows = []

    # Iterate over pages instead of recursing: no stack growth on long feeds.
    while True:
        reached_other_month = False

        for post in tqdm(feed['data'], 'Extracting current month data...'):
            created = arrow.get(post['created_time'])
            # Compare the year as well, so a post from the same month of a
            # different year is not mistaken for a current-month post.
            if (created.year, created.month) != (current_year, current_month):
                reached_other_month = True
                break

            post_id = str(post['id'])
            # limit=0 with summary=true returns only the aggregate count.
            likes = graph.get_object(post_id + '/likes?limit=0&summary=true')
            reactions = graph.get_object(post_id + '/reactions?limit=0&summary=true')
            comments = graph.get_object(post_id + '/comments?limit=0&summary=true')

            rows.append({
                'id': post['id'],
                'page_id': ENV['PAGE_ID'],
                'created_time': post['created_time'],
                'date': created.format('DD-MM-YYYY'),
                'permalink_url': post['permalink_url'],
                'type': post['type'],
                'status_type': post['status_type'],
                # 'message' and 'shares' are absent on some posts.
                'message': post['message'] if 'message' in post else '-',
                'total_likes': likes['summary']['total_count'],
                'total_reactions': reactions['summary']['total_count'],
                'total_comments': comments['summary']['total_count'],
                'total_shares': post['shares']['count'] if 'shares' in post else 0
            })

        # The last page carries no 'next' link; stop there instead of failing
        # with a KeyError when the whole feed lies within the current month.
        next_url = feed.get('paging', {}).get('next')
        if reached_other_month or not next_url:
            break

        response = requests.get(next_url)
        # Surface HTTP failures here rather than as a confusing KeyError on
        # the error payload's missing 'data' key.
        response.raise_for_status()
        feed = response.json()

    print('Month data complete!')
    return rows

In [49]:
# Build the frame from the extracted rows, ordered oldest-first by 'created_time'.
page_data = (
    pd.DataFrame(extract_feed_data(page['feed']))
    .sort_values(['created_time'], ascending=True)
)

# Replace the shuffled post-sort index with a 1-based running number.
page_data.index = range(1, len(page_data) + 1)

Extracting current month data...: 100%|██████████| 25/25 [00:38<00:00,  1.86s/it]
Extracting current month data...: 100%|██████████| 25/25 [00:37<00:00,  1.40s/it]
Extracting current month data...: 100%|██████████| 25/25 [00:37<00:00,  1.43s/it]
Extracting current month data...:  16%|█▌        | 4/25 [00:05<00:30,  1.45s/it]

Month data complete!





In [67]:
# Summarise the extracted frame: entry count, per-column non-null counts and dtypes.
page_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 1 to 79
Data columns (total 12 columns):
created_time       79 non-null object
date               79 non-null object
id                 79 non-null object
message            79 non-null object
page_id            79 non-null object
permalink_url      79 non-null object
status_type        79 non-null object
total_comments     79 non-null int64
total_likes        79 non-null int64
total_reactions    79 non-null int64
total_shares       79 non-null int64
type               79 non-null object
dtypes: int64(4), object(8)
memory usage: 7.5+ KB


In [68]:
# Preview the first rows; the frame was sorted by 'created_time' ascending,
# so these are the oldest extracted posts.
page_data.head()

Unnamed: 0,created_time,date,id,message,page_id,permalink_url,status_type,total_comments,total_likes,total_reactions,total_shares,type
1,2017-04-01T12:28:00+0000,01-04-2017,403836489686061_1490111544391878,Escola de Música recebe Encontro de Trombonist...,ufrnoficial,https://www.facebook.com/ufrnoficial/posts/149...,added_photos,0,5,5,0,photo
2,2017-04-01T13:08:00+0000,01-04-2017,403836489686061_1490097141059985,Maternidade Januário Cicco realiza II Semana d...,ufrnoficial,https://www.facebook.com/ufrnoficial/posts/149...,added_photos,0,11,11,0,photo
3,2017-04-01T14:51:00+0000,01-04-2017,403836489686061_1490083781061321,Cooperativa da UFRN comemora 40 anos com Feira...,ufrnoficial,https://www.facebook.com/ufrnoficial/posts/149...,added_photos,0,9,9,6,photo
4,2017-04-01T15:04:00+0000,01-04-2017,403836489686061_1489386757797690,Instituto de Física recebe Workshop sobre Maté...,ufrnoficial,https://www.facebook.com/ufrnoficial/posts/148...,shared_story,5,49,49,6,link
5,2017-04-03T13:52:00+0000,03-04-2017,403836489686061_1490084284394604,Cooperativa da UFRN comemora 40 anos com Feira...,ufrnoficial,https://www.facebook.com/ufrnoficial/posts/149...,added_photos,0,5,5,0,photo


In [69]:
from bokeh.io import output_notebook, show
from bokeh.charts import Scatter
# NOTE(review): `bokeh.charts` appears to have been dropped from later Bokeh
# releases (split out as `bkcharts`) — confirm against the installed version.

# Route Bokeh output to the notebook.
output_notebook()

# What this chart tells us: which posts drew the most reactions, when they
# were published, and what type of post they were.
p = Scatter(
    page_data,
    x='date',
    y='total_reactions',
    xlabel='Date',
    ylabel='Total Reactions',
    color='type',
    title='Date vs Total Reactions',
)
show(p)
# If in doubt, zoom out on the chart.

In [None]:
# References

# https://facebook-sdk.readthedocs.io/en/latest/api.html
# https://developers.facebook.com/docs/graph-api/reference/v2.7