# Hidden Engagement & Problems

This is an attempt to document some of the problems that were encountered while gathering Facebook engagement data for scholarly articles based on their DOIs.

In [1]:
import datetime, time
import json
import urllib.parse
import pandas as pd
import configparser
from pprint import pprint

from ATB.ATB.Altmetric import Altmetric, AltmetricHTTPException
from ATB.ATB.Facebook import Facebook
from ATB.ATB.Utils import resolve_doi

# Load API credentials from a local config file so they stay out of the notebook.
Config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty list means the config was not loaded.
# Fail here with a clear message instead of a NoSectionError further down.
if not Config.read('config.cnf'):
    raise FileNotFoundError(
        "Could not read 'config.cnf'; it must provide [facebook] app_id/app_secret "
        "and [altmetric] key")
FACEBOOK_APP_ID = Config.get('facebook', 'app_id')
FACEBOOK_APP_SECRET = Config.get('facebook', 'app_secret')
ALTMETRIC_KEY = Config.get('altmetric', 'key')

In [2]:
# Create the two API clients that the remaining cells query.
# NOTE(review): the output recorded for this cell shows a generated access
# token (presumably printed by one of these constructors) - clear or redact the
# cell output before sharing the notebook.
fb_graph = Facebook(FACEBOOK_APP_ID, FACEBOOK_APP_SECRET)
altmetric = Altmetric(api_key=ALTMETRIC_KEY)

Generated access token: 287299458433880|<redacted>


## 1. DOI <-> URL (Resolving DOIs)

Problems while resolving DOIs. See this: https://www.crossref.org/blog/urls-and-dois-a-complicated-relationship/#

## 2. URL <-> OpenGraph (Querying FB API)

1. Facebook sometimes maps different variations of the same URL (http vs. https, with and without a trailing /) to different Open Graph objects, and sometimes it does not.
2. Some URLs that point to different articles are mapped to a single Open Graph object. This happens, e.g., with Scielo publications, because Scielo uses URL parameters to specify pages.

In [21]:
# Query the Facebook Graph API for several spellings of the same article
# (http/https, with/without trailing slash, and four DOI resolver forms) and
# tabulate which Open Graph object and share count each spelling maps to.
url_base = "www.nature.com/news/the-future-of-dna-sequencing-1.22787"
doi = "10.1038/550179a"

urls = ['http://' + url_base,
        'http://' + url_base + '/',
        'https://' + url_base,
        'https://' + url_base + '/',
        'http://dx.doi.org/' + doi,
        'https://dx.doi.org/' + doi,
        'http://doi.org/' + doi,
        'https://doi.org/' + doi]

og_ids = []
shares = []

for url in urls:
    try:
        r = fb_graph.get_object(url, fields="og_object, engagement")
        # Extract both values before appending anything: if only one of the
        # two lookups fails, the lists must still receive exactly one entry
        # each, otherwise the columns end up with different lengths.
        og_id = r['og_object']['id']
        share_count = r['engagement']['share_count']
    except Exception:
        # Best effort: a failed request or a response without the expected
        # fields is recorded as missing. `Exception` (rather than a bare
        # `except`) lets KeyboardInterrupt/SystemExit still stop the cell.
        og_id, share_count = None, None
    og_ids.append(og_id)
    shares.append(share_count)

# Last expression of the cell: displayed as a table with a fixed column order.
pd.DataFrame({'URL': urls,
              'OG IDs': og_ids,
              'Shares': shares})[['URL', 'OG IDs', 'Shares']]

Unnamed: 0,URL,OG IDs,Shares
0,http://www.nature.com/news/the-future-of-dna-s...,1416175841826563.0,510.0
1,http://www.nature.com/news/the-future-of-dna-s...,1446759318778508.0,0.0
2,https://www.nature.com/news/the-future-of-dna-...,1585548861525924.0,1.0
3,https://www.nature.com/news/the-future-of-dna-...,1649534128454620.0,0.0
4,http://dx.doi.org/10.1038/550179a,1472429859490322.0,509.0
5,https://dx.doi.org/10.1038/550179a,,
6,http://doi.org/10.1038/550179a,,
7,https://doi.org/10.1038/550179a,,


In [19]:
# Same comparison as for the Nature article, here for a BioMed Central article:
# look up every URL/DOI spelling and tabulate the Open Graph object ID and
# share count that the Facebook Graph API returns for it.
url_base = "bmcbiol.biomedcentral.com/articles/10.1186/1741-7007-10-51"
doi = "10.1186/1741-7007-10-51"

urls = ['http://' + url_base,
        'http://' + url_base + '/',
        'https://' + url_base,
        'https://' + url_base + '/',
        'http://dx.doi.org/' + doi,
        'https://dx.doi.org/' + doi,
        'http://doi.org/' + doi,
        'https://doi.org/' + doi]

og_ids = []
shares = []

for url in urls:
    try:
        r = fb_graph.get_object(url, fields="og_object, engagement")
        # Read both fields first so a failure on the second one cannot leave
        # an extra entry in `og_ids` and misalign the two lists.
        og_id = r['og_object']['id']
        share_count = r['engagement']['share_count']
    except Exception:
        # Best effort: record the URL as missing, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare `except` would.
        og_id, share_count = None, None
    og_ids.append(og_id)
    shares.append(share_count)

# Last expression of the cell: displayed as a table.
pd.DataFrame({'URL': urls,
              'OG IDs': og_ids,
              'Shares': shares})

Unnamed: 0,OG IDs,Shares,URL
0,2058851297473624,1,http://bmcbiol.biomedcentral.com/articles/10.1...
1,2058851297473624,0,http://bmcbiol.biomedcentral.com/articles/10.1...
2,2058851297473624,0,https://bmcbiol.biomedcentral.com/articles/10....
3,2058851297473624,0,https://bmcbiol.biomedcentral.com/articles/10....
4,2058851297473624,0,http://dx.doi.org/10.1186/1741-7007-10-51
5,2058851297473624,0,https://dx.doi.org/10.1186/1741-7007-10-51
6,2058851297473624,0,http://doi.org/10.1186/1741-7007-10-51
7,2058851297473624,0,https://doi.org/10.1186/1741-7007-10-51


## 3. Differing results for FB API & Altmetric.com

Example DOI: [10.1186/1741-7007-10-51](https://doi.org/10.1186/1741-7007-10-51), which resolves to this URL: https://bmcbiol.biomedcentral.com/articles/10.1186/1741-7007-10-51

Detailed Altmetric results for the DOI: [Altmetric results](https://biomedcentral.altmetric.com/details/799209/facebook)

http://www.biomedcentral.com/1741-7007/10/51/

In [3]:
# Example article used by the Altmetric and Facebook comparison cells below:
# its DOI ...
doi = "10.1186/1741-7007-10-51"
# ... and the http landing-page URL queried for it.
url = "http://bmcbiol.biomedcentral.com/articles/10.1186/1741-7007-10-51"

### Results per Altmetric API

In [4]:
# Look up the example article in Altmetric twice: once by landing-page URL and
# once by DOI.
alt_url = altmetric.uri(url, fetch=True)
alt_doi = altmetric.doi(doi, fetch=True)

# Check whether both lookups resolve to the same Altmetric record.
print("DOI and URL have same altmetric_id: {}".format(alt_url['altmetric_id'] == alt_doi['altmetric_id']))

# NOTE(review): the value printed as "FB shares" is Altmetric's Facebook
# `posts_count` - confirm it is comparable to Facebook's `share_count`.
print("FB shares: {}".format(alt_doi['counts']['facebook']['posts_count']))

DOI and ULR have same altmetric_id: True
FB shares: 38


### Results per Facebook API

In [5]:
# Query the Facebook Graph API for the same article by landing-page URL and by
# its dx.doi.org resolver URL.
fb_url = fb_graph.get_object(url, fields="engagement, og_object")
fb_doi = fb_graph.get_object("http://dx.doi.org/" + doi, fields="engagement, og_object")

# Check whether both spellings map to the same Open Graph object.
print("DOI and URL have same og_object_id: {}".format(fb_url['og_object']['id'] == fb_doi['og_object']['id']))

# Share counts are reported per queried URL, so print them separately.
print("FB shares for URL: {}".format(fb_url['engagement']['share_count']))
print("FB shares for DOI: {}".format(fb_doi['engagement']['share_count']))

DOI and ULR have same og_object_id: True
FB shares for URL: 1
FB shares for DOI: 0
