In [109]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [237]:
import datetime, time
import json
import urllib.parse
import pandas as pd
import configparser
from pprint import pprint

from ATB.ATB.Altmetric import Altmetric, AltmetricHTTPException
from ATB.ATB.Facebook import Facebook
from ATB.ATB.DBConnection import DBConnection
from ATB.ATB.Utils import resolve_doi, print_progress_bar

# Load config
# Credentials are read from 'config.cnf' (resolved relative to the current
# working directory) so they never appear in the notebook itself.
# Note: ConfigParser.read() silently skips files it cannot open, so a missing
# 'config.cnf' only surfaces on the first Config.get() call below.
Config = configparser.ConfigParser()
Config.read('config.cnf')
# [facebook] section: app id / app secret handed to the Facebook client.
FACEBOOK_APP_ID = Config.get('facebook', 'app_id')
FACEBOOK_APP_SECRET = Config.get('facebook', 'app_secret')
# [altmetric] section: API key handed to the Altmetric client.
ALTMETRIC_KEY = Config.get('altmetric', 'key')

In [238]:
# Where the collected rows are stored and which table receives them.
db_path = 'data/main.db'
db_table = 'testing'

# Column layout, written as (name, type) pairs so a column and its type stay
# side by side; zip() transposes the pairs into the two parallel lists that
# DBConnection is given (names first, types second, same order as listed).
db_col_names, db_col_types = map(list, zip(
    ('doi', 'TEXT'),
    ('timestamp', 'TEXT'),
    ('doi_resolve_status', 'INTEGER'),
    ('doi_resolve_error', 'TEXT'),
    ('doi_url', 'TEXT'),
    ('fb_og_object', 'TEXT'),
    ('fb_engagement', 'TEXT'),
    ('fb_response_error', 'TEXT'),
    ('am_response', 'TEXT'),
    ('am_response_error', 'TEXT'),
))

# Column passed to DBConnection as the unique one.
unique_col = "doi"

db_con = DBConnection(
    db_path=db_path,
    db_table=db_table,
    db_col_names=db_col_names,
    db_col_types=db_col_types,
    unique_col=unique_col,
)

In [164]:
# API clients used by the collection loop: `fb_graph` is queried with
# get_object() for each resolved URL, `altmetric` with doi() for each DOI.
# NOTE(review): the recorded output of this cell shows a "Generated access
# token" line, presumably printed by the Facebook client — clear the cell
# output before sharing or committing the notebook.
fb_graph = Facebook(FACEBOOK_APP_ID, FACEBOOK_APP_SECRET)
altmetric = Altmetric(api_key = ALTMETRIC_KEY)

Generated access token: 287299458433880|6Y_ml710QWnU7HBYLWjaneoWVKU


In [9]:
# Input data: read the CSV and keep its `doi` column as the pool of DOIs the
# collection loop draws from.
df = pd.read_csv("data/input_files/state_of_oa.csv", encoding = 'utf8')
dois = df.doi

In [188]:
def parse_response(doi, now, doi_resolve_status, doi_resolve_error, doi_url, 
                   fb_og_object, fb_engagement, fb_response_error, am_response, am_response_error):
    """Serialise the values collected for one DOI into a flat row dict.

    Scalar-like fields (DOI, timestamp, resolve status/error, URL and the two
    API error fields) are converted with ``str()``; the three API payloads
    (``fb_og_object``, ``fb_engagement``, ``am_response``) are encoded with
    ``json.dumps()``. Consequently a ``None`` input becomes the text
    ``'None'`` in a ``str()`` field and ``'null'`` in a JSON field.

    Returns:
        dict: one entry per argument, keyed by the argument's name, in the
        same order as the parameters; every value is a string.
    """
    # (key, encoder, value) in the order the keys must appear in the row.
    columns = (
        ('doi', str, doi),
        ('timestamp', str, now),
        ('doi_resolve_status', str, doi_resolve_status),
        ('doi_resolve_error', str, doi_resolve_error),
        ('doi_url', str, doi_url),
        ('fb_og_object', json.dumps, fb_og_object),
        ('fb_engagement', json.dumps, fb_engagement),
        ('fb_response_error', str, fb_response_error),
        ('am_response', json.dumps, am_response),
        ('am_response_error', str, am_response_error),
    )
    return {key: encode(value) for key, encode, value in columns}

In [194]:
# Collect one DB row per DOI: resolve the DOI, ask Facebook about the resolved
# URL, ask Altmetric about the DOI, then store everything through db_con.

sample_size = 20  # number of not-yet-stored DOIs processed per run of this cell

# DOIs already stored in the table are skipped.
# NOTE(review): the membership test below assumes db_con.select() returns a
# flat collection of DOI strings — confirm; if it returns row tuples, nothing
# is ever filtered out.
existing_dois = db_con.select("doi", db_table)

# Keep the pending DOIs in their own Series instead of rebinding `dois`:
# a plain list has no .sample(), and leaving `dois` untouched makes the cell
# safe to re-run.
pending_dois = pd.Series([doi for doi in dois if doi not in existing_dois],
                         dtype=object)

# .sample() raises when asked for more items than exist, so cap the request.
input_list = pending_dois.sample(min(sample_size, len(pending_dois)))
i_max = len(input_list)

for i, doi in enumerate(input_list, 1):
    now = datetime.datetime.now()   # timestamp stored with the row
    started = time.monotonic()      # clock used only for rate limiting

    # Init row values
    doi_resolve_status = None
    doi_resolve_error = None
    doi_url = None
    fb_og_object = None
    fb_engagement = None
    fb_response_error = None
    am_response = None
    am_response_error = None

    # Resolve DOI
    response_status, response = resolve_doi(doi)

    # if the DOI resolving fails (timeouts, too many redirects, ...)
    if response_status == "NoResponse":
        doi_resolve_error = response

    # successfully resolved DOI
    elif response_status == 200:
        doi_resolve_status = response_status
        doi_url = response

        # retrieve FB Open Graph Object + engagement
        try:
            fb_response = fb_graph.get_object(id=urllib.parse.quote_plus(doi_url),
                                              fields="engagement, og_object")
        except Exception as exc:
            # Record the failure itself; the response is not inspected, so a
            # result left over from a previous iteration can never be reused.
            fb_response_error = exc
        else:
            try:
                fb_og_object = fb_response['og_object']
                fb_engagement = fb_response['engagement']
            except (KeyError, TypeError):
                # Response arrived but lacks one of the requested fields.
                fb_response_error = "no_og_object"

    # resolved DOI but status_code != 200
    else:
        doi_resolve_status = response_status
        doi_resolve_error = response

    # Get Altmetric Data based on DOI
    try:
        am_response = altmetric.doi(doi=doi, fetch=True)
    except AltmetricHTTPException as e:
        am_response_error = e

    # Create DB entry
    row = parse_response(doi, now, doi_resolve_status,
                         doi_resolve_error, doi_url, fb_og_object,
                         fb_engagement, fb_response_error, am_response, am_response_error)
    db_con.save_row(row)

    # Be nice to the APIs: every iteration takes at least one second.
    elapsed = time.monotonic() - started
    if elapsed < 1:
        time.sleep(1 - elapsed)

    # ETA assumes roughly one second per remaining DOI (the floor enforced above).
    m, s = divmod(i_max - i, 60)
    h, m = divmod(m, 60)

    # Print progress bar
    print_progress_bar(i, i_max, length=80, suffix="ETA {:d}:{:d}:{:d}".format(h, m, s))

 |████████████████████████████████████████████████████████████████████████████████| 100.0% ETA 0:0:0
