In [1]:
import requests
import json
from numerize import numerize
from matplotlib import pyplot as plt

In [2]:
# Query-string fragment that request() appends to every API URL.
# NOTE(review): presumably this opts the calls into OpenAlex's "polite pool" — confirm against the API docs.
# NOTE(review): this is a hard-coded personal e-mail address; consider moving it to configuration before sharing.
polite = 'mailto=udevz@student.kit.edu'

In [3]:
# Convenience wrapper around requests.get for the API calls in this notebook.
# Attaches the `polite` query parameter, sends the request and returns the JSON body.

def request(api_call, timeout=30):
    """Send a GET request with the ``polite`` parameter attached and return the JSON body.

    Parameters
    ----------
    api_call : str
        Full request URL, with or without a query string.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error. Without a timeout a stalled connection would block the
        notebook indefinitely.

    Returns
    -------
    object or None
        The decoded JSON response when the status code is 200. For any other
        status code a message containing the code is printed and ``None`` is
        returned.
    """
    # Choose the joiner so the URL stays well-formed whether or not
    # ``api_call`` already carries (or ends in the middle of) a query string.
    if '?' not in api_call:
        joiner = '?'
    elif api_call.endswith(('?', '&')):
        joiner = ''
    else:
        joiner = '&'
    req = requests.get(api_call + joiner + polite, timeout=timeout)
    if req.status_code == 200:
        return req.json()
    print('Request not successful ', req.status_code)
    return None

In [4]:
def random_venue():
    """Return the response of the ``venues/random`` endpoint, fetched via ``request``."""
    endpoint = 'https://api.openalex.org/venues/random?'
    return request(endpoint)

In [6]:
# Display the record returned by random_venue() to inspect its fields.
random_venue()

{'id': 'https://openalex.org/V4306499115',
 'issn_l': None,
 'issn': None,
 'display_name': "28th Linear Accelerator Conf. (LINAC'16), East Lansing, MI, USA, 25-30 September 2016",
 'publisher': None,
 'works_count': 243,
 'cited_by_count': 95,
 'is_oa': None,
 'is_in_doaj': None,
 'homepage_url': None,
 'ids': {'openalex': 'https://openalex.org/V4306499115'},
 'x_concepts': [{'id': 'https://openalex.org/C121332964',
   'wikidata': 'https://www.wikidata.org/wiki/Q413',
   'display_name': 'Physics',
   'level': 0,
   'score': 77.4},
  {'id': 'https://openalex.org/C127413603',
   'wikidata': 'https://www.wikidata.org/wiki/Q11023',
   'display_name': 'Engineering',
   'level': 0,
   'score': 60.1},
  {'id': 'https://openalex.org/C62520636',
   'wikidata': 'https://www.wikidata.org/wiki/Q944',
   'display_name': 'Quantum mechanics',
   'level': 1,
   'score': 51.4},
  {'id': 'https://openalex.org/C41008148',
   'wikidata': 'https://www.wikidata.org/wiki/Q21198',
   'display_name': 'Compute

In [6]:
# Total number of venues, read from the 'meta' section of the list response.
req_json = request('https://api.openalex.org/venues?')
total_count = req_json['meta']['count']
# Print the abbreviated figure first, then the exact one.
# ~124.07K on 24 June
for shown_count in (numerize.numerize(total_count), total_count):
    print('Venues count total: ', shown_count)

Venues count total:  227.02K
Venues count total:  227016


In [8]:
# Number of venues per publisher, keyed by the publisher's display name.
req_json = request('https://api.openalex.org/venues?group_by=publisher')
venue_publisher_count = {
    group['key_display_name']: group['count']
    for group in req_json['group_by']
}
venue_publisher_count
# a lot of unknowns!

{'unknown': 110307,
 'springer nature': 4327,
 'elsevier': 4269,
 'informa': 3824,
 'wiley': 2426,
 'sage': 1557,
 'de gruyter': 972,
 'hindawi limited': 696,
 'cambridge university press': 606,
 'oxford university press': 568,
 'omics publishing group': 499,
 'emerald (mcb up)': 489,
 'cairn': 469,
 'medknow publications': 467,
 'inderscience enterprises ltd.': 456,
 'jstor': 448,
 'bentham science': 445,
 'openedition': 437,
 'brill': 411,
 'institute of electrical and electronics engineers': 405,
 'african journals online': 399,
 'mdpi ag': 387,
 'ovid technologies (wolters kluwer)': 367,
 'techknowledge general trading llc': 338,
 'persee program': 333,
 'scielo': 315,
 'georg thieme verlag kg': 306,
 'egypts presidential specialized council for education and scientific research': 264,
 'scientific research publishing, inc.': 248,
 'nepal journals online': 227,
 'igi global': 225,
 's. karger ag': 211,
 'armenian green publishing co.': 204,
 'science publishing group': 204,
 'world

In [9]:
# Display the raw response of grouping venues by the has_issn flag.
request('https://api.openalex.org/venues?group_by=has_issn')

{'meta': {'count': 2, 'db_response_time_ms': 20, 'page': 1, 'per_page': 200},
 'results': [],
 'group_by': [{'key': 'true', 'key_display_name': 'true', 'count': 112141},
  {'key': 'false', 'key_display_name': 'false', 'count': 11932}]}

In [None]:
# how many venues have an ISSN?
req_json = request('https://api.openalex.org/venues?filter=has_issn:true')
issn_venue_count = req_json['meta']['count']
print('Venues that have an ISSN:', numerize.numerize(issn_venue_count)) # ~112.14K on 23. June
# The denominator (all venues) is fetched only after the count has been printed.
all_venue_count = request('https://api.openalex.org/venues?')['meta']['count']
issn_venue_share = 100 * issn_venue_count / all_venue_count
print('That is', numerize.numerize(issn_venue_share), '% of all venues')


In [None]:
# Venues flagged as open access: abbreviated count, exact count, share of all venues.
req_json = request('https://api.openalex.org/venues?filter=is_oa:true')
oa_venue_count = req_json['meta']['count']
print('Venues labeled as open access:', numerize.numerize(oa_venue_count))
print(oa_venue_count)
# The denominator (all venues) is fetched only after the counts have been printed.
all_venue_count = request('https://api.openalex.org/venues?')['meta']['count']
oa_venue_share = 100 * oa_venue_count / all_venue_count
print('That is', numerize.numerize(oa_venue_share), '% of all venues')

In [None]:
# Venues flagged as listed in DOAJ: abbreviated count, exact count, share of all venues.
req_json = request('https://api.openalex.org/venues?filter=is_in_doaj:true')
doaj_venue_count = req_json['meta']['count']
print('Venues labeled as in DOAJ:', numerize.numerize(doaj_venue_count))
print(doaj_venue_count)
# The denominator (all venues) is fetched only after the counts have been printed.
all_venue_count = request('https://api.openalex.org/venues?')['meta']['count']
doaj_venue_share = 100 * doaj_venue_count / all_venue_count
print('That is', numerize.numerize(doaj_venue_share), '% of all venues')

In [None]:
# check open access for all venues
req_json = request('https://api.openalex.org/venues?group_by=is_oa')

# Map each is_oa group label to the number of venues in that group.
oa_status_count = {
    group['key_display_name']: group['count']
    for group in req_json['group_by']
}
print(oa_status_count)

# One bar per status, labelled with the status name.
bar_labels = list(oa_status_count.keys())
bar_heights = list(oa_status_count.values())
bar_positions = range(len(oa_status_count))
plt.bar(bar_positions, bar_heights, tick_label=bar_labels)
plt.title('Distribution of open access venues')
plt.xlabel('Status')
plt.ylabel('Count')
plt.gcf().set_size_inches((9, 3))
plt.show()

In [None]:
# top 5 venues by output (number of works)
req_json = request('https://api.openalex.org/venues?sort=works_count:desc')
# The request asks for results sorted by works_count in descending order,
# so the first five entries are the five venues with the most works.
# (The slice used to be [:10], which contradicted the "top 5" heading and
# the matching "top 5 cited venues" cell.)
for top5 in req_json['results'][:5]:
    print(top5['display_name'],':',top5['works_count'])

In [14]:
# top 5 cited venues
req_json = request('https://api.openalex.org/venues?sort=cited_by_count:desc')
for venue in req_json['results'][:5]:
    works = venue['works_count']
    citations = venue['cited_by_count']
    # Average citations per work, formatted by numerize.
    avg_citations = numerize.numerize(citations / works)
    print(venue['display_name'], ': works count: ', works, ": \ntotal cit. count : ", citations, ' : avg cit. :', avg_citations)
    print("###")

Nature : works count:  433660 : 
total cit. count :  20144629  : avg cit. : 46.45
###
Proceedings of the National Academy of Sciences of the United States of America : works count:  157069 : 
total cit. count :  17813834  : avg cit. : 113.41
###
Science : works count:  380574 : 
total cit. count :  17487597  : avg cit. : 45.95
###
Journal of Biological Chemistry : works count:  205102 : 
total cit. count :  14596092  : avg cit. : 71.17
###
Journal of the American Chemical Society : works count:  206081 : 
total cit. count :  12223811  : avg cit. : 59.32
###


In [None]:
# Venues without any associated works: abbreviated count, exact count, share of all venues.
req_json = request('https://api.openalex.org/venues?filter=works_count:0')
zero_works_count = req_json['meta']['count']
print('Venues that have 0 works associated:', numerize.numerize(zero_works_count))
print(zero_works_count)
# The denominator (all venues) is fetched only after the counts have been printed.
all_venue_count = request('https://api.openalex.org/venues?')['meta']['count']
zero_works_share = 100 * zero_works_count / all_venue_count
print('That is', numerize.numerize(zero_works_share), '% of all venues')

In [None]:
# Venues that were never cited: abbreviated count, exact count, share of all venues.
req_json = request('https://api.openalex.org/venues?filter=cited_by_count:0')
zero_cited_count = req_json['meta']['count']
print('Venues that have been cited 0 times:', numerize.numerize(zero_cited_count))
print(zero_cited_count)
# The denominator (all venues) is fetched only after the counts have been printed.
all_venue_count = request('https://api.openalex.org/venues?')['meta']['count']
zero_cited_share = 100 * zero_cited_count / all_venue_count
print('That is', numerize.numerize(zero_cited_share), '% of all venues')