In [1]:
import requests
from datetime import datetime, timedelta

In [2]:
def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

In [3]:
def gen_params(start, end):
    start_str = start.strftime('%Y-%m-%d')
    end_str = end.strftime('%Y-%m-%d')
    date_filter_str = f'from_publication_date:{start_str},to_publication_date:{end_str},cited_by_count:>1,is_retracted:False'
        # cited_by_count:>10, \
        # is_retracted:False'

    params = {
        'filter': f'concepts.id:C119857082,{date_filter_str}',
        # 'sort' : 'publication_date',
        'per_page': 200,
    }

    return params

In [6]:
def fetch(params):
    data = []
    for i in range(1, 51):
        r = requests.get(url, {**params, **{'page': i}})
        json_data = r.json()
        assert json_data['meta']['count'] < 10000
        if i == 1: print(json_data['meta']['count'])
        if len(json_data['results']) == 0:break
        data.extend(json_data['results'])

    return data

In [7]:
url = 'https://api.openalex.org/works'

start = datetime.strptime('1980-01-01', '%Y-%m-%d')
step = timedelta(days=15)
end = datetime.strptime('2000-12-31', '%Y-%m-%d')

params = gen_params(start, end)
r = requests.get(url, params)
json_data = r.json()
print(json_data['meta'])

data = []
curr_date = start
while curr_date < end:
    print(curr_date, curr_date + step)
    params = gen_params(curr_date, curr_date + step)
    data.extend(fetch(params))
    curr_date = curr_date + step


len(data)


{'count': 30616, 'db_response_time_ms': 270, 'page': 1, 'per_page': 200}
1980-01-01 00:00:00 1980-01-16 00:00:00
58
1980-01-16 00:00:00 1980-01-31 00:00:00
0
1980-01-31 00:00:00 1980-02-15 00:00:00
10
1980-02-15 00:00:00 1980-03-01 00:00:00
7
1980-03-01 00:00:00 1980-03-16 00:00:00
7
1980-03-16 00:00:00 1980-03-31 00:00:00
0
1980-03-31 00:00:00 1980-04-15 00:00:00
4
1980-04-15 00:00:00 1980-04-30 00:00:00
1
1980-04-30 00:00:00 1980-05-15 00:00:00
5
1980-05-15 00:00:00 1980-05-30 00:00:00
1
1980-05-30 00:00:00 1980-06-14 00:00:00
6
1980-06-14 00:00:00 1980-06-29 00:00:00
2
1980-06-29 00:00:00 1980-07-14 00:00:00
7
1980-07-14 00:00:00 1980-07-29 00:00:00
1
1980-07-29 00:00:00 1980-08-13 00:00:00
2
1980-08-13 00:00:00 1980-08-28 00:00:00
7
1980-08-28 00:00:00 1980-09-12 00:00:00
8
1980-09-12 00:00:00 1980-09-27 00:00:00
1
1980-09-27 00:00:00 1980-10-12 00:00:00
6
1980-10-12 00:00:00 1980-10-27 00:00:00
0
1980-10-27 00:00:00 1980-11-11 00:00:00
3
1980-11-11 00:00:00 1980-11-26 00:00:00
0
1

0
1987-11-20 00:00:00 1987-12-05 00:00:00
34
1987-12-05 00:00:00 1987-12-20 00:00:00
0
1987-12-20 00:00:00 1988-01-04 00:00:00
270
1988-01-04 00:00:00 1988-01-19 00:00:00
3
1988-01-19 00:00:00 1988-02-03 00:00:00
11
1988-02-03 00:00:00 1988-02-18 00:00:00
4
1988-02-18 00:00:00 1988-03-04 00:00:00
51
1988-03-04 00:00:00 1988-03-19 00:00:00
8
1988-03-19 00:00:00 1988-04-03 00:00:00
33
1988-04-03 00:00:00 1988-04-18 00:00:00
17
1988-04-18 00:00:00 1988-05-03 00:00:00
29
1988-05-03 00:00:00 1988-05-18 00:00:00
3
1988-05-18 00:00:00 1988-06-02 00:00:00
50
1988-06-02 00:00:00 1988-06-17 00:00:00
33
1988-06-17 00:00:00 1988-07-02 00:00:00
25
1988-07-02 00:00:00 1988-07-17 00:00:00
3
1988-07-17 00:00:00 1988-08-01 00:00:00
50
1988-08-01 00:00:00 1988-08-16 00:00:00
29
1988-08-16 00:00:00 1988-08-31 00:00:00
34
1988-08-31 00:00:00 1988-09-15 00:00:00
44
1988-09-15 00:00:00 1988-09-30 00:00:00
3
1988-09-30 00:00:00 1988-10-15 00:00:00
34
1988-10-15 00:00:00 1988-10-30 00:00:00
5
1988-10-30 00:00

54
1995-09-24 00:00:00 1995-10-09 00:00:00
79
1995-10-09 00:00:00 1995-10-24 00:00:00
94
1995-10-24 00:00:00 1995-11-08 00:00:00
89
1995-11-08 00:00:00 1995-11-23 00:00:00
43
1995-11-23 00:00:00 1995-12-08 00:00:00
278
1995-12-08 00:00:00 1995-12-23 00:00:00
19
1995-12-23 00:00:00 1996-01-07 00:00:00
602
1996-01-07 00:00:00 1996-01-22 00:00:00
6
1996-01-22 00:00:00 1996-02-06 00:00:00
83
1996-02-06 00:00:00 1996-02-21 00:00:00
11
1996-02-21 00:00:00 1996-03-07 00:00:00
84
1996-03-07 00:00:00 1996-03-22 00:00:00
22
1996-03-22 00:00:00 1996-04-06 00:00:00
107
1996-04-06 00:00:00 1996-04-21 00:00:00
27
1996-04-21 00:00:00 1996-05-06 00:00:00
92
1996-05-06 00:00:00 1996-05-21 00:00:00
67
1996-05-21 00:00:00 1996-06-05 00:00:00
181
1996-06-05 00:00:00 1996-06-20 00:00:00
58
1996-06-20 00:00:00 1996-07-05 00:00:00
109
1996-07-05 00:00:00 1996-07-20 00:00:00
32
1996-07-20 00:00:00 1996-08-04 00:00:00
189
1996-08-04 00:00:00 1996-08-19 00:00:00
80
1996-08-19 00:00:00 1996-09-03 00:00:00
143
19

34491

In [8]:
len(data)

34491

In [9]:
from pymongo import MongoClient
client = MongoClient('localhost', 27017)

In [10]:
db = client['diva-proj']
papers = db.papers

In [11]:
def format_for_mongo(din):
    from copy import deepcopy
    d = deepcopy(din)
    d['wid'] = d['id'].split('/')[-1]
    d['publication_date'] = datetime.strptime(d['publication_date'], '%Y-%m-%d')
    return d

In [12]:
data_to_insert = [format_for_mongo(d) for d in data]
try:
    papers.insert_many(data_to_insert, ordered=False)
except:
    print('asd')

In [7]:
Author_name="Jason Priem"

In [8]:
url = 'https://api.openalex.org/works?filter=author.id:A2208157607'

import json
r = requests.get(url)
json_data = r.json()
#print(json_data['results'])
dicts={}
author_works=[]
author_title=[]
for i in range(len(json_data['results'])):
    if json_data['results'][i]['cited_by_count']>0:
        author_works.append(json_data['results'][i]['id'])
        author_title.append(json_data['results'][i]['title'])
for i in range(len(author_works)):
    dicts[author_works[i]] = author_title[i]
print(dicts)        

{'https://openalex.org/W3208256308': 'CiteAs: Better Software through Sociotechnical Change for Better Software Citation', 'https://openalex.org/W3084168212': 'The aftermath of Big Deal cancellations and their impact on interlibrary loans.', 'https://openalex.org/W3084303366': 'How much does an interlibrary loan request cost? A review of the literature.', 'https://openalex.org/W3094060352': 'The State of Altmetrics: A Tenth Anniversary Celebration', 'https://openalex.org/W2980172586': 'The Future of OA: A large-scale analysis projecting Open Access publication and readership', 'https://openalex.org/W2941875476': 'Collecting, Calculating and Displaying Altmetrics with Open Source', 'https://openalex.org/W2741809807': 'The state of OA: a large-scale analysis of the prevalence and impact of Open Access articles', 'https://openalex.org/W854896339': 'Altmetrics (Chapter from Beyond Bibliometrics: Harnessing Multidimensional Indicators of Scholarly Impact)', 'https://openalex.org/W2038196424

In [9]:
new_dict={}
for j in author_works:
    new_url="https://api.openalex.org/works?filter=referenced_works:"+j
    r = requests.get(new_url)
    dummy_data = r.json()
    li=[]
    for i in range(len(dummy_data['results'])):
        li.append(dummy_data['results'][i])
        new_dict[dicts[j]]=li
        #papers_publising.append(dummy_data['results'][i]['title'])


In [10]:
# print(new_dict)

In [11]:
final_dict={}
final_dict[Author_name]=new_dict

In [19]:
from pymongo import MongoClient
client1 = MongoClient('localhost', 27017)

In [23]:
import pymongo

myclient = pymongo.MongoClient()
mydb = myclient["diva-proj"]
mycol = mydb["authors"]
#mycol.insert(dicts)
#print(mycol)

x = mycol.insert_one(final_dict).inserted_id


In [17]:
referenced_by=list(mycol.find({'Jason Priem': {'$exists': True}}))[0]

IndexError: list index out of range

In [23]:
def author():
    author_works = list(mycol.find({'Jason Priem': {'$exists': True}}))[0]
    author_works = author_works['Jason Priem']
    # print(referenced_by)
    g2=nx.Graph()
    author_name='Jason Priem'

    all_referenced = dict()
    for auth_paper, references in author_works.items():
        for r in references:
            all_referenced[r['id']] = r
    
    all_referenced = list(all_referenced.values())


    print(len(all_referenced))

    g2.add_node(author_name)
    g2.nodes[author_name]['label'] = author_name
    for paper in all_referenced:
        paper_id = paper['id']
        g2.add_node(paper_id)    
        g2.nodes[paper_id]['label'] = paper['display_name']


    # pprint(g2.nodes.data())
    for paper in all_referenced:
        g2.add_edge(author, paper['id'])

    
    for paper in all_referenced:
        for citation in paper['referenced_works']:
            if citation in g2.nodes:
                g2.add_edge(paper['id'], citation)

    return g2

In [25]:
import networkx as nx
asd = author()

173


In [34]:
for nid in asd.nodes:
    print(nid)
    print(asd.nodes[nid]['label'])
    print('------')

Jason Priem
Jason Priem
------
https://openalex.org/W3214933262
Citation method, please? A case study in astrophysics
------
https://openalex.org/W3165683843
No Deal: Investigating the Influence of Restricted Access to Elsevier Journals on German Researchers' Publishing and Citing Behaviours.
------
https://openalex.org/W3156699904
Bestandscontrolling bei elektronischen Ressourcen: Entscheidungshilfen für die Lizenzierung
------
https://openalex.org/W3188592731
Rethinking Print Journal Subscriptions at a Large Research University
------
https://openalex.org/W3174975695
Is the open access citation advantage real? A systematic review of the citation of open access and subscription-based articles.
------
https://openalex.org/W3185077430
scite: A smart citation index that displays the context of citations and classifies their intent using deep learning
------
https://openalex.org/W3133636160
Attracting new users or business as usual? A case study of converting academic subscription-based j

KeyError: 'label'

In [37]:
asd.nodes

NodeView(('Jason Priem', 'https://openalex.org/W3214933262', 'https://openalex.org/W3165683843', 'https://openalex.org/W3156699904', 'https://openalex.org/W3188592731', 'https://openalex.org/W3174975695', 'https://openalex.org/W3185077430', 'https://openalex.org/W3133636160', 'https://openalex.org/W3167796397', 'https://openalex.org/W3178786088', 'https://openalex.org/W3178526726', 'https://openalex.org/W3169094572', 'https://openalex.org/W3164337962', 'https://openalex.org/W3164869913', 'https://openalex.org/W3137946852', 'https://openalex.org/W3135873852', 'https://openalex.org/W3127916939', 'https://openalex.org/W3113750148', 'https://openalex.org/W3091387501', 'https://openalex.org/W3086317401', 'https://openalex.org/W3100294553', 'https://openalex.org/W3104516614', 'https://openalex.org/W3045830694', 'https://openalex.org/W3044997035', 'https://openalex.org/W3151816200', 'https://openalex.org/W3037826799', 'https://openalex.org/W3034834389', 'https://openalex.org/W3033361750', 'ht