In [None]:
# Upgrade pip itself before the dependency installs in the following cells.
%pip install --upgrade pip

In [None]:
# Install the OpenAlex Python client straight from its GitHub repository.
# No branch, tag or commit is pinned in the URL, so the installed revision can differ between runs;
# `--no-cache-dir` makes pip bypass its cache for this install.
# NOTE(review): presumably this package provides the `openalex_api` module imported below - confirm.
%pip install --upgrade --no-cache-dir "git+https://github.com/Mearman/openalex-python.git"

In [4]:
%pip install pandas altair tabulate

Note: you may need to restart the kernel to use updated packages.


In [6]:
import altair as alt
import pandas as pd

import openalex_api

In [7]:
# Client configuration targeting the OpenAlex API host.
OPENALEX_API_HOST = "https://api.openalex.org"
configuration = openalex_api.Configuration(host=OPENALEX_API_HOST)

In [8]:
# Build a single ApiClient from the configuration and hand it to every endpoint wrapper,
# instead of constructing three separate clients from the same configuration.
# NOTE(review): assumes one ApiClient may be shared between API wrapper objects, as is usual
# for OpenAPI-generated Python clients - confirm against the openalex_api package.
api_client = openalex_api.ApiClient(configuration)

authors_api = openalex_api.AuthorsApi(api_client)
works_api = openalex_api.WorksApi(api_client)
concepts_api = openalex_api.ConceptsApi(api_client)

In [19]:
# Request page 1 of the authors listing, five per page, sorted by `cited_by_count` descending,
# and keep only the `results` of the response.
top_cited_page = authors_api.get_authors(
	sort="cited_by_count:desc",
	page=1,
	per_page=5,
)
authors = top_cited_page.results

In [20]:
# Flatten every author's `counts_by_year` entries into one row per (author, year).
yearly_rows = []
for author in authors:
	for entry in author["counts_by_year"]:
		display_name = author["display_name"]
		author_url = author["id"]
		# Short ID = last path segment of the author's OpenAlex URL.
		short_id = author["ids"]["openalex"].split("/")[-1]
		yearly_rows.append({
			"name": display_name,
			"author": author_url,
			"name_and_id": f"{display_name} ({short_id})",
			"year": entry["year"],
			"cited_by_count": entry["cited_by_count"],
			"works_count": entry["works_count"],
		})

# `columns` fixes the column order of the resulting frame.
author_works_stats = pd.DataFrame(
	yearly_rows,
	columns=["name", "name_and_id", "author", "year", "cited_by_count", "works_count"],
)
author_works_stats

Unnamed: 0,name,name_and_id,author,year,cited_by_count,works_count
0,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2024,11470,236
1,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2023,157502,3984
2,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2022,149154,3918
3,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2021,131493,2716
4,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2020,106902,2743
...,...,...,...,...,...,...
60,Li Li,Li Li (A5027475930),https://openalex.org/A5027475930,2016,33066,2421
61,Li Li,Li Li (A5027475930),https://openalex.org/A5027475930,2015,28798,1839
62,Li Li,Li Li (A5027475930),https://openalex.org/A5027475930,2014,24562,1794
63,Li Li,Li Li (A5027475930),https://openalex.org/A5027475930,2013,20852,2010


In [21]:
# Filter out the current calendar year. The cut-off is taken from the clock rather than a
# hard-coded year so the cell keeps excluding the year in progress when re-run later.
# NOTE(review): presumably the current year is dropped because its counts are still partial - confirm.
current_year = pd.Timestamp.now().year
author_works_stats = author_works_stats[author_works_stats["year"] < current_year]

# Line chart of yearly citations, one line per author, with every year marked on the x axis.
alt.Chart(
	author_works_stats[
		# Drop rows with a zero count; the y axis below uses a log scale.
		(author_works_stats["cited_by_count"] > 0) & (author_works_stats["works_count"] > 0)
		]
).mark_line().encode(
	alt.X(
		# Ordinal encoding so each year gets its own tick.
		"year:O",
		axis=alt.Axis(
			labelAngle=0,
			title="Year",
			titleFontSize=14,
			titleFontWeight="bold",
			titleColor="gray"
		)
	),
	alt.Y(
		"cited_by_count:Q",
		scale=alt.Scale(type='log'),
		axis=alt.Axis(
			title="Citations",
			titleFontSize=14,
			titleFontWeight="bold",
			titleColor="gray",
		)
	),
	# Colour by "name (short id)" so authors sharing a display name stay distinguishable;
	# the colour domain is sorted by citations, descending.
	alt.Color(
		"name_and_id:N"
	).sort(
		alt.SortField(
			"cited_by_count",
			order="descending"
		)
	)
).interactive(
	# Interactive scaling is bound to the x axis only.
	bind_y=False
).properties(
	width=800,
	height=600
)

In [22]:
# Scatter plot of yearly citations (x) against yearly works (y): one circle per row of
# `author_works_stats`, coloured by author name, with the raw values in the tooltip.
scatter_chart = (
	alt.Chart(author_works_stats)
	.mark_circle(size=60)
	.encode(
		x=alt.X("cited_by_count:Q"),
		y=alt.Y("works_count:Q"),
		color='name:N',
		tooltip=['name', 'year', 'cited_by_count', 'works_count'],
	)
	.properties(title='Relationship between Cited by Count and Works Count')
)

scatter_chart.display()

In [23]:
# Keep only the rows where both yearly counts are strictly positive.
# (The variable name is kept as-is because the following chart cell refers to it.)
has_citations = author_works_stats['cited_by_count'] > 0
has_works = author_works_stats['works_count'] > 0
filtered_autor_works = author_works_stats[has_citations & has_works]
filtered_autor_works

Unnamed: 0,name,name_and_id,author,year,cited_by_count,works_count
1,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2023,157502,3984
2,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2022,149154,3918
3,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2021,131493,2716
4,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2020,106902,2743
5,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2019,80867,2412
6,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2018,66339,2005
7,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2017,56853,1827
8,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2016,48588,2316
9,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2015,42036,1574
10,Jun Li,Jun Li (A5027835055),https://openalex.org/A5027835055,2014,35812,1356


In [24]:
# Log-log scatter of yearly citations against yearly works, using the rows with positive counts.
log_x = alt.X(
	'cited_by_count:Q',
	scale=alt.Scale(type='log'),
	title='Cited by Count (Log Scale)',
)
log_y = alt.Y(
	'works_count:Q',
	scale=alt.Scale(type='log'),
	title='Works Count (Log Scale)',
)

(
	alt.Chart(filtered_autor_works)
	.mark_circle(size=60)
	.encode(
		x=log_x,
		y=log_y,
		color='name:N',
		tooltip=['name', 'year', 'cited_by_count', 'works_count'],
	)
	.properties(title='Relationship between Cited by Count and Works Count on Logarithmic Scale')
	.interactive()
	.configure_axis(labelFontSize=14, titleFontSize=14)
	# Fixed 600 x 600 chart size.
	.properties(width=600, height=600)
)

In [25]:
# Search the concepts endpoint for "Machine Learning" and tabulate the returned results.
concept_search = concepts_api.get_concepts(search="Machine Learning")
concepts = pd.DataFrame(concept_search.results)
display(concepts)

Unnamed: 0,id,wikidata,display_name,relevance_score,level,description,works_count,cited_by_count,summary_stats,ids,image_url,image_thumbnail_url,international,ancestors,related_concepts,counts_by_year,works_api_url,updated_date,created_date
0,https://openalex.org/C119857082,https://www.wikidata.org/wiki/Q2539,Machine learning,264263.53,1,scientific study of algorithms and statistical...,3748420,48659152,"{'2yr_mean_citedness': 2.2811401224671792, 'h_...",{'openalex': 'https://openalex.org/C119857082'...,https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,"{'display_name': {'ar': 'تعلم الآلة', 'ary': '...","[{'id': 'https://openalex.org/C41008148', 'wik...","[{'id': 'https://openalex.org/C154945302', 'wi...","[{'year': 2023, 'works_count': 315475, 'cited_...",https://api.openalex.org/works?filter=concepts...,2024-01-14T10:54:58.319630,2016-06-24
1,https://openalex.org/C774472,https://www.wikidata.org/wiki/Q6760393,Margin (machine learning),29569.48,2,distance between a decision boundary and a dat...,82732,886545,"{'2yr_mean_citedness': 2.1020593924223463, 'h_...","{'openalex': 'https://openalex.org/C774472', '...",https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,"{'display_name': {'en': 'margin', 'es': 'marge...","[{'id': 'https://openalex.org/C119857082', 'wi...","[{'id': 'https://openalex.org/C60908668', 'wik...","[{'year': 2023, 'works_count': 4382, 'cited_by...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:06:32.371945,2016-06-24
2,https://openalex.org/C46686674,https://www.wikidata.org/wiki/Q466303,Boosting (machine learning),24921.547,2,ensemble meta-algorithm for reducing bias and ...,40512,540927,"{'2yr_mean_citedness': 3.254633443163097, 'h_i...","{'openalex': 'https://openalex.org/C46686674',...",https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,"{'display_name': {'de': 'Boosting', 'en': 'boo...","[{'id': 'https://openalex.org/C154945302', 'wi...","[{'id': 'https://openalex.org/C141404830', 'wi...","[{'year': 2023, 'works_count': 6225, 'cited_by...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:45:49.735400,2016-06-24
3,https://openalex.org/C77967617,https://www.wikidata.org/wiki/Q4677561,Active learning (machine learning),20236.986,2,machine learning strategy in which a learning ...,42241,415557,"{'2yr_mean_citedness': 1.5557333333333334, 'h_...","{'openalex': 'https://openalex.org/C77967617',...",https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,"{'display_name': {'bn': 'active learning', 'da...","[{'id': 'https://openalex.org/C154945302', 'wi...","[{'id': 'https://openalex.org/C58973888', 'wik...","[{'year': 2023, 'works_count': 2361, 'cited_by...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:20:14.475586,2016-06-24
4,https://openalex.org/C115903097,https://www.wikidata.org/wiki/Q7094097,Online machine learning,9328.731,3,method of machine learning,2450,70917,"{'2yr_mean_citedness': 2.3670103092783505, 'h_...",{'openalex': 'https://openalex.org/C115903097'...,https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,{'display_name': {'ca': 'aprenentatge automàti...,"[{'id': 'https://openalex.org/C77967617', 'wik...","[{'id': 'https://openalex.org/C12267149', 'wik...","[{'year': 2023, 'works_count': 211, 'cited_by_...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:17:10.993126,2016-06-24
5,https://openalex.org/C124101348,https://www.wikidata.org/wiki/Q172491,Data mining,8699.676,1,process of discovering patterns in large data ...,2098630,25084676,"{'2yr_mean_citedness': 2.000223502240904, 'h_i...",{'openalex': 'https://openalex.org/C124101348'...,https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,"{'display_name': {'ar': 'تنقيب في البيانات', '...","[{'id': 'https://openalex.org/C41008148', 'wik...","[{'id': 'https://openalex.org/C119857082', 'wi...","[{'year': 2023, 'works_count': 154585, 'cited_...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:02:49.898806,2016-06-24
6,https://openalex.org/C50644808,https://www.wikidata.org/wiki/Q192776,Artificial neural network,8241.652,2,"computational model used in machine learning, ...",983321,12530264,"{'2yr_mean_citedness': 2.628948783965156, 'h_i...","{'openalex': 'https://openalex.org/C50644808',...",https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,{'display_name': {'ar': 'شبكة عصبونية اصطناعية...,"[{'id': 'https://openalex.org/C154945302', 'wi...","[{'id': 'https://openalex.org/C154945302', 'wi...","[{'year': 2023, 'works_count': 98359, 'cited_b...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:45:34.167667,2016-06-24
7,https://openalex.org/C108583219,https://www.wikidata.org/wiki/Q197536,Deep learning,7424.0796,2,branch of machine learning,277526,4423364,"{'2yr_mean_citedness': 3.471791879406759, 'h_i...",{'openalex': 'https://openalex.org/C108583219'...,https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,"{'display_name': {'af': 'diepleer', 'ar': 'الت...","[{'id': 'https://openalex.org/C154945302', 'wi...","[{'id': 'https://openalex.org/C50644808', 'wik...","[{'year': 2023, 'works_count': 57117, 'cited_b...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:14:27.790471,2016-06-24
8,https://openalex.org/C2778403875,https://www.wikidata.org/wiki/Q20312394,Adversarial machine learning,7013.0337,3,machine learning technique that attempts to fo...,1727,42894,"{'2yr_mean_citedness': 2.2466666666666666, 'h_...",{'openalex': 'https://openalex.org/C2778403875...,,,"{'display_name': {'de': 'Adversarial Attack', ...","[{'id': 'https://openalex.org/C37736160', 'wik...","[{'id': 'https://openalex.org/C110083411', 'wi...","[{'year': 2023, 'works_count': 319, 'cited_by_...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:03:40.898594,2018-01-05
9,https://openalex.org/C2779094486,https://www.wikidata.org/wiki/Q18811578,Quantum machine learning,5291.943,4,interdisciplinary research area at the interse...,1554,24145,"{'2yr_mean_citedness': 3.147011308562197, 'h_i...",{'openalex': 'https://openalex.org/C2779094486...,https://upload.wikimedia.org/wikipedia/commons...,https://upload.wikimedia.org/wikipedia/commons...,{'display_name': {'en': 'quantum machine learn...,"[{'id': 'https://openalex.org/C137019171', 'wi...","[{'id': 'https://openalex.org/C119857082', 'wi...","[{'year': 2023, 'works_count': 415, 'cited_by_...",https://api.openalex.org/works?filter=concepts...,2024-01-14T11:53:53.303917,2018-01-05


In [26]:
# Order the search hits by relevance score, best first, and take the top row as the concept to use.
concepts_by_relevance = concepts.sort_values(by="relevance_score", ascending=False)
machine_learning_concept = concepts_by_relevance.iloc[0]
display(machine_learning_concept)

id                                       https://openalex.org/C119857082
wikidata                             https://www.wikidata.org/wiki/Q2539
display_name                                            Machine learning
relevance_score                                                264263.53
level                                                                  1
description            scientific study of algorithms and statistical...
works_count                                                      3748420
cited_by_count                                                  48659152
summary_stats          {'2yr_mean_citedness': 2.2811401224671792, 'h_...
ids                    {'openalex': 'https://openalex.org/C119857082'...
image_url              https://upload.wikimedia.org/wikipedia/commons...
image_thumbnail_url    https://upload.wikimedia.org/wikipedia/commons...
international          {'display_name': {'ar': 'تعلم الآلة', 'ary': '...
ancestors              [{'id': 'https://openalex.or

In [27]:
# The short concept ID is the last path segment of the concept's OpenAlex URL.
concept_url = machine_learning_concept["ids"]["openalex"]
machine_learning_concept_id = concept_url.rsplit("/", 1)[-1]
display(machine_learning_concept_id)

'C119857082'

In [28]:
# Filter terms: works attached to the selected concept with a publication year after 1950.
# They are serialised as comma-separated `key:value` pairs.
filter_terms = {
	"concepts.id": machine_learning_concept_id,
	"publication_year": ">1950",
}
filters = ",".join(f"{field}:{condition}" for field, condition in filter_terms.items())

# Request the matching works sorted by `cited_by_count` descending and tabulate the results.
works_page = works_api.get_works(filter=filters, sort="cited_by_count:desc")
worksDf = pd.DataFrame(works_page.results)
display(worksDf)

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,...,grants,referenced_works_count,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date
0,https://openalex.org/W2194775991,https://doi.org/10.1109/cvpr.2016.90,Deep Residual Learning for Image Recognition,Deep Residual Learning for Image Recognition,2016,2016-06-01,{'openalex': 'https://openalex.org/W2194775991...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],22,"[https://openalex.org/W1536680647, https://ope...","[https://openalex.org/W2970686063, https://ope...",https://api.openalex.org/works/W2194775991/ngrams,"{'Deeper': [0], 'neural': [1], 'networks': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 22621}, {'ye...",2024-01-08T20:51:03.118667,2016-06-24
1,https://openalex.org/W2112796928,https://doi.org/10.1109/5.726791,Gradient-based learning applied to document re...,Gradient-based learning applied to document re...,1998,1998-01-01,{'openalex': 'https://openalex.org/W2112796928...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],64,"[https://openalex.org/W103129759, https://open...","[https://openalex.org/W3047607512, https://ope...",https://api.openalex.org/works/W2112796928/ngrams,"{'Multilayer': [0], 'neural': [1, 69, 172], 'n...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 4642}, {'yea...",2024-01-08T20:51:01.418292,2016-06-24
2,https://openalex.org/W2024060531,https://doi.org/10.1126/science.220.4598.671,Optimization by Simulated Annealing,Optimization by Simulated Annealing,1983,1983-05-13,{'openalex': 'https://openalex.org/W2024060531...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],16,"[https://openalex.org/W184931548, https://open...","[https://openalex.org/W2921069127, https://ope...",https://api.openalex.org/works/W2024060531/ngrams,"{'There': [0], 'is': [1], 'a': [2, 23, 35, 50]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1326}, {'yea...",2024-01-07T13:26:54.237005,2016-06-24
3,https://openalex.org/W2108598243,https://doi.org/10.1109/cvpr.2009.5206848,ImageNet: A large-scale hierarchical image dat...,ImageNet: A large-scale hierarchical image dat...,2009,2009-06-01,{'openalex': 'https://openalex.org/W2108598243...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],15,"[https://openalex.org/W1521539493, https://ope...","[https://openalex.org/W2326857978, https://ope...",https://api.openalex.org/works/W2108598243/ngrams,"{'The': [0], 'explosion': [1], 'of': [2, 56, 6...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 6038}, {'yea...",2024-01-09T00:48:48.539012,2016-06-24
4,https://openalex.org/W2107328434,https://doi.org/10.1016/0197-2456(86)90046-2,Meta-analysis in clinical trials,Meta-analysis in clinical trials,1986,1986-09-01,{'openalex': 'https://openalex.org/W2107328434...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],11,"[https://openalex.org/W1952298078, https://ope...","[https://openalex.org/W2095812817, https://ope...",https://api.openalex.org/works/W2107328434/ngrams,"{'This': [0, 66], 'paper': [1], 'examines': [2...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1762}, {'yea...",2024-01-07T10:39:50.391234,2016-06-24
5,https://openalex.org/W2159306398,https://doi.org/10.1007/bf02310555,Coefficient alpha and the internal structure o...,Coefficient alpha and the internal structure o...,1951,1951-09-01,{'openalex': 'https://openalex.org/W2159306398...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],25,"[https://openalex.org/W1968439435, https://ope...","[https://openalex.org/W2125516453, https://ope...",https://api.openalex.org/works/W2159306398/ngrams,"{'A': [0], 'general': [1], 'formula': [2], '(α...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1722}, {'yea...",2024-01-08T04:03:46.377931,2016-06-24
6,https://openalex.org/W2122912498,https://doi.org/10.1037/0033-2909.103.3.411,Structural equation modeling in practice: A re...,Structural equation modeling in practice: A re...,1988,1988-05-01,{'openalex': 'https://openalex.org/W2122912498...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],70,"[https://openalex.org/W36714851, https://opena...","[https://openalex.org/W2899084033, https://ope...",https://api.openalex.org/works/W2122912498/ngrams,"{'In': [0], 'this': [1, 48], 'article,': [2], ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 2074}, {'yea...",2024-01-08T09:45:39.127330,2016-06-24
7,https://openalex.org/W2097117768,https://doi.org/10.1109/cvpr.2015.7298594,Going deeper with convolutions,Going deeper with convolutions,2015,2015-06-01,{'openalex': 'https://openalex.org/W2097117768...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],9,"[https://openalex.org/W2068730032, https://ope...","[https://openalex.org/W4312417841, https://ope...",https://api.openalex.org/works/W2097117768/ngrams,"{'We': [0], 'propose': [1], 'a': [2, 49, 98], ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 3959}, {'yea...",2024-01-11T05:23:19.375279,2016-06-24
8,https://openalex.org/W1901129140,https://doi.org/10.1007/978-3-319-24574-4_28,U-Net: Convolutional Networks for Biomedical I...,U-Net: Convolutional Networks for Biomedical I...,2015,2015-01-01,{'openalex': 'https://openalex.org/W1901129140...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",book-chapter,...,[],9,"[https://openalex.org/W1677182931, https://ope...","[https://openalex.org/W1669643531, https://ope...",https://api.openalex.org/works/W1901129140/ngrams,"{'There': [0], 'is': [1, 131], 'large': [2, 12...",https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2023, 'cited_by_count': 6368}, {'yea...",2024-01-08T14:36:12.961910,2016-06-24
9,https://openalex.org/W2117539524,https://doi.org/10.1007/s11263-015-0816-y,ImageNet Large Scale Visual Recognition Challenge,ImageNet Large Scale Visual Recognition Challenge,2015,2015-04-11,{'openalex': 'https://openalex.org/W2117539524...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,[],60,"[https://openalex.org/W1499991161, https://ope...","[https://openalex.org/W2200925278, https://ope...",https://api.openalex.org/works/W2117539524/ngrams,"{'The': [0, 25], 'ImageNet': [1], 'Large': [2]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 3530}, {'yea...",2024-01-15T18:42:24.864972,2016-06-24
