-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from OKN-CollabNext/python-code-refactoring
Refactor python code
- Loading branch information
Showing
9 changed files
with
114 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from dotenv import load_dotenv | ||
|
||
# Load secrets via python-dotenv as soon as this module is imported
# (presumably from a local .env file -- confirm against the deployment setup).
load_dotenv()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import os | ||
|
||
import pyalex | ||
|
||
# Configure the pyalex client with the contact email taken from the
# OPENALEX_EMAIL environment variable (None when the variable is unset).
pyalex.config.email = os.environ.get("OPENALEX_EMAIL")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from pyalex import Author, Authors, Institution | ||
|
||
|
||
def get_affiliated_authors(institutions: list[Institution]) -> list[Author]:
    """Collect the authors affiliated with the given institutions, without repeats.

    For every institution an ``Authors().filter(...)`` query keyed on the
    institution's ``"id"`` is issued. Authors are returned in the order they
    are first encountered; an author whose ``"id"`` was already seen (for
    example under an earlier institution) is dropped.

    NOTE(review): a single ``.get()`` call presumably returns only the first
    page of results per institution -- confirm against the pyalex paging docs.
    """
    known_ids = set()
    unique_authors = []
    for institution in institutions:
        matches = (
            Authors()
            .filter(affiliations={"institution": {"id": institution["id"]}})
            .get()
        )
        for author in matches:
            author_id = author["id"]
            if author_id in known_ids:
                continue
            known_ids.add(author_id)
            unique_authors.append(author)
    return unique_authors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from pyalex import Author, Institution | ||
|
||
|
||
def make_associated_institution_edges(institutions: list[Institution]) -> list[dict]:
    """Build one "ASSOCIATED" edge per (institution, associated institution) pair.

    Each edge runs from the institution's ``"id"`` to the ``"id"`` of an entry
    in its ``"associated_institutions"`` list, and the edge ``"id"`` is the two
    ids joined by a hyphen. Edges keep the input order; nothing is deduplicated.
    """
    edges = []
    for source in institutions:
        for target in source["associated_institutions"]:
            source_id = source["id"]
            target_id = target["id"]
            edge = {
                "id": f"{source_id}-{target_id}",
                "start": source_id,
                "end": target_id,
                "label": "ASSOCIATED",
                "start_type": "INSTITUTION",
                "end_type": "INSTITUTION",
            }
            edges.append(edge)
    return edges
|
||
|
||
def make_affiliated_author_edges(authors: list[Author]) -> list[dict]:
    """Build one "AFFILIATED" edge per (author, affiliation) pair.

    Each edge runs from the author's ``"id"`` to the ``"id"`` of the
    ``"institution"`` inside an entry of the author's ``"affiliations"`` list,
    and the edge ``"id"`` is the two ids joined by a hyphen. Edges keep the
    input order; nothing is deduplicated.
    """
    edges = []
    for author in authors:
        for affiliation in author["affiliations"]:
            author_id = author["id"]
            institution_id = affiliation["institution"]["id"]
            edge = {
                "id": f"{author_id}-{institution_id}",
                "start": author_id,
                "end": institution_id,
                "label": "AFFILIATED",
                "start_type": "AUTHOR",
                "end_type": "INSTITUTION",
            }
            edges.append(edge)
    return edges
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from pyalex import Institution, Institutions | ||
|
||
|
||
def get_institutions(count: int = 5) -> list[Institution]:
    """Fetch ``count`` randomly chosen institutions from OpenAlex.

    Args:
        count: Number of random institutions to request. Defaults to 5, the
            sample size that used to be hard-coded here, so existing
            zero-argument callers behave as before.

    Returns:
        A list holding the result of one ``Institutions().random()`` call per
        requested institution. Draws are independent, so nothing here prevents
        the same institution from appearing more than once.

    Raises:
        ValueError: If ``count`` is negative.
    """
    if count < 0:
        raise ValueError(f"count must be non-negative, got {count}")
    return [Institutions().random() for _ in range(count)]
|
||
|
||
def get_associated_institutions(institutions: list[Institution]) -> list[Institution]:
    """Gather the institutions associated with any of the given institutions.

    Walks each institution's ``"associated_institutions"`` list in order and
    keeps the first entry seen for every distinct ``"id"``; later entries with
    an already-seen id are skipped.
    """
    known_ids = set()
    gathered = []
    for institution in institutions:
        for associated in institution["associated_institutions"]:
            associated_id = associated["id"]
            if associated_id in known_ids:
                continue
            known_ids.add(associated_id)
            gathered.append(associated)
    return gathered
|
||
|
||
def dedup_institutions(institutions: list[Institution]) -> list[Institution]:
    """Return the institutions with repeated ``"id"`` values removed.

    The first institution carrying a given id is kept and the original
    relative order is preserved.
    """
    # dicts keep insertion order, and setdefault never overwrites an existing
    # key, so the first institution seen for each id is the one retained.
    first_by_id = {}
    for institution in institutions:
        first_by_id.setdefault(institution["id"], institution)
    return list(first_by_id.values())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from pyalex import Author, Institution | ||
|
||
|
||
def make_institution_nodes(institutions: list[Institution]) -> list[dict]:
    """Turn each institution into a graph node dict of type "INSTITUTION".

    The node copies the institution's ``"id"`` and uses its ``"display_name"``
    as the node ``"label"``. Output order follows the input order.
    """
    nodes = []
    for institution in institutions:
        node = {
            "id": institution["id"],
            "label": institution["display_name"],
            "type": "INSTITUTION",
        }
        nodes.append(node)
    return nodes
|
||
|
||
def make_author_nodes(authors: list[Author]) -> list[dict]:
    """Turn each author into a graph node dict of type "AUTHOR".

    The node copies the author's ``"id"`` and uses its ``"display_name"`` as
    the node ``"label"``. Output order follows the input order.
    """

    def to_node(author):
        # Keys are inserted as id, label, type so serialized output keeps that order.
        return {
            "id": author["id"],
            "label": author["display_name"],
            "type": "AUTHOR",
        }

    return list(map(to_node, authors))
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,79 +1,41 @@ | ||
import json | ||
import os | ||
|
||
import pyalex | ||
from dotenv import load_dotenv | ||
from pyalex import Authors, Institutions | ||
from collabnext.openalex.authors import get_affiliated_authors | ||
from collabnext.openalex.edges import ( | ||
make_affiliated_author_edges, | ||
make_associated_institution_edges, | ||
) | ||
from collabnext.openalex.institutions import ( | ||
dedup_institutions, | ||
get_associated_institutions, | ||
get_institutions, | ||
) | ||
from collabnext.openalex.nodes import make_author_nodes, make_institution_nodes | ||
|
||
# Load Secrets | ||
load_dotenv() | ||
# Get institutions | ||
institutions = get_institutions() | ||
|
||
# Initialize the pyalex client | ||
pyalex.config.email = os.getenv("OPENALEX_EMAIL") | ||
|
||
# Get 5 random institutions | ||
institutions = [Institutions().random() for _ in range(5)] | ||
|
||
# Gather associated institutions | ||
associated_institutions = [ | ||
y for x in institutions for y in x["associated_institutions"] | ||
] | ||
# Get associated institutions | ||
associated_institutions = get_associated_institutions(institutions) | ||
|
||
# Combine all unique institutions | ||
seen = set() | ||
all_institutions = [ | ||
x | ||
for x in [*institutions, *associated_institutions] | ||
if not (x["id"] in seen or seen.add(x["id"])) | ||
] | ||
all_institutions = dedup_institutions([*institutions, *associated_institutions]) | ||
|
||
# Create nodes | ||
institution_nodes = [ | ||
{"id": x["id"], "label": x["display_name"], "type": "INSTITUTION"} | ||
for x in all_institutions | ||
] | ||
institution_nodes = make_institution_nodes(all_institutions) | ||
|
||
# Get unique affiliated authors | ||
seen = set() | ||
authors = [ | ||
y | ||
for x in all_institutions | ||
for y in Authors().filter(affiliations={"institution": {"id": x["id"]}}).get() | ||
if not (y["id"] in seen or seen.add(y["id"])) | ||
] | ||
authors = get_affiliated_authors(all_institutions) | ||
|
||
# Get unique authors affiliated with each institution | ||
author_nodes = [ | ||
{"id": x["id"], "label": x["display_name"], "type": "AUTHOR"} for x in authors | ||
] | ||
|
||
nodes = [*institution_nodes, *author_nodes] | ||
author_nodes = make_author_nodes(authors) | ||
|
||
# Create associated institution edges | ||
associated_institution_edges = [ | ||
{ | ||
"id": f"""{x["id"]}-{y["id"]}""", | ||
"start": x["id"], | ||
"end": y["id"], | ||
"label": "ASSOCIATED", | ||
"start_type": "INSTITUTION", | ||
"end_type": "INSTITUTION", | ||
} | ||
for x in institutions | ||
for y in x["associated_institutions"] | ||
] | ||
affiliated_author_edges = [ | ||
{ | ||
"id": f"""{x["id"]}-{y["institution"]["id"]}""", | ||
"start": x["id"], | ||
"end": y["institution"]["id"], | ||
"label": "AFFILIATED", | ||
"start_type": "AUTHOR", | ||
"end_type": "INSTITUTION", | ||
} | ||
for x in authors | ||
for y in x["affiliations"] | ||
] | ||
associated_institution_edges = make_associated_institution_edges(institutions) | ||
affiliated_author_edges = make_affiliated_author_edges(authors) | ||
|
||
# Group all nodes and edges together | ||
nodes = [*institution_nodes, *author_nodes] | ||
edges = [*associated_institution_edges, *affiliated_author_edges] | ||
|
||
print(json.dumps({"nodes": nodes, "edges": edges})) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters