Skip to content

Commit

Permalink
Merge pull request #8 from OKN-CollabNext/python-code-refactoring
Browse files Browse the repository at this point in the history
Refactor python code
  • Loading branch information
AbigailDawson committed Apr 16, 2024
2 parents fa5d0bb + bbfd960 commit 8126cee
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 63 deletions.
4 changes: 4 additions & 0 deletions collabnext/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from dotenv import load_dotenv

# Load Secrets
# Executed when this package is imported, so variables defined in a .env file
# (if one is found) are placed in the process environment at that point.
load_dotenv()
6 changes: 6 additions & 0 deletions collabnext/openalex/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import os

import pyalex

# Initialize the pyalex client
# os.getenv returns None when OPENALEX_EMAIL is not set, so in that case the
# email setting is assigned None rather than an address.
# NOTE(review): presumably relies on the parent package's load_dotenv() call
# having already populated the environment -- confirm the import order.
pyalex.config.email = os.getenv("OPENALEX_EMAIL")
11 changes: 11 additions & 0 deletions collabnext/openalex/authors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pyalex import Author, Authors, Institution


def get_affiliated_authors(institutions: list[Institution]) -> list[Author]:
    """Fetch the authors affiliated with the given institutions, without repeats.

    The institutions are visited in order and, for each one, the authors
    endpoint is queried with a filter on the affiliation's institution id.
    An author returned for more than one institution is kept only the first
    time it is seen, so the result preserves first-seen order.
    """
    # NOTE(review): a single ``.get()`` presumably returns only one page of
    # results per institution -- confirm against the pyalex documentation.
    unique_authors = []
    known_ids = set()
    for institution in institutions:
        query = Authors().filter(
            affiliations={"institution": {"id": institution["id"]}}
        )
        for author in query.get():
            if author["id"] in known_ids:
                continue
            known_ids.add(author["id"])
            unique_authors.append(author)
    return unique_authors
31 changes: 31 additions & 0 deletions collabnext/openalex/edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pyalex import Author, Institution


def make_associated_institution_edges(institutions: list[Institution]) -> list[dict]:
    """Build one ASSOCIATED edge per (institution, associated institution) pair.

    Every entry of an institution's ``"associated_institutions"`` list yields
    an edge dict whose ``start`` is the institution's id, whose ``end`` is the
    associated institution's id, and whose ``id`` is the two joined by ``-``.
    Edges are emitted in input order; no de-duplication is performed.
    """
    edges = []
    for institution in institutions:
        for associated in institution["associated_institutions"]:
            # Ids are read inside the inner loop so an institution with no
            # associations never has its "id" key touched.
            source_id = institution["id"]
            target_id = associated["id"]
            edge = {
                "id": f"{source_id}-{target_id}",
                "start": source_id,
                "end": target_id,
                "label": "ASSOCIATED",
                "start_type": "INSTITUTION",
                "end_type": "INSTITUTION",
            }
            edges.append(edge)
    return edges


def make_affiliated_author_edges(authors: list[Author]) -> list[dict]:
    """Build one AFFILIATED edge per (author, affiliation) pair.

    Every entry of an author's ``"affiliations"`` list yields an edge dict
    running from the author's id to the id of that affiliation's
    ``"institution"``; the edge ``id`` is the two ids joined by ``-``.
    Edges are emitted in input order; no de-duplication is performed.
    """
    # NOTE(review): edges are created for every affiliation an author lists,
    # which may include institutions the caller has no node for -- confirm
    # that is intended.
    edges = []
    for author in authors:
        for affiliation in author["affiliations"]:
            # Ids are read inside the inner loop so an author with no
            # affiliations never has its "id" key touched.
            author_id = author["id"]
            institution_id = affiliation["institution"]["id"]
            edge = {
                "id": f"{author_id}-{institution_id}",
                "start": author_id,
                "end": institution_id,
                "label": "AFFILIATED",
                "start_type": "AUTHOR",
                "end_type": "INSTITUTION",
            }
            edges.append(edge)
    return edges
23 changes: 23 additions & 0 deletions collabnext/openalex/institutions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from pyalex import Institution, Institutions


def get_institutions(count: int = 5) -> list[Institution]:
    """Return ``count`` institutions, each obtained via ``Institutions().random()``.

    Args:
        count: How many random institutions to request. Defaults to 5, the
            sample size that was previously hard-coded here. Zero or a
            negative value yields an empty list.

    Returns:
        A list with one entry per ``random()`` call. No de-duplication is
        done here, so callers that need unique institutions must handle
        that themselves.
    """
    # A fresh query object is built for every draw, exactly as before.
    return [Institutions().random() for _ in range(count)]


def get_associated_institutions(institutions: list[Institution]) -> list[Institution]:
    """Collect the distinct institutions associated with the given ones.

    Walks each institution's ``"associated_institutions"`` list in order and
    keeps an entry the first time its id is encountered. Only ids of the
    associated entries are tracked, so an associated institution that is also
    one of the inputs is still returned.
    """
    # NOTE(review): the returned items are the nested entries as they appear
    # inside each institution record, not separately fetched records --
    # confirm they carry every field downstream code needs.
    collected = []
    known_ids = set()
    for institution in institutions:
        for associated in institution["associated_institutions"]:
            if associated["id"] in known_ids:
                continue
            known_ids.add(associated["id"])
            collected.append(associated)
    return collected


def dedup_institutions(institutions: list[Institution]) -> list[Institution]:
    """Drop institutions whose ``"id"`` has already appeared earlier in the list.

    The first occurrence of each id is kept and the relative order of the
    survivors matches the input order.
    """
    # dicts preserve insertion order and setdefault never overwrites, so this
    # retains exactly the first institution seen for each id.
    first_by_id = {}
    for institution in institutions:
        first_by_id.setdefault(institution["id"], institution)
    return list(first_by_id.values())
14 changes: 14 additions & 0 deletions collabnext/openalex/nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from pyalex import Author, Institution


def make_institution_nodes(institutions: list[Institution]) -> list[dict]:
    """Convert institutions into graph node dicts of type ``INSTITUTION``.

    Each node carries the institution's ``"id"`` and uses its
    ``"display_name"`` as the node label. Output order matches input order.
    """
    nodes = []
    for institution in institutions:
        node = {
            "id": institution["id"],
            "label": institution["display_name"],
            "type": "INSTITUTION",
        }
        nodes.append(node)
    return nodes


def make_author_nodes(authors: list[Author]) -> list[dict]:
    """Convert authors into graph node dicts of type ``AUTHOR``.

    Each node carries the author's ``"id"`` and uses its ``"display_name"``
    as the node label. Output order matches input order.
    """
    nodes = []
    for author in authors:
        node = {
            "id": author["id"],
            "label": author["display_name"],
            "type": "AUTHOR",
        }
        nodes.append(node)
    return nodes
Empty file removed knowhax/__init__.py
Empty file.
86 changes: 24 additions & 62 deletions observable/docs/data/graph.json.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,41 @@
import json
import os

import pyalex
from dotenv import load_dotenv
from pyalex import Authors, Institutions
from collabnext.openalex.authors import get_affiliated_authors
from collabnext.openalex.edges import (
make_affiliated_author_edges,
make_associated_institution_edges,
)
from collabnext.openalex.institutions import (
dedup_institutions,
get_associated_institutions,
get_institutions,
)
from collabnext.openalex.nodes import make_author_nodes, make_institution_nodes

# Load Secrets
load_dotenv()
# Get institutions
institutions = get_institutions()

# Initialize the pyalex client
pyalex.config.email = os.getenv("OPENALEX_EMAIL")

# Get 5 random institutions
institutions = [Institutions().random() for _ in range(5)]

# Gather associated institutions
associated_institutions = [
y for x in institutions for y in x["associated_institutions"]
]
# Get associated institutions
associated_institutions = get_associated_institutions(institutions)

# Combine all unique institutions
seen = set()
all_institutions = [
x
for x in [*institutions, *associated_institutions]
if not (x["id"] in seen or seen.add(x["id"]))
]
all_institutions = dedup_institutions([*institutions, *associated_institutions])

# Create nodes
institution_nodes = [
{"id": x["id"], "label": x["display_name"], "type": "INSTITUTION"}
for x in all_institutions
]
institution_nodes = make_institution_nodes(all_institutions)

# Get unique affiliated authors
seen = set()
authors = [
y
for x in all_institutions
for y in Authors().filter(affiliations={"institution": {"id": x["id"]}}).get()
if not (y["id"] in seen or seen.add(y["id"]))
]
authors = get_affiliated_authors(all_institutions)

# Get unique authors affiliated with each institution
author_nodes = [
{"id": x["id"], "label": x["display_name"], "type": "AUTHOR"} for x in authors
]

nodes = [*institution_nodes, *author_nodes]
author_nodes = make_author_nodes(authors)

# Create associated institution edges
associated_institution_edges = [
{
"id": f"""{x["id"]}-{y["id"]}""",
"start": x["id"],
"end": y["id"],
"label": "ASSOCIATED",
"start_type": "INSTITUTION",
"end_type": "INSTITUTION",
}
for x in institutions
for y in x["associated_institutions"]
]
affiliated_author_edges = [
{
"id": f"""{x["id"]}-{y["institution"]["id"]}""",
"start": x["id"],
"end": y["institution"]["id"],
"label": "AFFILIATED",
"start_type": "AUTHOR",
"end_type": "INSTITUTION",
}
for x in authors
for y in x["affiliations"]
]
associated_institution_edges = make_associated_institution_edges(institutions)
affiliated_author_edges = make_affiliated_author_edges(authors)

# Group all nodes and edges together
nodes = [*institution_nodes, *author_nodes]
edges = [*associated_institution_edges, *affiliated_author_edges]

print(json.dumps({"nodes": nodes, "edges": edges}))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ authors = ["Your Name <you@example.com>"]
license = "CC0 1.0 Universal"
readme = "README.md"
packages = [
{ include = "knowhax", from = "." }
{ include = "collabnext", from = "." }
]

[tool.poetry.dependencies]
Expand Down

0 comments on commit 8126cee

Please sign in to comment.