Skip to content

Commit

Permalink
Merge pull request #13 from OKN-CollabNext/add-topics
Browse files Browse the repository at this point in the history
Add topic nodes
  • Loading branch information
kaaloo committed Apr 17, 2024
2 parents 2075e34 + 34b20a9 commit 9e9ce35
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 45 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ Build the static web site locally.
invoke build
```

Manually cause a graph.json refresh. This is needed because Observable
Framework currently doesn't notice when a dependent Python module
has been changed during development. It only monitors changes to
the particular page that is being displayed.

```bash
invoke touch
```

Delete local git branches that have already been merged.

```bash
Expand Down
17 changes: 16 additions & 1 deletion collabnext/openalex/edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def make_affiliated_author_edges(authors: list[Author]) -> list[dict]:
]


def make_work_author_edges(works: list[Work]) -> list[dict]:
def make_author_work_edges(works: list[Work]) -> list[dict]:
return [
{
"id": f"{work['id']}-{authorship['author']['id']}",
Expand All @@ -44,3 +44,18 @@ def make_work_author_edges(works: list[Work]) -> list[dict]:
for work in works
for authorship in work.get("authorships", [])
]


def make_work_topic_edges(works: list[Work]) -> list[dict]:
    """Build one WORK -> TOPIC edge per topic attached to each work.

    Args:
        works: Work records. Each must have an ``"id"``; the ``"topics"``
            entry is optional and, when present, is a list of records that
            each have an ``"id"``.

    Returns:
        A list of edge dicts with ``id``, ``start`` (work id), ``end``
        (topic id), ``label``, ``start_type`` and ``end_type`` keys. Works
        without topics contribute no edges.
    """
    return [
        {
            "id": f"{work['id']}-{topic['id']}",
            "start": work["id"],
            "end": topic["id"],
            "label": "TOPIC",
            "start_type": "WORK",
            "end_type": "TOPIC",
        }
        for work in works
        # A work may lack "topics" (or carry None); treat that as "no topics"
        # instead of raising, mirroring how "authorships" is read with .get().
        for topic in work.get("topics") or []
    ]
17 changes: 0 additions & 17 deletions collabnext/openalex/institutions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,3 @@
def get_institutions(count: int = 3) -> list[Institution]:
    """Fetch ``count`` randomly selected institutions.

    Args:
        count: How many institutions to fetch. Defaults to 3, the sample
            size this function was previously hard-coded to.

    Returns:
        A list with one entry per ``Institutions().random()`` call.
        NOTE(review): nothing here de-duplicates the sample, so repeated
        institutions look possible -- confirm whether callers care.
    """
    # A fresh query object is created for every draw, as in the original code.
    return [Institutions().random() for _ in range(count)]


def get_associated_institutions(institutions: list[Institution]) -> list[Institution]:
    """Collect the distinct associated institutions of the given institutions.

    Every entry of each institution's ``"associated_institutions"`` list is
    visited in order; only the first record seen for a given ``"id"`` is kept,
    so the result preserves first-seen order.
    """
    first_by_id = {}
    for parent in institutions:
        for associated in parent["associated_institutions"]:
            # setdefault keeps the earliest record for an id and ignores repeats.
            first_by_id.setdefault(associated["id"], associated)
    return list(first_by_id.values())


def dedup_institutions(institutions: list[Institution]) -> list[Institution]:
    """Return the institutions with duplicate ``"id"`` values removed.

    The first record seen for each id is kept and the original relative
    order is preserved.
    """
    first_by_id = {}
    for institution in institutions:
        # setdefault only stores the record when the id has not been seen yet.
        first_by_id.setdefault(institution["id"], institution)
    return list(first_by_id.values())
18 changes: 15 additions & 3 deletions collabnext/openalex/nodes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyalex import Author, Institution, Work
from pyalex import Author, Institution, Topic, Work


def make_institution_nodes(institutions: list[Institution]) -> list[dict]:
Expand All @@ -15,6 +15,18 @@ def make_author_nodes(authors: list[Author]) -> list[dict]:


def make_work_nodes(works: list[Work]) -> list[dict]:
    """Turn work records into graph nodes of type ``WORK``.

    Each node carries the work's ``"id"`` and uses its ``"title"`` as label.
    """
    nodes = []
    for work in works:
        nodes.append({"id": work["id"], "label": work["title"], "type": "WORK"})
    return nodes


def make_topic_nodes(topics: list[Topic]) -> list[dict]:
    """Turn topic records into graph nodes of type ``TOPIC``, one per field.

    Topics are grouped by field: only the first topic seen for each
    ``topic["field"]["id"]`` produces a node. That node uses the topic's own
    ``"id"`` and the field's ``"display_name"`` as its label.

    Args:
        topics: Topic records, each with an ``"id"`` and a ``"field"`` record
            holding ``"id"`` and ``"display_name"``.

    Returns:
        A list of node dicts with ``id``, ``label`` and ``type`` keys, in
        first-seen field order.

    NOTE(review): because the node id is the first topic's id rather than the
    field id, edges that point at other topics of the same field would have
    no matching node -- confirm against the work/topic edge builder.
    """
    seen_field_ids = set()
    nodes = []
    for topic in topics:
        field = topic["field"]
        # Note that topics are grouped by field
        if field["id"] in seen_field_ids:
            continue
        seen_field_ids.add(field["id"])
        nodes.append(
            {
                "id": topic["id"],
                "label": field["display_name"],
                "type": "TOPIC",
            }
        )
    return nodes
11 changes: 11 additions & 0 deletions collabnext/openalex/topics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pyalex import Topic, Work


def get_work_topics(works: list[Work]) -> list[Topic]:
seen = set()
return [
y
for x in works
for y in x["topics"]
if not (y["id"] in seen or seen.add(y["id"]))
]
49 changes: 26 additions & 23 deletions observable/docs/data/graph.json.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,36 @@
import json

from collabnext.openalex.authors import get_affiliated_authors
from collabnext.openalex.works import get_works_by_authors
from collabnext.openalex.edges import (
make_affiliated_author_edges,
make_associated_institution_edges,
make_work_author_edges,
make_author_work_edges,
make_work_topic_edges,
)
from collabnext.openalex.institutions import (
dedup_institutions,
get_associated_institutions,
get_institutions,
)
from collabnext.openalex.nodes import (
make_author_nodes,
make_institution_nodes,
make_work_nodes,
make_author_nodes,
make_institution_nodes,
make_topic_nodes,
make_work_nodes,
)
from collabnext.openalex.topics import get_work_topics
from collabnext.openalex.works import get_works_by_authors

# Get institutions
institutions = get_institutions()

# Get associated institutions
associated_institutions = get_associated_institutions(institutions)

# Combine all unique institutions
all_institutions = dedup_institutions([*institutions, *associated_institutions])

# Create nodes
institution_nodes = make_institution_nodes(all_institutions)
institution_nodes = make_institution_nodes(institutions)

# Get unique affiliated authors
authors = get_affiliated_authors(all_institutions)
authors = get_affiliated_authors(institutions)

# Get unique authors affiliated with each institution
# Get all authors affiliated with each institution
author_nodes = make_author_nodes(authors)

# Create associated institution edges
associated_institution_edges = make_associated_institution_edges(institutions)
# Create institution edges
affiliated_author_edges = make_affiliated_author_edges(authors)

# Get works by authors
Expand All @@ -47,10 +40,20 @@
work_nodes = make_work_nodes(works)

# Create work author edges
work_author_edges = make_work_author_edges(works)
work_author_edges = make_author_work_edges(works)

# Get topics from works
topics = get_work_topics(works)

# Create topic nodes
topic_nodes = make_topic_nodes(topics)

# Create work topic edges
work_topic_edges = make_work_topic_edges(works)


# Group all nodes and edges together
nodes = [*institution_nodes, *author_nodes, *work_nodes]
edges = [*associated_institution_edges, *affiliated_author_edges, *work_author_edges]
nodes = [*institution_nodes, *author_nodes, *work_nodes, *topic_nodes]
edges = [*affiliated_author_edges, *work_author_edges, *work_topic_edges]

print(json.dumps({"nodes": nodes, "edges": edges}))
print(json.dumps({"nodes": nodes, "edges": edges}))
11 changes: 10 additions & 1 deletion observable/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ orb.data.setDefaultStyle({
color: "#0df2c9",
zIndex: 1,
};
}
}

if (node.data.type === "WORK") {
return {
Expand All @@ -67,6 +67,15 @@ orb.data.setDefaultStyle({
};
}

if (node.data.type === "TOPIC") {
return {
...basicStyle,
size: 10,
color: "#f8ee35",
zIndex: 1,
};
}

return {
...basicStyle,
};
Expand Down
6 changes: 6 additions & 0 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,9 @@ def deploy(c):
@task
def clean_branches(c):
    """Delete local git branches that have already been merged into main.

    Only branches listed by ``git branch --merged main`` are candidates, and
    they are removed with ``-d`` (not ``-D``) so git refuses to drop a branch
    that still holds unmerged work.
    """
    c.run(
        "git branch --merged main"
        # Drop the checked-out branch ("* name"), branches checked out in
        # another worktree ("+ name") and main itself. Matching the exact
        # name keeps branches such as "maintenance" eligible for cleanup.
        " | grep -vE '^[*+]|^[[:space:]]*main$'"
        " | xargs git branch -d"
    )


@task
def touch(c):
    """Touch the graph.json data loader script to force a graph.json refresh."""
    data_dir = "observable/docs/data"
    # Run from the data directory so the relative file name resolves there.
    with cwd(data_dir):
        c.run("touch graph.json.py")

0 comments on commit 9e9ce35

Please sign in to comment.