In [1]:
import os

from rdflib import Graph

from utils import build_graph_from_github_org
from visualizer import visualize_graph

In [2]:
# Authentication tokens for the GitHub API and the ORCID public API.
# They are read from the environment so that real secrets never have to be
# typed into (and accidentally committed with) the notebook. The placeholder
# strings are kept as a fallback when a variable is unset or empty.
github_token = os.environ.get("GITHUB_TOKEN") or "your-github-token"
orcid_token = os.environ.get("ORCID_TOKEN") or "your-orcid-token"

# Name of the GitHub organization and the corresponding ROR ID.
github_org_name = "Materials-Data-Science-and-Informatics"
corresponding_ror_id = "02nv7yv05"

# Build Graph

In [3]:
# Show the docstring of the graph builder (arguments, return value, notes).
help(build_graph_from_github_org)

Help on function build_graph_from_github_org in module utils:

build_graph_from_github_org(github_org_name, corresponding_ror_id, github_token, orcid_token)
    Builds an RDF graph representing a GitHub organization's repositories and contributors, 
    enriched with data from ORCID and ROR APIs. 
    
    Args:
        github_org_name (str): The GitHub organization name (e.g., "Materials-Data-Science-and-Informatics").
        corresponding_ror_id (str): The ROR (Research Organization Registry) identifier for the organization 
                                    (e.g., "https://ror.org/02nv7yv05").
        github_token (str): GitHub API access token for authenticating API requests.
        orcid_token (str): ORCID API access token for authenticating API requests.
    
    Returns:
        rdflib.Graph: An RDF graph containing information about the organization, its repositories, 
                      and contributors, represented in schema.org format.
    
    Notes:
        - The gr

In [4]:
# Build the RDF graph for the configured organization.
# As the log output shows, this sends requests to the ROR and GitHub APIs,
# so the cell needs network access.
graph = build_graph_from_github_org(
    github_org_name=github_org_name,
    corresponding_ror_id=corresponding_ror_id,
    github_token=github_token,
    orcid_token=orcid_token,
)

2024-12-12 11:21:50,504 - build_graph_logs - INFO - Starting the process to build an RDF graph for the GitHub organization: Materials-Data-Science-and-Informatics.
2024-12-12 11:21:50,504 - build_graph_logs - INFO - Fetching organization data from ROR for ID: 02nv7yv05
2024-12-12 11:21:50,504 - build_graph_logs - INFO - Starting query to ROR API for ROR ID: 02nv7yv05
2024-12-12 11:21:50,504 - build_graph_logs - INFO - Starting request to https://api.ror.org/organizations/02nv7yv05 with headers: {'Accept': 'application/json'}
2024-12-12 11:21:50,867 - build_graph_logs - INFO - Successfully fetched data from https://api.ror.org/organizations/02nv7yv05 (Status: 200)
2024-12-12 11:21:50,867 - build_graph_logs - INFO - Successfully fetched ROR data for ROR ID: 02nv7yv05
2024-12-12 11:21:52,854 - build_graph_logs - INFO - Fetching GitHub organization details for: Materials-Data-Science-and-Informatics
2024-12-12 11:21:52,854 - build_graph_logs - INFO - Starting request to https://api.github.

# Save and Load

In [4]:
# Save the graph data to "graph.ttl" so it can be loaded again later
# without rebuilding it.
graph.serialize(destination="graph.ttl")

<Graph identifier=N2d1fefea00eb45e58223dfefbf97a181 (<class 'rdflib.graph.Graph'>)>

In [5]:
# Alternatively, load a previously saved graph from "graph.ttl" instead of
# rebuilding it (`Graph` is imported in the notebook's import cell).
# Note: this rebinds `graph`, replacing any graph built in an earlier cell.
graph = Graph()
graph.parse("graph.ttl")

<Graph identifier=N510f499adbbf451b8c43b722cec50a44 (<class 'rdflib.graph.Graph'>)>

# Visualize Graph

In [4]:
# Show the docstring of the visualizer, including its filtering options.
help(visualize_graph)

Help on function visualize_graph in module visualizer:

visualize_graph(graph, hide_literals=False, hide_BNodes=False, hide_labels=False, hide_type_nodes=False, sparql_query='', physics=False)
    Visualizes an RDFLib graph using PyVis, with options to filter nodes, edges, 
    and customize the visualization. The resulting interactive graph is displayed 
    as an IFrame and saved to an HTML file.
    
    Args:
        graph (rdflib.Graph): The RDFLib graph to visualize.
        hide_literals (bool): If True, hides nodes that are literals.
                              Information in literals is preserved in node titles.
        hide_BNodes (bool): If True, hides blank nodes (BNodes). 
                            Information in BNodes is preserved in node titles.
        hide_labels (bool): If True, hides labels on nodes and edges.
                            All information is available in the node titles.
        hide_type_nodes (bool): If True, hides nodes connected by the `rdf:ty

In [7]:
# Visualizing the whole graph is not recommended: it takes some time and
# looks quite messy because of the large number of triples.
# Use `sparql_query` to select what you actually want to see, and function
# arguments such as `hide_labels` or `hide_literals` to simplify the graph
# (see the examples below).
visualize_graph(graph, physics=False)

graph.html


## Get all Repos and IAS9

In [6]:
# Select every triple that links a repository (schema:SoftwareSourceCode)
# to another node through the schema:sourceOrganisation predicate.
# NOTE(review): schema.org spells this property `sourceOrganization`;
# confirm that `sourceOrganisation` is what the graph builder actually emits.
query = """
PREFIX schema: <http://schema.org/>

SELECT ?subject ?predicate ?object
WHERE {
  ?subject a schema:SoftwareSourceCode .
  ?subject ?predicate ?object .
  FILTER (?predicate = schema:sourceOrganisation)
}
"""
visualize_graph(
    graph,
    sparql_query=query,
    hide_labels=True,
    physics=True,
)

graph.html


## Get all Repos and their Contributors

In [6]:
# Select every triple that links a repository (schema:SoftwareSourceCode)
# to another node through the schema:contributor predicate.
query = """
PREFIX schema: <http://schema.org/>

SELECT ?subject ?predicate ?object
WHERE {
  ?subject a schema:SoftwareSourceCode .
  ?subject ?predicate ?object .
  FILTER (?predicate = schema:contributor)
}
"""
visualize_graph(
    graph,
    sparql_query=query,
    physics=True,
)

graph.html


## Get the Repo of a given Name + connected Nodes

In [7]:
# Select all outgoing triples of the repository whose schema:name is
# "metador-push"; the repository is re-exposed as ?subject via BIND so the
# result has the ?subject ?predicate ?object columns the SELECT asks for.
query = """
PREFIX schema: <http://schema.org/>

SELECT ?subject ?predicate ?object
WHERE {
  ?software a schema:SoftwareSourceCode .
  ?software schema:name "metador-push" .

  ?software ?predicate ?object .
  
  BIND(?software AS ?subject) 
}

"""
visualize_graph(
    graph,
    sparql_query=query,
    hide_labels=True,
    physics=False,
)

graph.html


## View a specific Person

In [8]:
# Select all outgoing triples of every schema:Person whose
# schema:familyName is "Hofmann"; the person is re-exposed as ?subject.
query = """
PREFIX schema: <http://schema.org/>

SELECT ?subject ?predicate ?object
WHERE {
  ?person a schema:Person .
  ?person schema:familyName "Hofmann" .

  ?person ?predicate ?object .
  
  BIND(?person AS ?subject) 
}
"""
visualize_graph(
    graph,
    sparql_query=query,
    hide_labels=True,
    physics=False,
)

graph.html


In [9]:
# Select the triples around every schema:Person whose schema:familyName is
# "Hofmann" in both directions: the first UNION branch returns triples where
# the person is the subject, the second those where the person is the object.
query = """
PREFIX schema: <http://schema.org/>

SELECT ?subject ?predicate ?object
WHERE {
  ?person a schema:Person .
  ?person schema:familyName "Hofmann" .
  
  {
    ?person ?predicate ?object .
    BIND(?person AS ?subject) 
  }
  UNION
  {
    ?subject ?predicate ?person .
    BIND(?person AS ?object) 
  }
}
"""
visualize_graph(
    graph,
    sparql_query=query,
    hide_labels=True,
    physics=False,
)

graph.html
