## Imports

In [None]:
# !pip install git+https://github.com/BlueBrain/nexus-forge

In [None]:
import getpass

In [None]:
import yaml

In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
import nexussdk as nxs

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
# TODO: Add image explaining the dataset

## Setup

In [None]:
# Organization half of the "<organization>/<project>" bucket used by the SDK call and the forge.
ORGANIZATION = "tutorialnexus"

In [None]:
# Project half of the "<organization>/<project>" bucket; the value below is only a placeholder.
PROJECT = "YOUR PROJECT"  # TODO: replace with the name of your own project

In [None]:
# Base URL of the Nexus deployment; used as the SDK environment and as the forge store endpoint.
DEPLOYMENT = "https://sandbox.bluebrainnexus.io/v1"

In [None]:
# Ask for the access token interactively (getpass does not echo the input),
# so the token is never stored in the notebook itself.
TOKEN = getpass.getpass()

In [None]:
# JSON-LD context resource, identified by "https://context.org".
# "@vocab" is the default namespace for unprefixed terms, "schema" is a prefix
# for schema.org, and the Movie / Rating terms are mapped onto schema.org types.
context = {
    "@id": "https://context.org",
    "@context": {
        "@vocab": "https://sandbox.bluebrainnexus.io/v1/vocabs/",
        "schema": "http://schema.org/",
        "Movie": {"@id": "schema:Movie"},
        "Rating": {"@id": "schema:Rating"},
    },
}

In [None]:
# Point the Nexus SDK at the deployment URL.
nxs.config.set_environment(DEPLOYMENT)

In [None]:
# Hand the interactively entered token to the Nexus SDK.
nxs.config.set_token(TOKEN)

In [None]:
# Create the JSON-LD context as a resource in ORGANIZATION/PROJECT; the forge
# model configuration refers to it through its "@id" ("https://context.org").
# NOTE(review): presumably fails if the resource already exists (e.g. on a re-run) — confirm.
nxs.resources.create(ORGANIZATION, PROJECT, context)

In [None]:
# Forge configuration (serialised to ./config.yml before the forge is built).
#   Model      - RdfModel whose context is looked up by IRI in the
#                ORGANIZATION/PROJECT bucket of the store.
#   Store      - BlueBrainNexus store at DEPLOYMENT.
#   Formatters - "identifier" template with two positional placeholders.
# NOTE(review): file_resource_mapping is a relative path — presumably resolved
# against the working directory; confirm it exists from where this runs.
config = dict(
    Model=dict(
        name="RdfModel",
        origin="store",
        source="BlueBrainNexus",
        context=dict(
            iri="https://context.org",
            bucket=f"{ORGANIZATION}/{PROJECT}",
        ),
    ),
    Store=dict(
        name="BlueBrainNexus",
        endpoint=DEPLOYMENT,
        versioned_id_template="{x.id}?rev={x._store_metadata._rev}",
        file_resource_mapping="../../configurations/nexus-store/file-to-resource-mapping.hjson",
    ),
    Formatters=dict(
        identifier="https://movielens.org/{}/{}",
    ),
)

In [None]:
# Persist the configuration as YAML; the forge is constructed from this file.
with open("./config.yml", mode="w") as config_file:
    config_file.write(yaml.dump(config))

In [None]:
# Build the forge from the YAML configuration file, authenticated with TOKEN
# and bound to the same ORGANIZATION/PROJECT bucket as the model context.
forge = KnowledgeGraphForge("./config.yml", token=TOKEN, bucket=f"{ORGANIZATION}/{PROJECT}")

## Data

In [None]:
!curl -s -O http://files.grouplens.org/datasets/movielens/ml-latest-small.zip && unzip -qq ml-latest-small.zip && cd ml-latest-small && ls

In [None]:
# Folder the CSV files are read from (the directory listed by the download cell).
directory = "./ml-latest-small"

In [None]:
# Load movies.csv; no dtype override here, so movieId keeps the dtype pandas infers.
movies_df = pd.read_csv(f"{directory}/movies.csv")

In [None]:
# Load ratings.csv, forcing movieId to pandas' "string" dtype instead of the inferred one.
ratings_df = pd.read_csv(f"{directory}/ratings.csv", dtype={"movieId":"string"})

In [None]:
# Load tags.csv, forcing movieId to pandas' "string" dtype instead of the inferred one.
tags_df = pd.read_csv(f"{directory}/tags.csv", dtype={"movieId":"string"})

In [None]:
# Load links.csv; it is joined to the movies table on movieId in the next step.
links_df = pd.read_csv(f"{directory}/links.csv")

In [None]:
# One row per movie that appears in both tables: movie columns plus its link columns.
movies_links_df = movies_df.merge(links_df, on="movieId")

## Resources

In [None]:
# Constant "type" column: every movie row is typed "Movie" (a term defined in the JSON-LD context).
movies_links_df["type"] = "Movie"

In [None]:
# Constant "type" column: every rating row is typed "Rating" (a term defined in the JSON-LD context).
ratings_df["type"] = "Rating"

In [None]:
# Constant "type" column: every tag row is typed "Tag".
# NOTE(review): the JSON-LD context only declares Movie and Rating explicitly;
# "Tag" presumably falls back to the "@vocab" namespace — confirm.
tags_df["type"] = "Tag"

In [None]:
# Give every movie an "id" built with the "identifier" formatter from the forge
# configuration ("https://movielens.org/{}/{}"), filled with "movies" and the movieId.
movies_links_df["id"] = movies_links_df["movieId"].apply(lambda x: forge.format("identifier", "movies", x))

In [None]:
# Turn the "|"-separated genres string of each movie into a list of genre names.
movies_links_df["genres"] = movies_links_df["genres"].apply(lambda x: x.split("|"))

In [None]:
# Link every rating to its movie with the same identifier scheme used for the
# movie resources. The identifier is derived from the rating's OWN movieId:
# taking it from movies_links_df would align on row index, attaching each
# rating to whichever movie sits at the same row position and leaving NaN for
# ratings beyond the length of the movie table.
# The "movieId.id" column name uses "." — presumably turned into a nested
# movieId -> id link by from_dataframe's "." argument; confirm in the kgforge docs.
ratings_df["movieId.id"] = ratings_df["movieId"].apply(lambda x: forge.format("identifier", "movies", x))

In [None]:
# Link every tag to its movie: build the movie identifier from the tag's own
# movieId with the same "identifier" formatter used for the movie resources.
tags_df["movieId.id"] = tags_df["movieId"].apply(lambda x: forge.format("identifier", "movies", x))

In [None]:
# Convert the movie rows into forge resources.
# Presumably np.nan is the missing-value marker and "." the separator for
# nested column names — confirm against the kgforge from_dataframe documentation.
movies_resources = forge.from_dataframe(movies_links_df, np.nan, ".")

In [None]:
# Convert the rating rows into forge resources (same np.nan / "." arguments as for movies).
ratings_resources = forge.from_dataframe(ratings_df, np.nan, ".")

In [None]:
# Convert the tag rows into forge resources (same np.nan / "." arguments as for movies).
tags_resources = forge.from_dataframe(tags_df, np.nan, ".")

In [None]:
# Sanity check: show the first movie resource.
print(movies_resources[0])

In [None]:
# Sanity check: show the first rating resource.
print(ratings_resources[0])

In [None]:
# Sanity check: show one tag resource.
# NOTE(review): 629 is a hard-coded sample position; it presumably fails with
# an index error if fewer than 630 tags were loaded.
print(tags_resources[629])

## Register

In [None]:
# Register the movie resources through the forge.
forge.register(movies_resources)

In [None]:
# Register the rating resources through the forge.
forge.register(ratings_resources)

In [None]:
# Register the tag resources through the forge.
forge.register(tags_resources)

## Query

In [None]:
# SPARQL query selecting the id and title of every non-deprecated Movie that is
# the movieId target of some resource whose tag is "thought-provoking"
# ("^vocab:movieId / vocab:tag" walks the movieId link backwards, then reads tag).
# "Movie" is written without a prefix — presumably resolved by the forge from
# the JSON-LD context before the query is sent; confirm.
query = """
    PREFIX vocab: <https://sandbox.bluebrainnexus.io/v1/vocabs/>
    PREFIX nxv: <https://bluebrain.github.io/nexus/vocabulary/>
    SELECT ?id ?title
    WHERE {
        ?id a Movie ;
            nxv:deprecated false ;
            vocab:title ?title ;
            ^vocab:movieId / vocab:tag "thought-provoking" . 
    }
"""

In [None]:
# Run the query, capped at 100 results.
# debug=True is passed — presumably to display the query actually sent; confirm in the kgforge docs.
resources = forge.sparql(query, limit=100, debug=True)

In [None]:
# Distinct movie titles among the query results (a movie tagged several times appears once).
set(forge.as_dataframe(resources).title)

In [None]:
# Retrieve the full resource behind the first query hit.
# NOTE(review): indexing [0] presumably fails when the query returned no results.
movie = forge.retrieve(resources[0].id)

In [None]:
# Show the retrieved movie resource.
print(movie)

In [None]:
# Display the store-side metadata attached to the retrieved resource (the
# configured versioned_id_template reads its _rev field).
movie._store_metadata