# Sinopia Entity Resource Template Classification

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import datetime
import os
import rdflib
import numpy as np
import pandas as pd
import tensorflow as tf
# Base directory of the local Sinopia data export. Set the SINOPIA_BASE_PATH
# environment variable to point the notebook at a different location; when it
# is unset, the original hard-coded path is used.
SINOPIA_BASE_PATH = os.environ.get(
    "SINOPIA_BASE_PATH",
    "/Users/jpnelson/2019/sinopia-data/2019/09/24")

## Setup
Create two RDF graphs: one for testing and one for training.

In [3]:
LDP = rdflib.Namespace('http://www.w3.org/ns/ldp#')


def _parse_turtle_directory(graph, directory):
    """Parse every file directly inside ``directory`` into ``graph`` as Turtle.

    Only the top level of ``directory`` is read; sub-directories are not
    descended into. Files are parsed in sorted name order so repeated runs
    load them in the same sequence.

    Raises:
        OSError: if ``directory`` cannot be listed (for example, it does not
            exist). Without ``onerror`` os.walk would yield nothing and
            ``next`` would fail with an unhelpful StopIteration instead.
    """
    def _reraise(error):
        raise error

    _, _, filenames = next(os.walk(directory, onerror=_reraise))
    for filename in sorted(filenames):
        graph.parse(os.path.join(directory, filename), format='turtle')


SINOPIA_TRAIN = rdflib.ConjunctiveGraph()
SINOPIA_TRAIN.namespace_manager.bind("ldp", LDP)
SINOPIA_TEST = rdflib.ConjunctiveGraph()
SINOPIA_TEST.namespace_manager.bind("ldp", LDP)

# Derive both data directories from SINOPIA_BASE_PATH so the location is
# configured in one place. The trailing "" keeps the trailing path separator
# that these constants have always carried.
SINOPIA_TESTING_PATH = os.path.join(SINOPIA_BASE_PATH, "test", "")
_parse_turtle_directory(SINOPIA_TEST, SINOPIA_TESTING_PATH)
SINOPIA_TRAIN_PATH = os.path.join(SINOPIA_BASE_PATH, "train", "")
_parse_turtle_directory(SINOPIA_TRAIN, SINOPIA_TRAIN_PATH)


In [4]:
# Report how many triples each graph holds after loading.
print(f"Testing triples: {len(SINOPIA_TEST):,}, Training triples: {len(SINOPIA_TRAIN):,}")

Testing triples: 1,081, Training triples: 5,637


Original (8/28): Testing triples: 446, Training triples: 2,602

In [4]:
# Load one training file on its own so its triples can be inspected directly.
single_graph = rdflib.ConjunctiveGraph()
single_graph.parse(os.path.join(SINOPIA_TRAIN_PATH, "00002.ttl"), format='turtle')
# serialize() returns bytes in older rdflib releases and str from rdflib 6
# onwards; decode only when bytes come back so the cell runs on either.
single_graph_turtle = single_graph.serialize(format='turtle')
if isinstance(single_graph_turtle, bytes):
    single_graph_turtle = single_graph_turtle.decode()
print(single_graph_turtle)

@prefix acl: <http://www.w3.org/ns/auth/acl#> .
@prefix as: <https://www.w3.org/ns/activitystreams#> .
@prefix dc: <http://purl.org/dc/terms/> .
@prefix dc11: <http://purl.org/dc/elements/1.1/> .
@prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@prefix ldp: <http://www.w3.org/ns/ldp#> .
@prefix memento: <http://mementoweb.org/ns#> .
@prefix ns1: <http://sinopia.io/vocabulary/> .
@prefix ns2: <http://id.loc.gov/ontologies/bibframe/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <http://schema.org/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix time: <http://www.w3.org/2006/time#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<https://trellis.stage.sinopia.io/repository/ucdavis/f179f138-8d01-47ac-8c9d-0e9c4ff7b3dd> a ns2

In [5]:
# Namespace for building URIs under the BIBFRAME ontology base IRI.
BF = rdflib.Namespace("http://id.loc.gov/ontologies/bibframe/")

In [32]:
def rdf_series(graph, subject):
    """Flatten all statements about ``subject`` into a single pandas Series.

    The first entry is labelled ``'subject'`` and holds ``str(subject)``.
    Each (predicate, object) pair yielded by
    ``graph.predicate_objects(subject=subject)`` then adds one entry whose
    label is the predicate and whose value is the object, both converted
    with ``str``. A predicate that occurs more than once yields repeated
    index labels.
    """
    pairs = list(graph.predicate_objects(subject=subject))
    # Labels and values are built in parallel so position i in one lines up
    # with position i in the other.
    labels = ['subject'] + [str(predicate) for predicate, _ in pairs]
    values = [str(subject)] + [str(obj) for _, obj in pairs]
    return pd.Series(values, labels)

def load_graph(graph):
    """Build one Series per distinct subject in ``graph``.

    Subjects are de-duplicated with a ``set`` before each is passed to
    ``rdf_series``, so the order of the returned list is not defined.

    For now this returns a plain list of Series; the intent is to eventually
    build data frames grouped by rdf:type.
    """
    distinct_subjects = set(graph.subjects())
    return [rdf_series(graph, node) for node in distinct_subjects]

In [27]:
# Build the Series for one specific subject URI in single_graph.
first = rdf_series(single_graph, 
                   rdflib.URIRef("https://trellis.stage.sinopia.io/repository/ucdavis/f179f138-8d01-47ac-8c9d-0e9c4ff7b3dd"))

In [33]:
# Despite the name, load_graph returns a list of pandas Series (one per
# subject), not DataFrames.
data_frames = load_graph(single_graph)

In [34]:
# Display the per-subject Series produced by load_graph.
data_frames

[subject                                                          fb4f24525b74e4965945a4a5b9be99f31b1
 http://www.w3.org/1999/02/22-rdf-syntax-ns#type         https://www.w3.org/ns/activitystreams#Create
 http://www.w3.org/1999/02/22-rdf-syntax-ns#type                   http://www.w3.org/ns/prov#Activity
 http://www.w3.org/ns/prov#atTime                                    2019-08-12T23:35:46.808000+00:00
 http://www.w3.org/ns/prov#wasAssociatedWith        https://cognito-idp.us-west-2.amazonaws.com/us...
 dtype: object,
 subject                                             https://trellis.stage.sinopia.io/repository/uc...
 http://www.w3.org/1999/02/22-rdf-syntax-ns#type     http://id.loc.gov/ontologies/bibframe/Abbrevia...
 http://sinopia.io/vocabulary/hasResourceTemplate         sinopia:resourceTemplate:bf2:Title:AbbrTitle
 http://www.w3.org/ns/prov#wasGeneratedBy                          fb4f24525b74e4965945a4a5b9be99f31b1
 http://id.loc.gov/ontologies/bibframe/mainTitle       Henry's