# Listar datasets

In [1]:
import requests
from collections import namedtuple

In [2]:
# Ask the CKAN action API at datahub.io for its package_list.
p = requests.get('http://datahub.io/api/3/action/package_list')

In [3]:
# HTTP status code of the package_list response.
p.status_code

200

In [4]:
# Decode the response body as JSON.
# NOTE(review): the name `json` matches the standard-library module name;
# later cells read this variable, so renaming it must be done everywhere at once.
json = p.json()

In [5]:
# Top-level keys of the decoded response.
json.keys()

[u'help', u'success', u'result']

Por padrão, o CKAN retorna status 200; por isso devemos verificar também o campo `success` na resposta:

In [6]:
# The 'success' entry of the response body, checked in addition to the HTTP status.
json['success']

True

In [7]:
# Keep the 'result' entry; the following cells slice and filter it as a list of names.
lis = json['result']

In [8]:
# Preview the first ten entries.
lis[:10]

[u'1033-prog',
 u'10leading-discharges-alive-and-dead-by-principal-diagnosis-type-of-hospital-nationality-and-sex-2009',
 u'12',
 u'13',
 u'1790-2010-historical-population-york-county-virginia',
 u'1855spanishrailways',
 u'1902-norfolk-virginia-geopdf',
 u'1921-newport-news-virginia-geopdf',
 u'1944-norfolk-south-virginia-geopdf',
 u'1948-norfolk-south-virginia-geopdf']

Filtrar ieee

In [9]:
# Entries of `lis` that contain the substring 'ieee'.
[x for x in lis if 'ieee' in x]

[u'rkb-explorer-ieee', u'twc-ieeevis']

In [10]:
# Recompute the filter instead of reading IPython's `_` (the last output),
# which only holds the expected list when the previous cell ran immediately before.
ieee = [x for x in lis if 'ieee' in x][0]

# Pegar informações de dataset

In [11]:
name = ieee
# Pass the identifier through `params` so requests URL-encodes it, instead of
# formatting it into the query string by hand.
p2 = requests.get('http://datahub.io/api/3/action/package_show',
                  params={'id': name})

In [12]:
# HTTP status code of the package_show response.
p2.status_code

200

In [13]:
# Decode the package_show response body as JSON.
json2 = p2.json()

In [14]:
# Top-level keys of the decoded package_show response.
json2.keys()

[u'help', u'success', u'result']

In [15]:
# The 'success' entry of the package_show response body.
json2['success']

True

In [16]:
# Keep the 'result' entry: the dataset metadata dict read by the cells below.
dataset = json2['result']

In [17]:
# Metadata fields present for this dataset.
dataset.keys()

[u'license_title',
 u'maintainer',
 u'relationships_as_object',
 u'private',
 u'maintainer_email',
 u'revision_timestamp',
 u'id',
 u'metadata_created',
 u'metadata_modified',
 u'author',
 u'author_email',
 u'state',
 u'version',
 u'license_id',
 u'type',
 u'resources',
 u'num_resources',
 u'tags',
 u'tracking_summary',
 u'groups',
 u'organization',
 u'relationships_as_subject',
 u'num_tags',
 u'name',
 u'isopen',
 u'url',
 u'notes',
 u'owner_org',
 u'extras',
 u'title',
 u'revision_id']

In [18]:
# Value stored under 'revision_timestamp'.
dataset[u'revision_timestamp']

u'2013-10-10T23:27:21.946709'

## O ID é outro ponto de acesso

O ID também pode ser usado no http://datahub.io/api/3/action/package_show?id={}

- No datahub, alguns relacionamentos usam 'id'; outros usam 'name'.
- No RDF, os relacionamentos usam 'url'.

In [19]:
# Value of the dataset's 'id' field.
dataset['id']

u'feec8014-10d0-47c1-9f4d-eed33dc68d83'

In [20]:
# Value of the dataset's 'name' field.
dataset['name']

u'rkb-explorer-ieee'

In [21]:
# Value of the dataset's 'url' field.
dataset['url']

u'http://ieee.rkbexplorer.com'

## Obter links em "Additional Info"

In [22]:
Link = namedtuple('Link', 'name count')
additional_info = dataset['extras']


def is_link(x):
    """Return True when the extra's 'key' starts with 'links:'."""
    return x['key'].startswith('links:')


def link_tuple(x):
    """Build a Link from one extra.

    The name is the key with its first 6 characters (the 'links:' prefix)
    dropped; the count is the extra's 'value' converted to int.
    """
    return Link(name=x['key'][6:], count=int(x['value']))


# Collect one Link per extra whose key carries the 'links:' prefix.
links = []
for extra in additional_info:
    if is_link(extra):
        links.append(link_tuple(extra))
links

[Link(name=u'rkb-explorer-acm', count=2949),
 Link(name=u'rkb-explorer-citeseer', count=1182),
 Link(name=u'rkb-explorer-curriculum', count=2),
 Link(name=u'rkb-explorer-dblp', count=5867),
 Link(name=u'rkb-explorer-dotac', count=50),
 Link(name=u'rkb-explorer-eprints', count=643),
 Link(name=u'rkb-explorer-ibm', count=29),
 Link(name=u'rkb-explorer-kisti', count=516),
 Link(name=u'rkb-explorer-laas', count=97),
 Link(name=u'rkb-explorer-newcastle', count=73),
 Link(name=u'rkb-explorer-nsf', count=1),
 Link(name=u'rkb-explorer-oai', count=417),
 Link(name=u'rkb-explorer-pisa', count=18),
 Link(name=u'rkb-explorer-rae2001', count=17),
 Link(name=u'rkb-explorer-resex', count=6),
 Link(name=u'rkb-explorer-risks', count=3),
 Link(name=u'rkb-explorer-roma', count=3),
 Link(name=u'rkb-explorer-southampton', count=7),
 Link(name=u'rkb-explorer-ulm', count=5),
 Link(name=u'rkb-explorer-wiki', count=9)]

## Obter links em propriedades de relationships

In [23]:
Relationship = namedtuple('Relationship', 'id comment type object subject')
as_object = dataset['relationships_as_object']


def relation(x):
    """Convert one raw relationship dict into a Relationship tuple.

    'id', 'comment' and 'type' are read from the entry itself; the object
    and subject package ids come from its nested '__extras' dict.
    """
    rel_id = x['id']
    rel_comment = x['comment']
    rel_type = x['type']
    extras = x['__extras']
    return Relationship(rel_id, rel_comment, rel_type,
                        extras['object_package_id'],
                        extras['subject_package_id'])


relationships_as_object = [relation(entry) for entry in as_object]
relationships_as_object

[Relationship(id=u'525f35ed-c2e7-4798-983c-d7723909419a', comment=u'30', type=u'links_to', object=u'feec8014-10d0-47c1-9f4d-eed33dc68d83', subject=u'6e829250-dd11-45d2-bdcc-15281e1bda21'),
 Relationship(id=u'1509e4aa-d34b-47a8-8702-6eff8a9487ed', comment=u'561', type=u'links_to', object=u'feec8014-10d0-47c1-9f4d-eed33dc68d83', subject=u'6088e558-4e1c-43e8-bd8e-73a36a8d7a5a'),
 Relationship(id=u'a838bbb7-cd3c-4e7d-9ca8-f5352841ea66', comment=u'6', type=u'links_to', object=u'feec8014-10d0-47c1-9f4d-eed33dc68d83', subject=u'77b1e682-7595-470c-b222-40561b0f0f32'),
 Relationship(id=u'4bd5b9a0-7b03-4a33-8977-3a32e023d1af', comment=u'mika_i_zika', type=u'depends_on', object=u'feec8014-10d0-47c1-9f4d-eed33dc68d83', subject=u'15f90bfb-75fa-4825-a384-cff14d4f5dca')]

In [24]:
# Apply the same `relation` conversion to the entries stored under
# 'relationships_as_subject'.
as_subject = dataset[u'relationships_as_subject']
relationships_as_subject = [relation(entry) for entry in as_subject]
relationships_as_subject

[Relationship(id=u'f93608f6-8cc8-4596-9159-35f6587058eb', comment=u'1', type=u'links_to', object=u'6954f22f-2db9-49b6-afe7-b73d46338075', subject=u'feec8014-10d0-47c1-9f4d-eed33dc68d83'),
 Relationship(id=u'cb1d2be0-2e66-4ab1-b263-19a7803047e5', comment=u'1182', type=u'links_to', object=u'382a8681-733a-420b-9420-ee71f09fb0af', subject=u'feec8014-10d0-47c1-9f4d-eed33dc68d83'),
 Relationship(id=u'e60dca89-2764-4bbb-9436-d030c737b9cb', comment=u'17', type=u'links_to', object=u'11830d16-fc34-49ac-acbb-4ffee77f2398', subject=u'feec8014-10d0-47c1-9f4d-eed33dc68d83'),
 Relationship(id=u'7e24475f-4314-4928-8259-81e79c2f98d7', comment=u'18', type=u'links_to', object=u'd0459637-cf13-4217-a245-6bc7139552a3', subject=u'feec8014-10d0-47c1-9f4d-eed33dc68d83'),
 Relationship(id=u'a40b95a5-a007-44a0-b9be-117774950bee', comment=u'2', type=u'links_to', object=u'7cfc8b4d-11d6-428b-929e-33d32f0e3d37', subject=u'feec8014-10d0-47c1-9f4d-eed33dc68d83'),
 Relationship(id=u'e6308565-0121-42c6-a2a5-c3be8836fccf

## Obter recursos

In [25]:
Resource = namedtuple('Resource', 'id revision_id description format url')


def resource(x):
    """Build a Resource from a raw resource dict.

    Each namedtuple field is read from the dict key of the same name, in
    field order.
    """
    return Resource(*[x[field] for field in Resource._fields])


resources = [resource(entry) for entry in dataset['resources']]
resources

[Resource(id=u'a21ae558-a78c-4de0-aeb3-065dff8b216a', revision_id=u'09be97c3-605f-4ca6-aa92-8940208b2b70', description=u'SPARQL endpoint', format=u'api/sparql', url=u'http://ieee.rkbexplorer.com/sparql/'),
 Resource(id=u'326f75fa-6a3d-4fa7-9f4c-40beb671600a', revision_id=u'09be97c3-605f-4ca6-aa92-8940208b2b70', description=u'XML Sitemap', format=u'meta/sitemap', url=u'http://ieee.rkbexplorer.com/sitemap.xml'),
 Resource(id=u'3614a6bc-96c3-40a8-85e9-c80356a5f4da', revision_id=u'09be97c3-605f-4ca6-aa92-8940208b2b70', description=u'voiD file', format=u'meta/void', url=u'http://ieee.rkbexplorer.com/models/void.ttl'),
 Resource(id=u'79633a5f-b262-4003-900a-240171f31bcc', revision_id=u'09be97c3-605f-4ca6-aa92-8940208b2b70', description=u'Example (RDF/XML)', format=u'example/rdf+xml', url=u'http://ieee.rkbexplorer.com/id/person-21757c2767705194600b55ff6b0ef692-1e427d6bbb6d2bb2aa5434059d6c58f4'),
 Resource(id=u'55544720-70b9-4aae-b055-ef6111515514', revision_id=u'09be97c3-605f-4ca6-aa92-894020

Obter VOID

In [26]:
# First resource whose format is 'meta/void'; indexing raises IndexError when none matches.
[x for x in resources if x.format == 'meta/void'][0]

Resource(id=u'3614a6bc-96c3-40a8-85e9-c80356a5f4da', revision_id=u'09be97c3-605f-4ca6-aa92-8940208b2b70', description=u'voiD file', format=u'meta/void', url=u'http://ieee.rkbexplorer.com/models/void.ttl')

Obter RDF

In [27]:
# First resource whose format is 'application/rdf+xml'; indexing raises IndexError when none matches.
[x for x in resources if x.format == 'application/rdf+xml'][0]

Resource(id=u'55544720-70b9-4aae-b055-ef6111515514', revision_id=u'09be97c3-605f-4ca6-aa92-8940208b2b70', description=u'', format=u'application/rdf+xml', url=u'http://ieee.rkbexplorer.com/models/dump.tgz')

In [28]:
# Look the RDF dump resource up explicitly rather than through IPython's `_`,
# whose value depends on which cell happened to run last.
rdf_resource = [x for x in resources if x.format == 'application/rdf+xml'][0]
download = requests.get(rdf_resource.url)

ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))

In [29]:
# Look the voiD resource up explicitly rather than through `_26`, which is
# tied to the execution count of an earlier cell and breaks on re-run.
void_resource = [x for x in resources if x.format == 'meta/void'][0]
download = requests.get(void_resource.url)

ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))

### Os links de ieee.rkbexplorer.com (dataset rkb-explorer-ieee) estão offline =/