Skip to content

Commit

Permalink
Add academic analytics feed
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Graves committed Apr 4, 2016
1 parent b677ead commit 582374a
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 11 deletions.
2 changes: 1 addition & 1 deletion carbon/__init__.py
Expand Up @@ -8,4 +8,4 @@

__version__ = '0.2.0'

from .app import people, person_feed
from .app import people, person_feed, articles, article_feed
44 changes: 43 additions & 1 deletion carbon/app.py
Expand Up @@ -8,7 +8,7 @@
from lxml import etree as ET
from sqlalchemy import func, select

from carbon.db import persons, orcids, dlcs, engine
from carbon.db import persons, orcids, dlcs, engine, aa_articles

AREAS = (
'ARCHITECTURE & PLANNING AREA', 'ENGINEERING AREA',
Expand Down Expand Up @@ -66,6 +66,17 @@ def people():
yield dict(zip(row.keys(), row))


def articles():
    """Generate articles from the AA_ARTICLE table.

    Yields one dict per row, mapping each column name to its value. The
    database connection is opened on first iteration and closed once the
    generator is exhausted or closed.
    """
    query = select([aa_articles])
    with closing(engine().connect()) as connection:
        result = connection.execute(query)
        for record in result:
            # Pair the row's column names with its values.
            yield dict(zip(record.keys(), record))


def initials(*args):
"""Turn `*args` into a space-separated string of initials.
Expand Down Expand Up @@ -134,6 +145,37 @@ def person_feed(out):
yield partial(_add_person, xf)


@contextmanager
def article_feed(out):
    """Context manager for writing an XML feed of articles to ``out``.

    Writes the XML declaration and opens a root ``articles`` element, then
    yields a one-argument callable; each call with an article mapping
    appends one ``article`` element to the feed. The root element and the
    underlying writer are closed when the ``with`` block exits.
    """
    with ET.xmlfile(out, encoding='UTF-8') as writer:
        writer.write_declaration()
        with writer.element('articles'):
            # Bind the writer so callers only need to pass the article.
            add_article = partial(_add_article, writer)
            yield add_article


def _add_article(xf, article):
    """Write a single ``article`` element to the incremental writer ``xf``.

    ``article`` is a mapping keyed by field name. Every field becomes a
    child element named after its key, emitted in the fixed order below.
    ``AA_MATCH_SCORE`` is converted with ``str()`` first; all other values
    are handed to ``add_child`` unchanged.
    """
    # Order matters: it determines the order of child elements in the feed.
    passthrough_fields = (
        'ARTICLE_ID',
        'ARTICLE_TITLE',
        'ARTICLE_YEAR',
        'AUTHORS',
        'DOI',
        'ISSN_ELECTRONIC',
        'ISSN_PRINT',
        'IS_CONFERENCE_PROCEEDING',
        'JOURNAL_FIRST_PAGE',
        'JOURNAL_LAST_PAGE',
        'JOURNAL_ISSUE',
        'JOURNAL_VOLUME',
        'JOURNAL_NAME',
        'MIT_ID',
        'PUBLISHER',
    )
    node = ET.Element('article')
    add_child(node, 'AA_MATCH_SCORE', str(article['AA_MATCH_SCORE']))
    for field in passthrough_fields:
        add_child(node, field, article[field])
    xf.write(node)


def _add_person(xf, person):
record = ET.Element('record')
add_child(record, 'field', person['MIT_ID'], name='[Proprietary_ID]')
Expand Down
8 changes: 6 additions & 2 deletions carbon/cli.py
Expand Up @@ -3,7 +3,7 @@

import click

from carbon import people, person_feed
from carbon import people, person_feed, articles, article_feed
from carbon.db import engine


Expand All @@ -23,10 +23,14 @@ def main(db, feed_type, out):
The feed will be printed to stdout if OUT is not specified.
FEED_TYPE should be 'people'.
FEED_TYPE should be 'people' or 'articles'.
"""
engine.configure(db)
if feed_type == 'people':
with person_feed(out) as f:
for person in people():
f(person)
elif feed_type == 'articles':
with article_feed(out) as f:
for article in articles():
f(article)
22 changes: 21 additions & 1 deletion carbon/db.py
Expand Up @@ -2,7 +2,7 @@
from __future__ import absolute_import

from sqlalchemy import (create_engine, Table, Column, String, Date, MetaData,
ForeignKey)
ForeignKey, Numeric, Text)


metadata = MetaData()
Expand Down Expand Up @@ -36,6 +36,26 @@
Column('ORCID', String))


# Column name / type pairs for the AA_ARTICLE table, in declaration order.
_AA_ARTICLE_COLUMNS = (
    ('AA_MATCH_SCORE', Numeric(3, 1)),
    ('ARTICLE_ID', String),
    ('ARTICLE_TITLE', String),
    ('ARTICLE_YEAR', String),
    ('AUTHORS', Text),
    ('DOI', String),
    ('ISSN_ELECTRONIC', String),
    ('ISSN_PRINT', String),
    ('IS_CONFERENCE_PROCEEDING', String),
    ('JOURNAL_FIRST_PAGE', String),
    ('JOURNAL_LAST_PAGE', String),
    ('JOURNAL_ISSUE', String),
    ('JOURNAL_NAME', String),
    ('JOURNAL_VOLUME', String),
    ('MIT_ID', String),
    ('PUBLISHER', String),
)

# Table of articles; no primary or foreign keys are declared here.
aa_articles = Table(
    'AA_ARTICLE', metadata,
    *(Column(name, type_) for name, type_ in _AA_ARTICLE_COLUMNS)
)


class Engine(object):
"""Database engine.
Expand Down
42 changes: 39 additions & 3 deletions tests/conftest.py
Expand Up @@ -3,11 +3,11 @@
from contextlib import closing
import os

from lxml.builder import ElementMaker
from lxml.builder import ElementMaker, E as B
import pytest
import yaml

from carbon.db import engine, metadata, persons, orcids, dlcs
from carbon.db import engine, metadata, persons, orcids, dlcs, aa_articles


@pytest.fixture(scope="session", autouse=True)
Expand All @@ -26,21 +26,33 @@ def records():
return r


@pytest.fixture(scope="session")
def aa_data():
    """Load the article fixture documents from ``fixtures/articles.yml``.

    Returns a list with one entry per YAML document in the file, resolved
    relative to this module's directory.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    data = os.path.join(current_dir, 'fixtures/articles.yml')
    with open(data) as fp:
        # safe_load_all restricts parsing to plain YAML types. The bare
        # yaml.load_all(fp) call is unsafe on arbitrary input and requires
        # an explicit Loader argument in current PyYAML releases.
        r = list(yaml.safe_load_all(fp))
    return r


@pytest.yield_fixture
def load_data(records):
def load_data(records, aa_data):
    """Populate the test database, yield to the test, then empty it again.

    All four tables are cleared before loading so each test starts from a
    known state, and cleared again afterwards on a fresh connection.
    """
    # Deletion order is the same for setup and teardown.
    tables = (persons, orcids, dlcs, aa_articles)

    with closing(engine().connect()) as conn:
        for table in tables:
            conn.execute(table.delete())
        for record in records:
            conn.execute(persons.insert(), record['person'])
            conn.execute(orcids.insert(), record['orcid'])
            conn.execute(dlcs.insert(), record['dlc'])
        # The article fixture is inserted in a single execute call.
        conn.execute(aa_articles.insert(), aa_data)

    yield

    with closing(engine().connect()) as conn:
        for table in tables:
            conn.execute(table.delete())


@pytest.fixture
Expand Down Expand Up @@ -90,3 +102,27 @@ def xml_data(E, xml_records):
def E():
return ElementMaker(namespace='http://www.symplectic.co.uk/hrimporter',
nsmap={None: 'http://www.symplectic.co.uk/hrimporter'})


@pytest.fixture
def articles_data(aa_data):
    """Expected XML tree for the article feed.

    Builds an ``articles`` root holding a single ``article`` element whose
    children mirror the values in ``fixtures/articles.yml``.
    """
    article = B.article(
        B.AA_MATCH_SCORE('0.9'),
        B.ARTICLE_ID('1234567'),
        B.ARTICLE_TITLE('Interaction between hatsopoulos microfluids and the Yawning Abyss of Chaos.'),
        B.ARTICLE_YEAR('1999'),
        B.AUTHORS('McRandallson, Randall M.|Lord, Dark'),
        B.DOI('10.0000/1234LETTERS56'),
        B.ISSN_ELECTRONIC('0987654'),
        B.ISSN_PRINT('01234567'),
        B.IS_CONFERENCE_PROCEEDING('0'),
        B.JOURNAL_FIRST_PAGE('666'),
        B.JOURNAL_LAST_PAGE('666'),
        B.JOURNAL_ISSUE('10'),
        B.JOURNAL_VOLUME('1'),
        B.JOURNAL_NAME('Bunnies'),
        B.MIT_ID('123456789'),
        B.PUBLISHER('MIT Press'),
    )
    return B.articles(article)
17 changes: 17 additions & 0 deletions tests/fixtures/articles.yml
@@ -0,0 +1,17 @@
---
AA_MATCH_SCORE: 0.9
ARTICLE_ID: '1234567'
ARTICLE_TITLE: Interaction between hatsopoulos microfluids and the Yawning Abyss of Chaos.
ARTICLE_YEAR: '1999'
AUTHORS: McRandallson, Randall M.|Lord, Dark
DOI: 10.0000/1234LETTERS56
ISSN_ELECTRONIC: '0987654'
ISSN_PRINT: '01234567'
IS_CONFERENCE_PROCEEDING: '0'
JOURNAL_FIRST_PAGE: '666'
JOURNAL_LAST_PAGE: '666'
JOURNAL_ISSUE: '10'
JOURNAL_VOLUME: '1'
JOURNAL_NAME: Bunnies
MIT_ID: '123456789'
PUBLISHER: MIT Press
18 changes: 16 additions & 2 deletions tests/test_app.py
Expand Up @@ -5,8 +5,9 @@
from lxml import etree as ET
import pytest

from carbon import people
from carbon.app import person_feed, ns, NSMAP, add_child, initials
from carbon import people, articles
from carbon.app import (person_feed, ns, NSMAP, add_child, initials,
article_feed,)


pytestmark = pytest.mark.usefixtures('load_data')
Expand Down Expand Up @@ -101,3 +102,16 @@ def test_person_feed_uses_utf8_encoding(records, xml_records, E):
f(r)
assert b.getvalue() == ET.tostring(xml, encoding="UTF-8",
xml_declaration=True)


def test_articles_generates_articles():
    """The first article yielded carries the fixture article's title."""
    rows = list(articles())
    first_title = rows[0]['ARTICLE_TITLE']
    assert 'Yawning Abyss of Chaos' in first_title


def test_article_feed_adds_article(aa_data, articles_data):
    """Feeding one article produces exactly the expected XML document."""
    expected = ET.tostring(articles_data, encoding='UTF-8',
                           xml_declaration=True)
    buf = BytesIO()
    with article_feed(buf) as add_article:
        add_article(aa_data[0])
    assert buf.getvalue() == expected
9 changes: 8 additions & 1 deletion tests/test_cli.py
Expand Up @@ -16,8 +16,15 @@ def runner():
return CliRunner()


def test_hr_returns_people(runner, E, xml_data):
def test_people_returns_people(runner, xml_data):
    """The 'people' feed type exits cleanly and prints the expected XML."""
    expected = ET.tostring(xml_data, encoding="UTF-8", xml_declaration=True)
    result = runner.invoke(main, ['sqlite://', 'people'])
    assert result.exit_code == 0
    assert result.output_bytes == expected


def test_articles_returns_articles(runner, articles_data):
    """The 'articles' feed type exits cleanly and prints the expected XML."""
    expected = ET.tostring(articles_data, encoding='UTF-8',
                           xml_declaration=True)
    result = runner.invoke(main, ['sqlite://', 'articles'])
    assert result.exit_code == 0
    assert result.output_bytes == expected

0 comments on commit 582374a

Please sign in to comment.