Skip to content
This repository has been archived by the owner on Jun 2, 2022. It is now read-only.

Commit

Permalink
Merge pull request #13 from MITLibraries/file-source
Browse files: browse the repository at this point in the history
Add support for reading from a file
  • Loading branch information
Mike Graves committed Aug 12, 2020
2 parents 3a39f7e + ddb4697 commit b89cd1f
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 16 deletions.
1 change: 1 addition & 0 deletions Pipfile
Expand Up @@ -20,6 +20,7 @@ requests = "*"
attrs = "*"
bs4 = "*"
sickle = "*"
smart-open = {extras = ["aws"], version = "*"}

[requires]
python_version = "3.8"
Expand Down
160 changes: 149 additions & 11 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 16 additions & 4 deletions hoard/cli.py
@@ -1,10 +1,12 @@
from typing import Optional
from typing import Iterator, Optional

import click
from smart_open import open # type: ignore

from hoard.api import Api
from hoard.client import DataverseClient, DataverseKey, OAIClient, Transport
from hoard.source import JPAL
from hoard.models import Dataset
from hoard.source import JPAL, LincolnLab


@click.group()
Expand All @@ -13,7 +15,7 @@ def main():


@main.command()
@click.argument("source", type=click.Choice(["jpal"], case_sensitive=False))
@click.argument("source", type=click.Choice(["jpal", "llab"], case_sensitive=False))
@click.argument("source_url")
@click.option("--key", "-k", help="RDR authentication key.")
@click.option(
Expand All @@ -34,11 +36,21 @@ def ingest(
parent: str,
verbose: bool,
) -> None:
"""Ingest a source into RDR.
This will load items from the specified source located at SOURCE_URL
into an RDR instance. SOURCE_URL can be either a URL or a local or S3
file URL, e.g. file:///path/to/file, s3://bucket/key.
"""
count = 0
rdr = DataverseClient(Api(url, DataverseKey(key)), Transport())
records: Iterator[Dataset]
if source == "jpal":
client = OAIClient(source_url, "dataverse_json", "Jameel_Poverty_Action_Lab")
records = JPAL(client)
records = JPAL(client)
elif source == "llab":
stream = open(source_url)
records = LincolnLab(stream)
for record in records:
dv_id, p_id = rdr.create(record, parent=parent)
if verbose:
Expand Down
13 changes: 12 additions & 1 deletion hoard/source.py
@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup # type: ignore
import requests
from typing import Iterator
from typing import Iterator, TextIO


from hoard.client import DataverseClient, DSpaceClient, OAIClient
Expand Down Expand Up @@ -44,3 +44,14 @@ def __iter__(self) -> Iterator[Dataset]:

def __next__(self) -> Dataset:
...


class LincolnLab:
    """Iterator-style source of ``Dataset`` records backed by a text stream.

    The instance stores the stream it is given and returns itself from
    ``__iter__``, so it can be used directly in a ``for`` loop.

    NOTE(review): ``__next__`` is currently a placeholder (its body is just
    ``...``). As written it implicitly returns ``None`` and never raises
    ``StopIteration``, so iterating this object would not terminate. The
    record format of the stream is not visible here -- confirm it before
    implementing the parsing.
    """

    def __init__(self, stream: TextIO) -> None:
        # Only keeps a reference to the caller-supplied stream; nothing is
        # read from it at construction time.
        self.stream = stream

    def __iter__(self) -> Iterator[Dataset]:
        # The object acts as its own iterator.
        return self

    def __next__(self) -> Dataset:
        # Placeholder: no record is read from ``self.stream`` yet.
        # TODO: parse the next record into a Dataset and raise StopIteration
        # once the stream is exhausted.
        ...

0 comments on commit b89cd1f

Please sign in to comment.