Skip to content

Commit

Permalink
add upload from single dir
Browse files Browse the repository at this point in the history
  • Loading branch information
atrisovic committed Nov 13, 2021
1 parent b72ea38 commit 4fd42c3
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 44 deletions.
5 changes: 4 additions & 1 deletion action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ inputs:
DATAVERSE_DATASET_DOI:
required: true
description: "existing dataverse dataset doi"
GITHUB_DIR:
required: false
description: "upload only from specific dirs"

runs:
using: "composite"
Expand All @@ -37,4 +40,4 @@ runs:
"${{inputs.DATAVERSE_SERVER}}"
"${{inputs.DATAVERSE_DATASET_DOI}}"
$GITHUB_REPOSITORY
-d "${{inputs.GITHUB_DIR}}"
103 changes: 60 additions & 43 deletions dataverse.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,69 @@
from pyDataverse.api import NativeApi, DataAccessApi
from pyDataverse.models import Datafile
from os.path import isdir, isfile, join
from os.path import isdir, join
from time import sleep
from os import walk
import argparse
import requests
import hashlib
import sys

# Positional CLI arguments (legacy interface; superseded by argparse):
#   1: Dataverse API token used to authenticate write operations
#   2: base URL of the Dataverse server (slashes trimmed from both ends)
#   3: persistent identifier (DOI) of the target dataset
#   4: "owner/name" slug of the GitHub repository being uploaded
dataverse_token = sys.argv[1]
dataverse_server = sys.argv[2].strip("/")
dataverse_dataset_doi = sys.argv[3]
github_repository = sys.argv[4]
def parse_arguments(argv=None):
    """Parse command-line arguments for the Dataverse upload script.

    Parameters
    ----------
    argv : list[str] | None
        Argument vector to parse. Defaults to ``sys.argv[1:]`` when
        ``None``, which keeps the existing ``parse_arguments()`` call
        site working unchanged while allowing tests to inject argv.

    Returns
    -------
    argparse.Namespace
        Attributes ``token``, ``server``, ``doi``, ``repo`` and the
        optional ``dir`` (``None`` when ``-d``/``--dir`` is not given).
    """
    parser = argparse.ArgumentParser()

    # Mandatory arguments
    parser.add_argument("token", help="Dataverse token.")
    parser.add_argument("server", help="Dataverse server.")
    parser.add_argument("doi", help="Dataset DOI.")
    parser.add_argument("repo", help="GitHub repository.")

    # Optional arguments
    parser.add_argument("-d", "--dir", help="Uploads only a specific dir.")

    return parser.parse_args(argv)

if __name__ == '__main__':
    args = parse_arguments()
    dataverse_server = args.server.strip("/")
    api = NativeApi(dataverse_server, args.token)
    data_api = DataAccessApi(dataverse_server)

    # Mirror the repository into the dataset: first delete every file
    # in the latest dataset version, then re-upload from the checkout.
    dataset = api.get_dataset(args.doi)
    files_list = dataset.json()['data']['latestVersion']['files']

    # File deletion goes through the SWORD API endpoint.
    delete_api = dataverse_server + \
        '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/'
    for f in files_list:
        fileid = f["dataFile"]["id"]
        resp = requests.delete(
            delete_api + str(fileid),
            auth=(args.token, ""))
        if resp.status_code >= 400:
            # Best-effort: report the failure but keep going so one
            # stale file does not abort the whole sync.
            print("Failed to delete file id {}: HTTP {}".format(
                fileid, resp.status_code))

    # Upload every file from the checked-out repository (under 'repo/'),
    # optionally restricted to one sub-directory via -d/--dir.
    path = join('repo', args.dir) if args.dir else 'repo'
    for root, subdirs, files in walk(path):
        # Prune VCS/CI metadata so it is never published to the dataset.
        if '.git' in subdirs:
            subdirs.remove('.git')
        if '.github' in subdirs:
            subdirs.remove('.github')
        for f in files:
            df = Datafile()
            df.set({
                "pid": args.doi,
                "filename": f,
                # Strip the leading 'repo/' so labels are repo-relative.
                "directoryLabel": root[5:],
                "description":
                    "Uploaded with GitHub Action from {}.".format(
                        args.repo),
            })
            resp = api.upload_datafile(
                args.doi, join(root, f), df.json())
            sleep(0.05)  # give the server some time between uploads

    # Publish the updated dataset as a new major version.
    resp = api.publish_dataset(args.doi, release_type="major")

# Fetch the latest dataset version and remove every file it contains.
dataset = api.get_dataset(dataverse_dataset_doi)
files_list = dataset.json()['data']['latestVersion']['files']

# File deletion goes through the SWORD API endpoint.
delete_api = (dataverse_server
              + '/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/')
for entry in files_list:
    file_id = entry["dataFile"]["id"]
    resp = requests.delete(delete_api + str(file_id),
                           auth=(dataverse_token, ""))

# Walk the checked-out repository and upload each file, pruning the
# .git and .github metadata directories from the traversal.
for root, subdirs, files in walk('repo'):
    for hidden in ('.git', '.github'):
        if hidden in subdirs:
            subdirs.remove(hidden)
    for filename in files:
        datafile = Datafile()
        datafile.set({
            "pid": dataverse_dataset_doi,
            "filename": filename,
            "directoryLabel": root[5:],
            "description":
                "Uploaded with GitHub Action from {}.".format(
                    github_repository),
        })
        resp = api.upload_datafile(
            dataverse_dataset_doi, join(root, filename), datafile.json())

# Publish the updated dataset as a new major version.
resp = api.publish_dataset(dataverse_dataset_doi, release_type="major")

0 comments on commit 4fd42c3

Please sign in to comment.