11import datetime
22from functools import partial
33import operator
4+ import os
45import requests
56import time
67
@@ -24,6 +25,7 @@ def __init__(self, url):
2425 logger .info ('Initializing client' )
2526
2627 def authenticate (self , email , password ):
28+ """Authenticate user to DSpace API."""
2729 header = self .header
2830 data = {'email' : email , 'password' : password }
2931 session = requests .post (f'{ self .url } /login' , headers = header ,
@@ -54,6 +56,7 @@ def get_record(self, uuid, rec_type):
5456
5557 def filtered_item_search (self , key , string , query_type ,
5658 selected_collections = '' ):
59+ """Performs a search against the filtered items endpoint."""
5760 offset = 0
5861 items = ''
5962 item_links = []
@@ -63,10 +66,9 @@ def filtered_item_search(self, key, string, query_type,
6366 'query_val[]' : string , '&collSel[]' :
6467 selected_collections , 'limit' : 200 , 'offset' : offset }
6568 logger .info (params )
66- print (endpoint )
6769 response = requests .get (endpoint , headers = self .header ,
6870 params = params , cookies = self .cookies )
69- print (f'Response url: { response .url } ' )
71+ logger . info (f'Response url: { response .url } ' )
7072 response = response .json ()
7173 items = response ['items' ]
7274 for item in items :
@@ -75,6 +77,7 @@ def filtered_item_search(self, key, string, query_type,
7577 return item_links
7678
7779 def post_coll_to_comm (self , comm_handle , coll_name ):
80+ """Posts a collection to a specified community."""
7881 endpoint = f'{ self .url } /handle/{ comm_handle } '
7982 community = requests .get (endpoint , headers = self .header ,
8083 cookies = self .cookies ).json ()
@@ -83,7 +86,52 @@ def post_coll_to_comm(self, comm_handle, coll_name):
8386 endpoint2 = f'{ self .url } /communities/{ comm_id } /collections'
8487 coll_id = requests .post (endpoint2 , headers = self .header ,
8588 cookies = self .cookies , json = collection ).json ()
86- return coll_id ['link' ]
89+ coll_id = coll_id ['uuid' ]
90+ logger .info (f'Collection posted: { coll_id } ' )
91+ return coll_id
92+
def post_items_to_coll(self, coll_id, coll_metadata, file_dict,
                       ingest_type):
    """Post items, and their bitstreams, to a specified collection.

    This is a generator. For every metadata record whose
    ``file_identifier`` value occurs in at least one key of
    ``file_dict``, the record is posted as a new item to the collection,
    the matching bitstreams are posted to that item, and the new item's
    uuid is yielded. Records that have no ``file_identifier`` element, or
    whose identifier matches no key of ``file_dict``, are skipped.

    Args:
        coll_id: Identifier of the target collection (interpolated into
            the ``/collections/{coll_id}/items`` endpoint).
        coll_metadata: Iterable of dicts, each holding a ``'metadata'``
            list of dicts with ``'key'`` and ``'value'`` entries. Elements
            whose key is ``'file_identifier'`` are removed from that list
            in place before the record is posted.
        file_dict: Dict whose keys are matched against the file
            identifier; passed on to ``post_bitstreams_to_item``.
        ingest_type: Passed on unchanged to ``post_bitstreams_to_item``.

    Yields:
        The ``'uuid'`` value from each item-creation response.
    """
    for item_metadata in coll_metadata:
        metadata = item_metadata['metadata']
        # Reset for every record so that an identifier left over from a
        # previous record can never be applied to this one.
        file_identifier = None
        # Iterate over a snapshot because matching elements are removed
        # from the live list inside the loop.
        for element in [e for e in metadata
                        if e['key'] == 'file_identifier']:
            file_identifier = element['value']
            metadata.remove(element)
        if file_identifier is None:
            continue
        if not any(file_identifier in name for name in file_dict):
            continue
        endpoint = f'{self.url}/collections/{coll_id}/items'
        item_id = requests.post(endpoint, headers=self.header,
                                cookies=self.cookies,
                                json=item_metadata).json()
        item_id = item_id['uuid']
        bit_ids = self.post_bitstreams_to_item(item_id, file_identifier,
                                               file_dict, ingest_type)
        # post_bitstreams_to_item is lazy; consuming it here is what
        # actually performs the uploads.
        for bit_id in bit_ids:
            logger.info(f'Bitstream posted: {bit_id}')
        yield item_id
116+
def post_bitstreams_to_item(self, item_id, file_identifier, file_dict,
                            ingest_type):
    """Post bitstreams to a specified item.

    This is a generator. Every entry of ``file_dict`` whose key starts
    with ``file_identifier`` is uploaded to the item's ``bitstreams``
    endpoint, and the uuid from each upload response is yielded. Nothing
    is uploaded until the generator is iterated.

    Args:
        item_id: Identifier of the item (interpolated into the
            ``/items/{item_id}/bitstreams`` endpoint).
        file_identifier: Prefix selecting which ``file_dict`` keys to
            upload.
        file_dict: Dict mapping a file key to the file's location: a
            path opened with ``open`` when ``ingest_type`` is
            ``'local'``, a URL fetched with ``requests.get`` when it is
            ``'remote'``.
        ingest_type: Either ``'local'`` or ``'remote'``.

    Yields:
        The ``'uuid'`` value from each bitstream-creation response.

    Raises:
        ValueError: If a matching file is found and ``ingest_type`` is
            neither ``'local'`` nor ``'remote'``.
    """
    header_upload = {'accept': 'application/json'}
    for key, bitstream in file_dict.items():
        if not key.startswith(file_identifier):
            continue
        if ingest_type not in ('local', 'remote'):
            # Fail explicitly rather than hitting an unbound (or stale,
            # left over from a previous iteration) upload body.
            raise ValueError(
                f'Unsupported ingest_type: {ingest_type!r}')
        endpoint = f'{self.url}/items/{item_id}/bitstreams'
        # Pass the name via params so requests URL-encodes file names
        # containing spaces, '&', '#', etc.
        params = {'name': os.path.basename(bitstream)}
        if ingest_type == 'local':
            # The context manager closes the handle once it is uploaded.
            with open(bitstream, 'rb') as data:
                response = requests.post(endpoint, headers=header_upload,
                                         cookies=self.cookies,
                                         params=params, data=data)
        else:
            # Send the downloaded bytes, not the Response object itself.
            data = requests.get(bitstream).content
            response = requests.post(endpoint, headers=header_upload,
                                     cookies=self.cookies,
                                     params=params, data=data)
        yield response.json()['uuid']
87135
88136 def _pop_inst (self , class_type , rec_obj ):
89137 """Populate class instance with data from record."""
@@ -100,6 +148,7 @@ def _pop_inst(self, class_type, rec_obj):
100148 return rec_obj
101149
102150 def _build_uuid_list (self , rec_obj , children ):
151+ """Builds a list of the uuids for an object's children."""
103152 child_list = []
104153 for child in rec_obj [children ]:
105154 child_list .append (child ['uuid' ])
@@ -138,15 +187,14 @@ class MetadataEntry(BaseRecord):
138187 language = Field ()
139188
140189
def build_file_dict_remote(directory_url, file_type, file_dict):
    """Add the files linked from a remote directory page to a file dict.

    Fetches ``directory_url``, walks the links found in the returned
    HTML, and for every link that ends with ``file_type`` stores
    ``directory_url + link`` in ``file_dict`` under the link with its
    trailing ``.{file_type}`` extension removed.

    Args:
        directory_url: URL of the directory listing page; also used as
            the prefix of every stored file URL.
        file_type: File extension without the leading dot, e.g. ``'pdf'``.
        file_dict: Dict to populate; it is modified in place.

    Returns:
        The same ``file_dict`` object that was passed in.
    """
    extension = f'.{file_type}'
    response = requests.get(directory_url)
    # NOTE(review): `html` is presumably lxml.html, imported elsewhere in
    # the file; each item from iterlinks() carries the link at index 2.
    links = html.fromstring(response.content).iterlinks()
    for link in links:
        target = link[2]
        if not target.endswith(file_type):
            continue
        # Strip only the trailing extension; str.replace would also
        # remove any earlier occurrence inside the name itself.
        if target.endswith(extension):
            file_identifier = target[:-len(extension)]
        else:
            file_identifier = target
        file_dict[file_identifier] = f'{directory_url}{target}'
    return file_dict
150198
151199
152200def elapsed_time (start_time , label ):
0 commit comments