Skip to content

Commit

Permalink
launching script for new extraction date does not overwrite metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
juliepierson committed May 4, 2017
1 parent 9baf623 commit db1b03b
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions csw-harvester.py
Expand Up @@ -78,7 +78,7 @@ def id_sdi_already_exists(id_sdi):
sources = 'sources-csw.csv'
completion = False
#date = time.strftime("%d/%m/%Y") # dd/mm/yyyy, ex. '31/08/2015'. Par défaut, date du jour : time.strftime("%d/%m/%Y")
date = time.strftime("%Y-%m-%d") # yyyy-mm-dd, ex. '2015-08-31'. Par défaut, date du jour : time.strftime("%Y-%m-%d")
date = time.strftime("%Y-%m-%d") # yyyy-mm-dd, ex. '2015-08-31'. Par défaut, date du jour : time.strftime("%Y-%m-%d")

parser = OptionParser()

Expand All @@ -92,7 +92,7 @@ def id_sdi_already_exists(id_sdi):
action="store", default=LOG_FILE,
help="LOG file")
parser.add_option("-c", "--completion", dest="completion",
action="store_true", default=True,
action="store_true", default=False,
help="completion mode")
parser.add_option("-d", "--date", dest="date",
action="store", default=date,
Expand Down Expand Up @@ -160,9 +160,8 @@ def get_records(num, name, begin_record, end_record, MAXR, url, url_csw):
# pour les mettre dans un dico
csw_records = csw.records
# si on est en mode "complétion" : il ne doit pas y avoir de doublons au niveau des id longs de md
if completion == True:
print 'true'
check_id_md(csw_records, num)
if completion == True:
check_id_md(csw_records, num, options.date)
# si end_record est spécifié et possible, on le prend en compte :
if end_record and end_record < csw.results['matches'] :
getMATCHES = end_record
Expand Down Expand Up @@ -206,7 +205,7 @@ def get_records(num, name, begin_record, end_record, MAXR, url, url_csw):
csw_records = csw.records
# si on est en mode "complétion" : il ne doit pas y avoir de doublons au niveau des id longs de md
if completion == True:
check_id_md(csw_records, num)
check_id_md(csw_records, num, options.date)
logger.info('matches %s (from %s to %s) ; first record %s ; nextrecord %s ; returned %s' % (getMATCHES-begin_record+1, begin_record, getMATCHES, queries['startposition'], csw.results['nextrecord'], csw.results['returned']))
# création du dico vide pour stocker les valeurs
dico_bdd = create_dico_bdd()
Expand Down Expand Up @@ -324,8 +323,8 @@ def get_value_resp(dico_values, bigliste, type_contact, id_yes):
id_responsibleparty+=1


# vérifie si l'identifiant long de la md est déjà dans la base pour une idg
def check_id_md(csw_records, id_sdi):
# vérifie si l'identifiant long de la md est déjà dans la base pour une idg et une même date
def check_id_md(csw_records, id_sdi, date_extraction):

# list of future ids
future_ids = csw_records.keys()
Expand All @@ -335,7 +334,9 @@ def check_id_md(csw_records, id_sdi):
con = psycopg2.connect("host=" + host + " port=" + port + " dbname=" + dbname + " user=" + user + " password=" + password)
# con = psycopg2.connect("dbname=" + dbname + " user=" + user + " password=" + password)
cur = con.cursor()
query = 'select metadata.identifier from ' + schema + '.metadata, ' + schema + '.sdi, ' + schema + '.extraction where metadata.id_metadata = extraction.id_metadata and extraction.id_sdi = sdi.id_sdi and sdi.id_sdi = ' + id_sdi + ';'
query = 'select metadata.identifier from ' + schema + '.metadata, ' + schema + '.sdi, ' \
+ schema + '.extraction where metadata.id_metadata = extraction.id_metadata and extraction.id_sdi = sdi.id_sdi and sdi.id_sdi = ' \
+ id_sdi + " and extraction.date_extraction='" + date_extraction + "';"
cur.execute(query)
current_ids = [record[0] for record in cur]
cur.close()
Expand Down Expand Up @@ -392,6 +393,7 @@ def get_value_list(values, liste, path, nb_max_char):
def fill_db(dico_values):
con = None
con = psycopg2.connect("host=" + host + " port=" + port + " dbname=" + dbname + " user=" + user + " password=" + password)
# con = psycopg2.connect("dbname=" + dbname + " user=" + user + " password=" + password)
cur = con.cursor()
list_table = ['sdi','metadata','extraction','dataidentification','geographicboundingbox','keyword','responsibleparty','contact']
for table in list_table:
Expand Down

0 comments on commit db1b03b

Please sign in to comment.