Skip to content

Commit

Permalink
Created an ignore table so that seemingly duplicate records will not continuously be flagged.
Browse files Browse the repository at this point in the history
  • Loading branch information
hover2pi committed Mar 2, 2016
1 parent f235e94 commit 76afb3f
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions astrodbkit/astrodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,15 @@ def add_data(self, ascii, table, delimiter='|', bands=''):
del_records = []
for n,new_rec in enumerate(new_records):

# Convert relative path to absolute path
relpath = new_rec['spectrum']
if relpath.startswith('$'):
abspath = os.popen('echo {}'.format(relpath.split('/')[0])).read()[:-1]
if abspath: new_rec['spectrum'] = relpath.replace(relpath.split('/')[0],abspath)

# Test if the file exists and try to pull metadata from the FITS header
if os.path.isfile(new_rec['spectrum']):
new_records[n]['spectrum'] = relpath
new_records[n] = _autofill_spec_record(new_rec)
else:
print 'Error adding the spectrum at {}'.format(new_rec['spectrum'])
Expand All @@ -144,8 +151,8 @@ def add_data(self, ascii, table, delimiter='|', bands=''):
print 'No new records added to the {} table. Check your input file {}'.format(table,ascii)

# Run table clean up
try: self.clean_up(table)
except: print 'Could not run clean_up() method.'
# try: self.clean_up(table)
# except: print 'Could not run clean_up() method.'

else: print 'Please check the file path {}'.format(ascii)

Expand Down Expand Up @@ -180,15 +187,15 @@ def clean_up(self, table):
# Check for records with identical required values but different ids.
if table.lower()!='sources': req_keys = columns[np.where(np.logical_and(required,columns!='id'))]

# List of new pairs to ignore
new_ignore = []
# List of old and new pairs to ignore
ignore, new_ignore = ignore or [], []

while any(duplicate):
# Pull out duplicates one by one
duplicate = self.query("SELECT t1.id, t2.id FROM {0} t1 JOIN {0} t2 ON t1.source_id=t2.source_id WHERE t1.id!=t2.id AND {1}{2}{3}"\
.format(table, ' AND '.join(['t1.{0}=t2.{0}'.format(i) for i in req_keys]), (' AND '\
+' AND '.join(["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))".format(','.join(map(str,[id1,id2]))) for id1,id2 \
in zip(ignore['id1'],ignore['id2'])])) if ignore!='' else '', (' AND '\
in zip(ignore['id1'],ignore['id2'])])) if ignore else '', (' AND '\
+' AND '.join(["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))".format(','.join(map(str,ni))) for ni \
in new_ignore])) if new_ignore else ''), fetch='one')

Expand Down

0 comments on commit 76afb3f

Please sign in to comment.