Skip to content

Commit

Permalink
Correct results when not collapsing homophones
Browse files Browse the repository at this point in the history
issue #785
  • Loading branch information
stannam committed Dec 2, 2021
1 parent 6a5d475 commit 83bdf7e
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
2 changes: 1 addition & 1 deletion corpustools/neighdens/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def print_all_neighden_results(output_filename, neighors_dict):
except ValueError:
s = word
t = ''
t = t[:-1].replace('.', '')
t = t[:-2].replace('.', '')
if not neighbors:
print('\t'.join([s, t, '0', '']), file=outf)
else:
Expand Down
16 changes: 14 additions & 2 deletions corpustools/neighdens/neighborhood_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,19 @@ def neighborhood_density_all_words(corpus_context, tierdict, tier_type = None, s
algorithm = algorithm,
max_distance = max_distance,
collapse_homophones = collapse_homophones)
w_t_key = f'{w} [{w.transcription}]'

# w_t_key is the spelling/transcription pair that serves as the key in the 'results' dictionary.
# A unique number is appended after the pair because the same spelling + transcription pair may occur more than once.
# See the documentation on the 'Collapse homophones' option for the use case, and also Issue #785.
n = 0
w_t_key = f'{w} [{w.transcription}]{n}'
if w_t_key in results:
while True:
n += 1
w_t_key = f'{w} [{w.transcription}]{n}'
if w_t_key not in results:
break

results[w_t_key] = [getattr(r, output_format) for r in res[1]]
setattr(w.original, settable_attr.name, res[0])

Expand Down Expand Up @@ -189,7 +201,7 @@ def neighborhood_density(corpus_context, query, tierdict,
continue

matches.append(w)
neighbors = set(matches)-set([query])
neighbors = matches

return (len(neighbors), neighbors)

Expand Down

0 comments on commit 83bdf7e

Please sign in to comment.