Skip to content

Commit

Permalink
[PS] summarize: skip a word when its (seg, env) pair is a duplicate
Browse files (browse the repository at this point in the history)
re: issue #801
  • Loading branch information
stannam committed Apr 20, 2022
1 parent a3d2716 commit 3f1b04d
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions corpustools/gui/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,12 @@ def _summarize(self, segsum):
sc = line['Min Syllable Number'], line['Max Syllable Number'] # syllable count min/max
filters = wf, pc, sc
for i, seg in enumerate(segs):
try:
# do not count duplicates. For example, in s.ɑ.s.i, [s_] should only be counted once.
if segenvfilters == (seg, envs[i], filters, tier, res_type):
continue
except NameError:
pass
segenvfilters = seg, envs[i], filters, tier, res_type # segs + envs + (freq and phoneme/syllable count filters) + result_type
if res_type == 'positive': # if positive search
if line['raw_env'][i] is not None: # then check if the word is in results for satisfying env[i]
Expand All @@ -598,6 +604,7 @@ def _summarize(self, segsum):
continue
typefreq[segenvfilters] += 1 # the word is in the result for NOT satisfying the env[i] so +1
tokenfreq[segenvfilters] += line['Word'].frequency # and add token freq accordingly
del segenvfilters

self.rows = list()
for k, v in sorted(typefreq.items()):
Expand Down

0 comments on commit 3f1b04d

Please sign in to comment.