From 9f04ce0a6ada3f6bf4a93fe0c28b83985f604703 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Mon, 30 Aug 2021 10:53:23 -0700 Subject: [PATCH] Version bump to 1.2 --- docs/source/acoustics_encoding.rst | 16 +++----- docs/source/changelog.rst | 1 + polyglotdb/__init__.py | 2 +- polyglotdb/acoustics/pitch/base.py | 41 ++++++++++++------- polyglotdb/corpus/audio.py | 15 ++++++- .../query/annotations/attributes/acoustic.py | 4 +- polyglotdb/query/annotations/results.py | 2 +- 7 files changed, 51 insertions(+), 30 deletions(-) diff --git a/docs/source/acoustics_encoding.rst b/docs/source/acoustics_encoding.rst index ced820e4..2a79eb9f 100644 --- a/docs/source/acoustics_encoding.rst +++ b/docs/source/acoustics_encoding.rst @@ -51,8 +51,7 @@ The default source is Praat. c.analyze_pitch(source='reaper') -If the source is `praat`, the Praat executable must be discoverable on the system path (i.e., a call of `praat` in a terminal works). -Likewise, if the source is `reaper`, the Reaper executable must be on the path or the full path to the Reaper executable must be specified. +If the source is `praat`, the Praat executable must be discoverable on the system path (i.e., a call of `praat` in a terminal works). Likewise, if the source is `reaper`, the Reaper executable must be on the path or the full path to the Reaper executable must be specified. .. _pitch_algorithms: @@ -75,20 +74,17 @@ Similar to the `source`, attribute, the `algorithm` can be toggled between :code c.analyze_pitch(algorithm='speaker_adapted') -The :code:`"base"` algorithm uses a minimum pitch of 55 Hz and a maximum pitch of 480 Hz. +The :code:`"base"` algorithm uses a default minimum pitch of 50 Hz and a maximum pitch of 500 Hz, but these can be changed through the ``absolute_min_pitch`` and ``absolute_max_pitch`` parameters. The :code:`"gendered"` algorithm checks whether a `Gender` property is available for speakers. 
If a speaker has a property value that starts with `f` (i.e., female), -utterances by that speakers will use a minimum pitch of 100 Hz and a maximum pitch of 480 Hz. If they have a property +utterances by that speaker will use a minimum pitch of 100 Hz and a maximum pitch of 500 Hz. If they have a property value of `m` (i.e., male), -utterances by that speakers will use a minimum pitch of 55 Hz and a maximum pitch of 400 Hz. +utterances by that speaker will use a minimum pitch of 50 Hz and a maximum pitch of 400 Hz. The :code:`"speaker_adapted"` algorithm does two passes of pitch estimation. The first is identical to :code:`"base"` -and uses a minimum pitch of 55 Hz and a maximum pitch of 480 Hz. -This first pass is used to estimate by-speaker means and standard deviations of F0. The mean and SD for each speaker is -then used to generate per-speaker minimum and maximum pitch values. -The minimum pitch value is 3 standard deviations below the speaker mean, and the maximum pitch value is 3 standard -deviations above the speaker mean. +and uses a minimum pitch of 50 Hz and a maximum pitch of 500 Hz (or whatever the parameters have been set to). +This first pass is used to estimate by-speaker means of F0. Speaker-specific pitch floors and ceilings are calculated by shifting the speaker's mean pitch down or up by the number of octaves that the ``adjusted_octaves`` parameter specifies. The default is 1, so the per-speaker pitch range will be one octave below and above the speaker's mean pitch. .. _intensity_encoding: diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 5f0585d0..d017a2f1 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -10,6 +10,7 @@ Version 1.2 * Upgraded InfluxDB compatibility to 1.8.9 * Changed Praat TextGrid handling to use praatio 4.1 * Phone parsing no longer includes blank intervals (i.e. 
silences), so preceding and following phone calculations have changed +* Update speaker adjusted pitch algorithm to use octave based min and max pitch rather than the more permissive standard deviation approach Version 1.0 =========== diff --git a/polyglotdb/__init__.py b/polyglotdb/__init__.py index 74b37149..f17ec6c5 100755 --- a/polyglotdb/__init__.py +++ b/polyglotdb/__init__.py @@ -1,6 +1,6 @@ __ver_major__ = 1 __ver_minor__ = 2 -__ver_patch__ = '0a1' +__ver_patch__ = 0 __version__ = f"{__ver_major__}.{__ver_minor__}.{__ver_patch__}" __all__ = ['query', 'io', 'corpus', 'config', 'exceptions', 'CorpusContext', 'CorpusConfig'] diff --git a/polyglotdb/acoustics/pitch/base.py b/polyglotdb/acoustics/pitch/base.py index 65ff5ce1..b29553a2 100644 --- a/polyglotdb/acoustics/pitch/base.py +++ b/polyglotdb/acoustics/pitch/base.py @@ -26,7 +26,8 @@ def analyze_utterance_pitch(corpus_context, utterance, source='praat', min_pitch (u:{utt_type}:{corpus_name})-[:spoken_by]->(s), (u)-[:spoken_in]->(d) WHERE u.id = $utterance_id - RETURN u, d, r.channel as channel'''.format(corpus_name=corpus_context.cypher_safe_name, utt_type=utt_type) + RETURN u, d, r.channel as channel'''.format(corpus_name=corpus_context.cypher_safe_name, + utt_type=utt_type) results = corpus_context.execute_cypher(statement, utterance_id=utterance_id) segment_mapping = SegmentMapping() for r in results: @@ -50,7 +51,7 @@ def analyze_utterance_pitch(corpus_context, utterance, source='praat', min_pitch if v['F0'] is None or v['F0'] <= 0: continue p = TimePoint(k) - p.add_value('F0', v['F0']) + p.add_value('F0', v['F0']) track.add(p) if 'pitch' not in corpus_context.hierarchy.acoustics: corpus_context.hierarchy.add_acoustic_properties(corpus_context, 'pitch', [('F0', float)]) @@ -74,8 +75,9 @@ def update_utterance_pitch_track(corpus_context, utterance, new_track): (p:{phone_type}:{corpus_name})-[:contained_by*]->(u) WHERE u.id = $utterance_id SET u.pitch_last_edited = $date - RETURN u, d, r.channel as 
channel, s, collect(p) as p'''.format(corpus_name=corpus_context.cypher_safe_name, - utt_type=utt_type, phone_type=phone_type) + RETURN u, d, r.channel as channel, s, collect(p) as p'''.format( + corpus_name=corpus_context.cypher_safe_name, + utt_type=utt_type, phone_type=phone_type) results = corpus_context.execute_cypher(statement, utterance_id=utterance_id, date=time_stamp) for r in results: @@ -136,6 +138,9 @@ def analyze_pitch(corpus_context, source='praat', algorithm='base', call_back=None, + absolute_min_pitch=50, + absolute_max_pitch=500, + adjusted_octaves=1, stop_check=None, multiprocessing=True): """ @@ -143,16 +148,27 @@ def analyze_pitch(corpus_context, ---------- corpus_context : :class:`~polyglotdb.corpus.audio.AudioContext` source : str + Program to use for analyzing pitch, either ``praat`` or ``reaper`` algorithm : str + Algorithm to use, ``base``, ``gendered``, or ``speaker_adjusted`` + absolute_min_pitch : int + Absolute pitch floor + absolute_max_pitch : int + Absolute pitch ceiling + adjusted_octaves : int + How many octaves around the speaker's mean pitch to set the speaker adjusted pitch floor and ceiling + stop_check : callable + Function to check whether processing should stop early call_back : callable - stop_check : callable + Function to report progress + multiprocessing : bool + Flag whether to use multiprocessing or threading Returns ------- """ - absolute_min_pitch = 50 - absolute_max_pitch = 500 + if not 'utterance' in corpus_context.hierarchy: raise (Exception('Must encode utterances before pitch can be analyzed')) segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING).grouped_mapping('speaker') @@ -181,18 +197,17 @@ def analyze_pitch(corpus_context, output = analyze_segments(v, pitch_function, stop_check=stop_check, multiprocessing=multiprocessing) sum_pitch = 0 - sum_square_pitch = 0 n = 0 for seg, track in output.items(): for t, v in track.items(): v = v['F0'] if v is not None and v > 0: # only voiced 
frames - n += 1 sum_pitch += v - sum_square_pitch += v * v - speaker_data[k] = [sum_pitch / n, math.sqrt((n * sum_square_pitch - sum_pitch * sum_pitch) / (n * (n - 1)))] + mean_pitch = sum_pitch / n + speaker_data[k] = int(mean_pitch / math.pow(2, adjusted_octaves)), \ + int( mean_pitch * math.pow(2, adjusted_octaves)) for i, ((speaker,), v) in enumerate(segment_mapping.items()): if call_back is not None: @@ -214,9 +229,7 @@ def analyze_pitch(corpus_context, pitch_function = generate_pitch_function(source, min_pitch, max_pitch, path=path) elif algorithm == 'speaker_adjusted': - mean_pitch, sd_pitch = speaker_data[speaker] - min_pitch = int(mean_pitch - 3 * sd_pitch) - max_pitch = int(mean_pitch + 3 * sd_pitch) + min_pitch, max_pitch = speaker_data[speaker] if min_pitch < absolute_min_pitch: min_pitch = absolute_min_pitch if max_pitch > absolute_max_pitch: diff --git a/polyglotdb/corpus/audio.py b/polyglotdb/corpus/audio.py index 3bf8d0b2..e045d846 100755 --- a/polyglotdb/corpus/audio.py +++ b/polyglotdb/corpus/audio.py @@ -256,7 +256,9 @@ def generate_spectrogram(self, discourse, file_type='consonant', begin=None, end signal, sr = self.load_waveform(discourse, file_type, begin, end) return generate_spectrogram(signal, sr) - def analyze_pitch(self, source='praat', algorithm='base', stop_check=None, call_back=None, multiprocessing=True): + def analyze_pitch(self, source='praat', algorithm='base', + absolute_min_pitch=50, absolute_max_pitch=500, adjusted_octaves=1, + stop_check=None, call_back=None, multiprocessing=True): """ Analyze pitch tracks and save them to the database. 
@@ -268,6 +270,12 @@ def analyze_pitch(self, source='praat', algorithm='base', stop_check=None, call_ Program to use for analyzing pitch, either ``praat`` or ``reaper`` algorithm : str Algorithm to use, ``base``, ``gendered``, or ``speaker_adjusted`` + absolute_min_pitch : int + Absolute pitch floor + absolute_max_pitch : int + Absolute pitch ceiling + adjusted_octaves : int + How many octaves around the speaker's mean pitch to set the speaker adjusted pitch floor and ceiling stop_check : callable Function to check whether processing should stop early call_back : callable @@ -275,7 +283,8 @@ def analyze_pitch(self, source='praat', algorithm='base', stop_check=None, call_ multiprocessing : bool Flag whether to use multiprocessing or threading """ - analyze_pitch(self, source, algorithm, stop_check, call_back, multiprocessing=multiprocessing) + analyze_pitch(self, source, algorithm, stop_check=stop_check, call_back=call_back, multiprocessing=multiprocessing, + absolute_min_pitch=absolute_min_pitch, absolute_max_pitch=absolute_max_pitch, adjusted_octaves=adjusted_octaves) def analyze_utterance_pitch(self, utterance, source='praat', **kwargs): """ @@ -813,6 +822,8 @@ def _save_measurement_tracks(self, acoustic_name, tracks, speaker): v = sanitize_value(value[name], type) if v is not None: fields[name] = v + elif type in [int, float]: + fields[name] = type(-1) if not fields: continue if set_label is None: diff --git a/polyglotdb/query/annotations/attributes/acoustic.py b/polyglotdb/query/annotations/attributes/acoustic.py index 558c274a..620cdefe 100755 --- a/polyglotdb/query/annotations/attributes/acoustic.py +++ b/polyglotdb/query/annotations/attributes/acoustic.py @@ -251,9 +251,9 @@ def hydrate(self, corpus, utterance_id, begin, end): undef_regions.append((x1, x[i + 1])) new_data = RawTrack() for o in self.attribute.output_columns: - y = [data[x1][o] for x1 in x] + y = [data[x1][o] for x1 in x if data[x1][o] and data[x1][o] > 0] if len(y) > 1: - f = 
interpolate.interp1d([float(x1) for x1 in x], y) + f = interpolate.interp1d([float(x1) for x1 in x if data[x1][o] and data[x1][o] > 0], y) for k in new_times: out_time = k if self.attribute.relative_time: diff --git a/polyglotdb/query/annotations/results.py b/polyglotdb/query/annotations/results.py index dd68bab2..a36cba81 100755 --- a/polyglotdb/query/annotations/results.py +++ b/polyglotdb/query/annotations/results.py @@ -223,7 +223,7 @@ def rows_for_csv(self): for point in line.track: line = {} line.update(baseline) - line.update({'time': point.time}) + line.update({'time': round(point.time, 4)}) line.update(point.select_values(self.track_columns)) yield line else: