Ensure cleanup of audio files on reset/discourse removal

MontrealCorpusTools · Jul 16, 2019 · 70ed648 · 70ed648
1 parent 79571b9
commit 70ed648
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 4 deletions.
diff --git a/polyglotdb/acoustics/io.py b/polyglotdb/acoustics/io.py
@@ -64,7 +64,6 @@ def add_discourse_sound_info(corpus_context, discourse, filepath):
     else:
         shutil.copy(filepath, low_freq_path)
         low_freq_rate = sample_rate
-    user_path = os.path.expanduser('~')
     statement = '''MATCH (d:Discourse:{corpus_name}) where d.name = {{discourse_name}}
                     SET d.file_path = {{filepath}},
                     d.consonant_file_path = {{consonant_filepath}},
@@ -74,9 +73,9 @@ def add_discourse_sound_info(corpus_context, discourse, filepath):
                     d.sampling_rate = {{sampling_rate}},
                     d.num_channels = {{n_channels}}'''.format(corpus_name=corpus_context.cypher_safe_name)
     corpus_context.execute_cypher(statement, filepath=filepath,
-                                  consonant_filepath=consonant_path.replace(user_path, '~'),
-                                  vowel_filepath=vowel_path.replace(user_path, '~'),
-                                  low_freq_filepath=low_freq_path.replace(user_path, '~'),
+                                  consonant_filepath=consonant_path,
+                                  vowel_filepath=vowel_path,
+                                  low_freq_filepath=low_freq_path,
                                   duration=duration, sampling_rate=sample_rate,
                                   n_channels=n_channels, discourse_name=discourse)
 

diff --git a/polyglotdb/corpus/base.py b/polyglotdb/corpus/base.py
@@ -345,6 +345,7 @@ def reset(self, call_back=None, stop_check=None):
         """
         self.reset_acoustics()
         self.reset_graph(call_back, stop_check)
+        shutil.rmtree(self.config.base_dir, ignore_errors=True)
 
     def query_graph(self, annotation_node):
         """
@@ -447,6 +448,10 @@ def remove_discourse(self, name):
         name : str
             Name of the discourse to remove
         """
+        d = self.discourse_sound_file(name)
+        if d['consonant_file_path'] is not None and os.path.exists(d['consonant_file_path']):
+            directory = self.discourse_audio_directory(name)
+            shutil.rmtree(directory, ignore_errors=True)
         # Remove tokens in discourse
         statement = '''MATCH (d:{corpus_name}:Discourse)<-[:spoken_in]-(n:{corpus_name})
         WHERE d.name = {{discourse}}

diff --git a/tests/test_base.py b/tests/test_base.py
@@ -41,6 +41,9 @@ def test_load_discourse(graph_db, mfa_test_dir, textgrid_test_dir):
         q = c.query_speakers().filter(c.speaker.name == 'mfa')
         assert q.count() > 0
 
+        d = c.discourse_sound_file('acoustic_corpus')
+        assert os.path.exists(d['consonant_file_path'])
+
 
 def test_remove_discourse(graph_db):
     with CorpusContext('load_remove_test', **graph_db) as c:
@@ -67,3 +70,9 @@ def test_remove_discourse(graph_db):
         q = c.query_speakers().filter(c.speaker.name == 'mfa')
         assert q.count() == 0
 
+        d = c.discourse_sound_file('acoustic_corpus')
+        assert os.path.exists(d['consonant_file_path'])
+
+        c.remove_discourse('acoustic_corpus')
+        assert not os.path.exists(d['consonant_file_path'])
+