In [1]:
import os
import shutil
import csv

# Define paths
media_folder = "media"
nlp_folder = "transcripts/nlp_references"
combined_folder = "combined"


def extract_tokens(nlp_path):
    """Return the first field of every non-empty data row in a '|'-delimited file.

    The first row is treated as a header and skipped. An empty file yields an
    empty list instead of raising ``StopIteration``.

    Args:
        nlp_path: Path to the pipe-delimited ``.nlp`` file to read.

    Returns:
        A list with the first column of each non-empty row, in file order.
    """
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for any file object passed to csv.reader.
    with open(nlp_path, 'r', encoding='utf-8', newline='') as nlp_file:
        reader = csv.reader(nlp_file, delimiter='|')
        next(reader, None)  # Skip header row; tolerate a completely empty file
        return [row[0] for row in reader if row]


# Ensure the combined folder exists
os.makedirs(combined_folder, exist_ok=True)

# Get all MP3 files in the media folder, keyed by file name without extension.
# The listing is sorted so that processing order (and the log) is deterministic.
if os.path.isdir(media_folder):
    mp3_files = {
        os.path.splitext(f)[0]: os.path.join(media_folder, f)
        for f in sorted(os.listdir(media_folder))
        if f.endswith('.mp3')
    }
else:
    # Nothing to do without a media folder; report it rather than crash.
    print(f"Media folder '{media_folder}' not found: nothing to process.")
    mp3_files = {}

# Process each corresponding NLP file
for base_name, mp3_path in mp3_files.items():
    nlp_path = os.path.join(nlp_folder, base_name + ".nlp")

    # Skip MP3 files that have no matching NLP reference
    if not os.path.exists(nlp_path):
        print(f"Skipping {base_name}: No corresponding NLP file found.")
        continue

    # Create a unique folder for this file combination
    folder_path = os.path.join(combined_folder, base_name)
    os.makedirs(folder_path, exist_ok=True)
    txt_output_path = os.path.join(folder_path, base_name + ".txt")

    # Extract words from NLP file
    words = extract_tokens(nlp_path)

    # Save extracted words to a .txt file
    with open(txt_output_path, 'w', encoding='utf-8') as txt_file:
        txt_file.write(" ".join(words))  # Space-separated words

    # Move the MP3 last, so a failure while reading or writing the transcript
    # leaves the source file in place in the media folder.
    shutil.move(mp3_path, os.path.join(folder_path, base_name + ".mp3"))

    print(f"Processed {base_name}: Folder created with .mp3 and .txt file.")


Processed 2020-03-0230487MTN-Ghana-2019-Annual-Results-Call: Folder created with .mp3 and .txt file.
Processed 2020-Annual-Results-Call-Recording: Folder created with .mp3 and .txt file.
Processed 4329526: Folder created with .mp3 and .txt file.
Processed 4351517: Folder created with .mp3 and .txt file.
Processed 4372696: Folder created with .mp3 and .txt file.
Processed 4420696: Folder created with .mp3 and .txt file.
Processed 4423872: Folder created with .mp3 and .txt file.
Processed 4426736: Folder created with .mp3 and .txt file.
Processed 4430051: Folder created with .mp3 and .txt file.
Processed 4432298: Folder created with .mp3 and .txt file.
Processed 4443871: Folder created with .mp3 and .txt file.
Processed 4443920: Folder created with .mp3 and .txt file.
Processed 4446796: Folder created with .mp3 and .txt file.
Processed 4448760: Folder created with .mp3 and .txt file.
Processed 4449269: Folder created with .mp3 and .txt file.
Processed 4450488: Folder created with .mp3 an