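## Extra Utility Code
Helper cells for organizing the project's files: renaming and renumbering voice recordings, cleaning label text, and converting audio formats.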

### Imports and Variables

In [1]:
import os
import re

# Folder scanned by the voice-file cells that read `directory`.
# Alternative scratch location: 'Extra_Codes_Directory'
directory = 'Voices'

# Make sure the folder exists so a later os.listdir(directory) does not fail.
os.makedirs(name=directory, exist_ok=True)

### File Renaming

In [None]:
def rename_files(start_number, directory, extension=".m4a"):
    """Shift a consecutive run of numbered files down by one.

    Starting at ``start_number``, each ``<n><extension>`` inside ``directory``
    is renamed to ``<n - 1><extension>``. The walk then moves on to ``n + 1``
    and stops at the first number for which no file exists.

    Args:
        start_number: Number of the first file to rename.
        directory: Folder that contains the numbered files.
        extension: File extension including the leading dot. Defaults to
            ``".m4a"``.

    Raises:
        FileExistsError: If the target name is already taken. ``os.rename``
            would silently replace that file on POSIX systems, so the rename
            is refused instead of destroying a recording.
    """
    current = start_number

    while True:
        old_filename = os.path.join(directory, f"{current}{extension}")
        new_filename = os.path.join(directory, f"{current - 1}{extension}")

        if not os.path.exists(old_filename):
            print(f"No file found: {old_filename}. Stopping.")
            break

        # Only the first target can be occupied (later targets were vacated by
        # the previous iteration), but checking every time keeps this cheap and
        # obviously safe.
        if os.path.exists(new_filename):
            raise FileExistsError(
                f"Refusing to overwrite existing file: {new_filename}"
            )

        print(f"Renaming {old_filename} → {new_filename}")
        os.rename(old_filename, new_filename)
        current += 1

# Example: pull Voices/8.m4a, Voices/9.m4a, ... down to 7.m4a, 8.m4a, ...
rename_files(start_number=8, directory="Voices")

### Missing Numbers in Voice Files

In [3]:
# Report gaps in the numbering of the voice files found in `directory`.
# Alternative pattern for bare numeric names (1.m4a, 2.m4a, etc.):
# pattern = re.compile(r'^(\d+)\.m4a$')
pattern = re.compile(r'^Voice (\d+)\.m4a$') # Voice 1.m4a, Voice 2.m4a, etc.

numbers = []

for filename in os.listdir(directory):
    match = pattern.match(filename)
    if match:
        numbers.append(int(match.group(1)))

if not numbers:
    print("No .m4a files with numeric names found.")
else:
    numbers = sorted(numbers)
    # Set lookups keep the gap scan linear; testing membership against the
    # list made it quadratic in the size of the numeric range.
    present = set(numbers)
    missing = [
        num for num in range(numbers[0], numbers[-1] + 1)
        if num not in present
    ]

    if missing:
        print("Missing numbers:", missing)
    else:
        print("No missing numbers. Sequence is complete!")


No missing numbers. Sequence is complete!


### Shift Numbers in File Names

In [None]:
# Amount added to the three-digit index of every "Voice NNN.m4a" file.
# NOTE: this cell is not idempotent; every run shifts the files again.
SHIFT = 410

pattern = re.compile(r'(Voice )(\d{3})(\.m4a)')

# Collect the candidates first. fullmatch() rejects names with trailing text
# (e.g. "Voice 001.m4a.bak"), which the unanchored match() would have renamed
# and stripped of their real extension.
candidates = []
for filename in os.listdir(directory):
    match = pattern.fullmatch(filename)
    if match:
        candidates.append((int(match.group(2)), filename, match))

# Process the files so that every target name is vacated before it is needed:
# highest index first when shifting up, lowest first when shifting down.
# Renaming in listdir order could overwrite a file that had not been shifted
# yet whenever the old and new ranges overlap.
candidates.sort(key=lambda item: item[0], reverse=SHIFT > 0)

for number, filename, match in candidates:
    prefix, _, suffix = match.groups()
    new_number = number + SHIFT
    new_filename = f"{prefix}{new_number:03d}{suffix}"
    old_path = os.path.join(directory, filename)
    new_path = os.path.join(directory, new_filename)

    # Anything still occupying the target is not part of this batch; leave both
    # files alone rather than letting os.rename replace it.
    if os.path.exists(new_path):
        print(f"Skipped: {filename} → {new_filename} (target already exists)")
        continue

    os.rename(old_path, new_path)
    print(f"Renamed: {filename} → {new_filename}")

print("✅ Done renaming all matching files!")


### Remove All Empty Lines

In [None]:
def remove_empty_lines(input_file, output_file, encoding='utf-8'):
    """Copy a text file, leaving out every blank or whitespace-only line.

    The input is read completely before the output is opened, so the same
    path may be used for both arguments.

    Args:
        input_file: Path of the text file to read.
        output_file: Path the filtered text is written to.
        encoding: Text encoding used for both reading and writing.

    A ``UnicodeDecodeError`` while reading is reported with a printed message
    instead of being raised; in that case nothing is written.
    """
    try:
        with open(input_file, 'r', encoding=encoding) as source:
            kept = [text for text in source if text.strip()]
    except UnicodeDecodeError:
        print(f"Error: Unable to read the file with the {encoding} encoding. Please check the file encoding.")
    else:
        with open(output_file, 'w', encoding=encoding) as target:
            target.writelines(kept)

        print(f"Empty lines removed and saved to {output_file}")

# Label file to clean and the path for the cleaned copy.
input_file = 'Labels/Turki.Text.txt'
output_file = 'Labels/Turki.Text_cleared.txt'

# remove_empty_lines() prints its own success or error message. A second
# unconditional confirmation here duplicated it and claimed success even when
# the file could not be decoded.
remove_empty_lines(input_file, output_file)

### Convert all `m4a` audio tracks to `mp3`

In [None]:
%pip install pydub

In [None]:
import os
from pydub import AudioSegment


# Convert each .m4a file in the folder to .mp3 and delete the source file once
# its export call has returned.
directory = './Voices'

for filename in os.listdir(directory):
    # The extension check ignores case; every other file is skipped.
    if not filename.lower().endswith('.m4a'):
        continue

    stem, _ext = os.path.splitext(filename)
    mp3_filename = stem + '.mp3'
    m4a_path = os.path.join(directory, filename)
    mp3_path = os.path.join(directory, mp3_filename)

    AudioSegment.from_file(m4a_path, format='m4a').export(mp3_path, format='mp3')
    print(f'Converted: {filename} → {mp3_filename}')

    os.remove(m4a_path)
    print(f'Deleted original: {filename}')

print('✅ All m4a files converted and cleaned up!')


### Convert all `ogg` audio tracks to `m4a`

In [2]:
import os
import subprocess

# Convert each .ogg file in the folder to AAC-encoded .m4a with ffmpeg and
# delete the source file only after ffmpeg exits successfully.
directory = 'Voices'

for filename in os.listdir(directory):
    # Case-insensitive so ".OGG" files are handled too, matching the extension
    # check used by the m4a → mp3 cell.
    if not filename.lower().endswith('.ogg'):
        continue

    ogg_path = os.path.join(directory, filename)
    # Swap only the final extension; str.replace() would also rewrite any
    # ".ogg" that appears earlier in the file name.
    m4a_path = os.path.join(directory, os.path.splitext(filename)[0] + '.m4a')

    # -nostdin: never wait for keyboard input inside the notebook.
    # -n: fail instead of overwriting an existing target file.
    result = subprocess.run([
        'ffmpeg', '-nostdin', '-n',
        '-i', ogg_path, '-c:a', 'aac', '-b:a', '192k', m4a_path
    ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if result.returncode == 0:
        print(f"✅ Converted: {filename} → {os.path.basename(m4a_path)}")
        os.remove(ogg_path)
        print(f"🗑️ Deleted original: {filename}")
    else:
        print(f"❌ Failed to convert: {filename}")
        # ffmpeg output is not guaranteed to be valid UTF-8; do not let the
        # error report itself raise.
        print(result.stderr.decode(errors='replace'))


✅ Converted: audio_100@24-04-2025_17-47-09.ogg → audio_100@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_100@24-04-2025_17-47-09.ogg
✅ Converted: audio_101@24-04-2025_17-47-09.ogg → audio_101@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_101@24-04-2025_17-47-09.ogg
✅ Converted: audio_102@24-04-2025_17-47-09.ogg → audio_102@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_102@24-04-2025_17-47-09.ogg
✅ Converted: audio_103@24-04-2025_17-47-09.ogg → audio_103@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_103@24-04-2025_17-47-09.ogg
✅ Converted: audio_104@24-04-2025_17-47-09.ogg → audio_104@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_104@24-04-2025_17-47-09.ogg
✅ Converted: audio_105@24-04-2025_17-47-09.ogg → audio_105@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_105@24-04-2025_17-47-09.ogg
✅ Converted: audio_106@24-04-2025_17-47-09.ogg → audio_106@24-04-2025_17-47-09.m4a
🗑️ Deleted original: audio_106@24-04-2025_17-47-09.ogg
✅ Converted: audio_107@24-04-2025_