In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
from pydub import AudioSegment
import os


target_sample_rate = 16000
audio_dir = "/content/drive/My Drive/Tamil_Audio"
processed_audio_dir = "/content/drive/My Drive/Tamil_Audio_Processed"
os.makedirs(processed_audio_dir, exist_ok=True)

# Resample every .wav in the source folder to the target rate, peak-normalize it,
# and write the result under the same file name in the processed folder.
wav_names = [name for name in os.listdir(audio_dir) if name.endswith(".wav")]
for wav_name in wav_names:
    segment = AudioSegment.from_wav(os.path.join(audio_dir, wav_name))
    resampled = segment.set_frame_rate(target_sample_rate)
    normalized = resampled.normalize()
    normalized.export(os.path.join(processed_audio_dir, wav_name), format="wav")

print("Audio files have been normalized and saved to:", processed_audio_dir)


Audio files have been normalized and saved to: /content/drive/My Drive/Tamil_Audio_Processed


In [None]:
import os
import shutil

# Source and destination folders
audio_dir = "/content/drive/My Drive/Tamil_Audio"  # original .wav and .txt files
processed_audio_dir = "/content/drive/My Drive/Tamil_Audio_Processed"  # processed audio files

# For every processed clip, bring its transcript over from the original folder.
wav_suffix = ".wav"
for wav_name in os.listdir(processed_audio_dir):
    if not wav_name.endswith(wav_suffix):
        continue

    transcript_filename = wav_name[:-len(wav_suffix)] + ".txt"
    source_txt = os.path.join(audio_dir, transcript_filename)

    # Report (but do not fail on) clips whose transcript is missing.
    if not os.path.exists(source_txt):
        print(f"Transcript {transcript_filename} not found in {audio_dir}")
        continue

    shutil.copy(source_txt, processed_audio_dir)
    print(f"Copied {transcript_filename} to {processed_audio_dir}")

print("All corresponding .txt files have been added to Tamil_Audio_Processed.")


Copied 281474976896579_f1493_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976896574_f1497_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976903205_f1348_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976903218_f1184_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976884346_f8_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976897640_f118_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976897635_f3014_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976897639_f2713_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976899497_f2611_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976903199_f1343_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 281474976896211_f667_chunk_0.txt to /content/drive/My Drive/Tamil_Audio_Processed
Copied 28147497

In [None]:
import re

def clean_text(text):
    """Normalize one transcript string.

    Steps, in order:
      1. lowercase the text;
      2. drop every character that is not an ASCII letter/digit, a code point
         in the Tamil Unicode block (U+0B80-U+0BFF) or whitespace;
      3. collapse whitespace runs to a single space and trim the ends;
      4. strip a leading ``<alphanumerics ending in 'wav'> <number> `` prefix,
         i.e. what a file path and an index look like after step 2.

    Args:
        text: Raw transcript contents.

    Returns:
        The cleaned transcript (possibly an empty string).
    """
    text = text.lower()
    # The previous ASCII-only whitelist erased all Tamil-script characters;
    # the Tamil block is kept so native-script transcripts survive cleaning.
    text = re.sub(r"[^a-zA-Z0-9\u0B80-\u0BFF\s]", "", text)
    # Replace multiple whitespace characters with a single space.
    text = re.sub(r"\s+", " ", text).strip()
    # Matches the concatenated path ending with 'wav' followed by a number.
    cleaned_text = re.sub(r"^[a-zA-Z0-9]+wav\s+\d+\s+", "", text)
    return cleaned_text.strip()


# Apply cleaning to each transcript, rewriting the file in place.
# The encoding is pinned to UTF-8 so non-ASCII transcripts are read and written
# the same way regardless of the runtime's locale default.
for filename in os.listdir(processed_audio_dir):
    if filename.endswith(".txt"):
        text_path = os.path.join(processed_audio_dir, filename)
        with open(text_path, "r", encoding="utf-8") as file:
            text = file.read()
        cleaned_text = clean_text(text)
        with open(text_path, "w", encoding="utf-8") as file:
            file.write(cleaned_text)

print("Transcripts have been cleaned and standardized.")


Transcripts have been cleaned and standardized.


In [None]:
import random
import shutil

# Set up directories for splits
train_dir = "/content/drive/My Drive/train"
val_dir = "/content/drive/My Drive/val"
test_dir = "/content/drive/My Drive/test"
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

# Keep only utterances that have BOTH a .wav and a .txt, so the split sizes
# reported later match what can actually be copied. Sorting removes the
# dependence on os.listdir's arbitrary ordering.
file_bases = sorted(
    f[:-4]
    for f in os.listdir(processed_audio_dir)
    if f.endswith(".wav")
    and os.path.exists(os.path.join(processed_audio_dir, f[:-4] + ".txt"))
)

# Shuffle with a fixed seed: re-running this cell reproduces the same 80/10/10
# split instead of copying a different assignment into the existing split
# folders (which would leak utterances between train/val/test).
# NOTE: files left in the split folders by earlier runs are not removed here.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(file_bases)
train_split = int(0.8 * len(file_bases))
val_split = int(0.9 * len(file_bases))

train_files = file_bases[:train_split]
val_files = file_bases[train_split:val_split]
test_files = file_bases[val_split:]

# Function to copy files
def copy_files(file_list, destination_dir):
    """Copy each utterance's .wav and .txt from the processed folder.

    Args:
        file_list: Base names (no extension) of the utterances to copy.
        destination_dir: Folder that receives the copies.

    An utterance is skipped silently unless both of its files exist.
    """
    for stem in file_list:
        wav_src = os.path.join(processed_audio_dir, stem + ".wav")
        txt_src = os.path.join(processed_audio_dir, stem + ".txt")
        if not (os.path.exists(wav_src) and os.path.exists(txt_src)):
            continue
        for src in (wav_src, txt_src):
            shutil.copy(src, destination_dir)

# Populate each split folder from its list of base names.
for members, target_dir in zip(
    (train_files, val_files, test_files),
    (train_dir, val_dir, test_dir),
):
    copy_files(members, target_dir)

# Report the size of each split list.
summary_lines = [
    "Data has been split into training, validation, and testing sets.",
    f"Training set: {len(train_files)} files",
    f"Validation set: {len(val_files)} files",
    f"Testing set: {len(test_files)} files",
]
print("\n".join(summary_lines))


Data has been split into training, validation, and testing sets.
Training set: 307 files
Validation set: 38 files
Testing set: 39 files


In [None]:
# Clone the VITS repository


!git clone https://github.com/jaywalnut310/vits.git

# Change to the VITS directory
%cd vits

# List the contents of the directory to check if it's cloned successfully
!ls -l


Cloning into 'vits'...
remote: Enumerating objects: 81, done.[K
remote: Total 81 (delta 0), reused 0 (delta 0), pack-reused 81 (from 1)[K
Receiving objects: 100% (81/81), 3.33 MiB | 16.33 MiB/s, done.
Resolving deltas: 100% (22/22), done.
/content/vits/vits/monotonic_align/vits/monotonic_align/vits/monotonic_align/vits/monotonic_align/vits
total 168
-rw-r--r-- 1 root root 11780 Nov  5 19:11 attentions.py
-rw-r--r-- 1 root root  4778 Nov  5 19:11 commons.py
drwxr-xr-x 2 root root  4096 Nov  5 19:11 configs
-rw-r--r-- 1 root root 15027 Nov  5 19:11 data_utils.py
drwxr-xr-x 2 root root  4096 Nov  5 19:11 filelists
-rw-r--r-- 1 root root  6103 Nov  5 19:11 inference.ipynb
-rw-r--r-- 1 root root  1069 Nov  5 19:11 LICENSE
-rw-r--r-- 1 root root  1314 Nov  5 19:11 losses.py
-rw-r--r-- 1 root root  3825 Nov  5 19:11 mel_processing.py
-rw-r--r-- 1 root root 19375 Nov  5 19:11 models.py
-rw-r--r-- 1 root root 13166 Nov  5 19:11 modules.py
drwxr-xr-x 2 root root  4096 Nov  5 19:11 monotonic_al

In [None]:
!pip install Cython
!pip install librosa
!pip install matplotlib
!pip install numpy
!pip install phonemizer
!pip install scipy
!pip install tensorboard
!pip install torch torchvision
!pip install Unidecode


Collecting numpy>=1.15.0 (from librosa)
  Downloading numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Downloading numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.21.6
    Uninstalling numpy-1.21.6:
      Successfully uninstalled numpy-1.21.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
arviz 0.20.0 requires matplotlib>=3.5, but you have matplotlib 3.3.1 which is incompatible.
bigframes 1.25.0 requires google-auth<3.0dev,>=2.15.0, but y

In [None]:
!apt-get install espeak


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
espeak is already the newest version (1.48.15+dfsg-3).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
import os


def create_filelist(folder, filelist_name):
    """Write a VITS filelist for every transcribed .wav in ``folder``.

    Each output line is ``<folder>/<name>.wav|<transcript text>``. The second
    field is the transcript itself (read from ``<name>.txt``), because the
    VITS data loader treats it as the text to synthesize, not as a path.

    Args:
        folder: Directory holding paired ``.wav`` / ``.txt`` files.
        filelist_name: Path of the filelist to (over)write.

    Clips with a missing or empty transcript are reported and left out.
    """
    wav_ext = '.wav'
    with open(filelist_name, 'w', encoding='utf-8') as f:
        for audio_file in sorted(os.listdir(folder)):
            if not audio_file.endswith(wav_ext):
                continue
            # Swap only the trailing extension; str.replace would also rewrite
            # a ".wav" that appears earlier in the file name.
            text_path = os.path.join(folder, audio_file[:-len(wav_ext)] + '.txt')
            if not os.path.exists(text_path):
                print(f'Skipping {audio_file}: transcript not found')
                continue
            with open(text_path, 'r', encoding='utf-8') as t:
                # One entry per line: flatten newlines, and drop '|' because it
                # is the field separator of the filelist.
                transcript = ' '.join(t.read().replace('|', ' ').split())
            if not transcript:
                print(f'Skipping {audio_file}: transcript is empty')
                continue
            f.write(f'{folder}/{audio_file}|{transcript}\n')

# Build one filelist per split; output paths are relative to the current directory.
for split_name in ('train', 'val', 'test'):
    create_filelist(
        '/content/drive/My Drive/' + split_name,
        'filelists/' + split_name + '_filelist.txt',
    )


In [None]:
%cd monotonic_align


/content/vits/vits/monotonic_align/vits/monotonic_align/vits/monotonic_align/vits/monotonic_align


In [None]:
# Patch the setup.py in the current directory (the same file the build command
# in the next cell runs) so Cython compiles core.pyx with language_level=3.
setup_file_path = 'setup.py'
with open(setup_file_path, 'r') as file:
    content = file.readlines()

modified_content = []
for line in content:
    # Compare with all whitespace removed so both `ext_modules=cythonize(...)`
    # and `ext_modules = cythonize(...)` are recognized.
    if 'ext_modules=cythonize' in ''.join(line.split()):
        indent = line[:len(line) - len(line.lstrip())]
        modified_content.append(f'{indent}ext_modules=cythonize("core.pyx", language_level=3),\n')
    else:
        modified_content.append(line)

with open(setup_file_path, 'w') as file:
    file.writelines(modified_content)

In [None]:
!python setup.py build_ext --inplace


error: could not create 'monotonic_align/core.cpython-310-x86_64-linux-gnu.so': No such file or directory


In [None]:
# Debugging aid: print the current working directory.
!pwd


/content/vits/vits/monotonic_align/vits/monotonic_align


In [None]:
# Debugging aid: list the contents of the current working directory.
!ls


build  core.c  core.pyx  __init__.py  setup.py
