In [10]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from os.path import exists
from pydub import AudioSegment
from TTS.api import TTS
from TTS.utils.manage import ModelManager
import os
import torch
from TTS.utils.radam import RAdam
import numpy.core.multiarray
import shutil

# Register these classes as safe for torch's restricted (weights_only) unpickler.
# NOTE(review): presumably needed because the TTS checkpoints loaded later in this
# notebook pickle an RAdam optimizer and NumPy scalars — confirm against the
# installed torch / TTS versions.
torch.serialization.add_safe_globals([RAdam, numpy.core.multiarray.scalar])

import sys
sys.path.insert(0, "../Src/")

%autosave 5

Autosaving every 5 seconds


In [11]:
#dataSet = pd.read_csv('../Data/train_data.csv')
#dataSet[dataSet['speaker_id'] == 'MMDM0']

In [12]:
def readCsv(dataset):
    """Load one of the project's split CSVs into a DataFrame.

    Args:
        dataset: Split name; the file read is ``../Data/<dataset>_data.csv``,
            resolved relative to the current working directory.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` for that file.
    """
    csvPath = '../Data/{}_data.csv'.format(dataset)
    return pd.read_csv(csvPath)

In [13]:
def getSpeakers(df):
    """Return the distinct speaker ids found in ``df``.

    The ids are returned in order of first appearance so that repeated runs
    over the same CSV visit speakers in the same order (iterating a ``set`` of
    strings does not guarantee that across interpreter sessions). Rows whose
    ``speaker_id`` is missing are ignored: a NaN id can never match any row in
    an equality filter, so it would only produce an empty speaker.

    Args:
        df: DataFrame with a ``speaker_id`` column.

    Returns:
        A list of unique, non-null speaker ids.
    """
    return df['speaker_id'].dropna().unique().tolist()

In [14]:
def getFilesBySpeaker(df,speakerId):
    """Select the rows of ``df`` that belong to a single speaker.

    Args:
        df: DataFrame with a ``speaker_id`` column.
        speakerId: Value compared for equality against ``speaker_id``.

    Returns:
        The subset of ``df`` (original index preserved) whose ``speaker_id``
        equals ``speakerId``; empty when nothing matches.
    """
    belongsToSpeaker = df['speaker_id'] == speakerId
    return df[belongsToSpeaker]

In [15]:
def concatenateAudio(speakerId, speakerDF):
    """Join all of a speaker's ``.wav`` recordings into one reference file.

    Rows of ``speakerDF`` whose ``filename`` ends in ``.wav`` are loaded from
    ``../Data/data/<path_from_data_dir>``; each clip is followed by one second
    of silence and appended to ``../Data/concatenatedInputs/<speakerId>.wav``.
    If that output file already exists, its audio is loaded first and the new
    clips are appended after it. Clips that fail to load are reported and
    skipped.

    Args:
        speakerId: Speaker identifier used to name the output file.
        speakerDF: Rows for this speaker, with ``filename`` and
            ``path_from_data_dir`` columns.

    Returns:
        None. The result is written to disk (or a message is printed when
        there is nothing to write).
    """
    if speakerDF.empty:
        print(f"Empty DataFrame for speaker {speakerId}. Skipping.")
        return

    finalAudioFile = f'../Data/concatenatedInputs/{speakerId}.wav'
    isWav = speakerDF['filename'].str.endswith('.wav', na=False)
    wavRows = speakerDF[isWav]

    if wavRows.empty:
        print(f"No .wav files for speaker {speakerId}. Skipping.")
        return

    # Start from whatever was exported previously, otherwise from silence-free empty audio.
    if exists(finalAudioFile):
        combined = AudioSegment.from_wav(finalAudioFile)
    else:
        combined = AudioSegment.empty()

    for relativePath in wavRows['path_from_data_dir']:
        try:
            # One-second gap after every clip keeps utterances separated.
            clip = AudioSegment.from_wav(f'../Data/data/{relativePath}') + AudioSegment.silent(duration=1000)
            combined += clip
        except Exception as err:
            print(f"Failed to load {relativePath}: {err}")

    if len(combined) == 0:
        print(f"No valid audio for speaker {speakerId}, nothing exported.")
        return

    combined.export(finalAudioFile, format='wav')
    

In [None]:
def generateAudio(speakers, trainDF):
    """Synthesize one voice-cloned sentence per speaker with every English TTS model.

    For each model name containing ``/en/`` returned by
    ``ModelManager().list_models()``, a directory
    ``../Data/ttsOutputs/<model name with '/' replaced by '_'>`` is created.
    For every speaker, one transcript file is chosen at random from the
    speaker's rows whose ``path_from_data_dir`` contains ``.TXT``, its first
    two whitespace-separated fields are dropped, and the remaining text is
    passed to ``tts_with_vc_to_file`` together with the speaker's reference
    recording ``../Data/concatenatedInputs/<speaker>.wav``.

    The sentences that actually produced an output file are saved to
    ``../Data/ttsOutputs/<model dir name>_generatedSentences.csv``. If that
    CSV could not be written, the model's output directory is removed.

    Args:
        speakers: Collection of speaker ids to process.
        trainDF: DataFrame with ``speaker_id`` and ``path_from_data_dir``
            columns.

    Returns:
        None. All results are written to disk; problems are reported with
        ``print`` and the affected speaker or model is skipped.
    """
    if trainDF.empty or not speakers:
        print("Empty dataset or no speakers provided. Exiting.")
        return

    manager = ModelManager()
    all_models = manager.list_models()
    englishModels = [model for model in all_models if "/en/" in model]

    if not englishModels:
        print("No English models found. Exiting.")
        return

    for model in englishModels:
        speakerAndGeneratedSentencesRepository = {
            'speakerId': [],
            'generatedSentence': []
            }

        model_dir_name = model.replace("/", "_")
        modelDirectory = f'../Data/ttsOutputs/{model_dir_name}'
        # Built once so the save step and the existence check below cannot disagree.
        sentencesCsvPath = f'../Data/ttsOutputs/{model_dir_name}_generatedSentences.csv'
        os.makedirs(modelDirectory, exist_ok=True)

        try:
            tts = TTS(model_name=model, progress_bar=False, gpu=False)
        except Exception as e:
            print(f"Failed to load TTS model {model}: {e}")
            continue

        for speaker in speakers:
            speakerDF = getFilesBySpeaker(trainDF, speaker)
            if speakerDF.empty:
                print(f"No data for speaker {speaker}. Skipping.")
                continue

            # regex=False: match the literal ".TXT" (as a pattern, "." would match any
            # character); na=False: rows with a missing path are simply not selected.
            isTranscript = speakerDF['path_from_data_dir'].str.contains('.TXT', regex=False, na=False)
            possibleSentences = speakerDF.loc[isTranscript, 'path_from_data_dir'].tolist()
            if not possibleSentences:
                print(f"No valid sentences for speaker {speaker}. Skipping.")
                continue

            chosenSentenceFile = np.random.choice(possibleSentences)
            try:
                with open(f'../Data/data/{chosenSentenceFile}', 'r') as file:
                    rawTranscript = file.read().strip()
            except (OSError, UnicodeDecodeError) as e:
                # One unreadable transcript should not abort the whole run.
                print(f"Failed to read sentence file {chosenSentenceFile}: {e}")
                continue

            # Drop the first two fields (numbers) and keep the rest of the line.
            # Splitting on any whitespace run tolerates repeated or tab separators.
            fields = rawTranscript.split(maxsplit=2)
            chosenSentence = fields[2] if len(fields) > 2 else ""

            if not chosenSentence:
                print(f"Chosen sentence for speaker {speaker} is empty. Skipping.")
                continue

            audioFile = f'../Data/concatenatedInputs/{speaker}.wav'
            if not exists(audioFile):
                print(f"Audio file for {speaker} does not exist. Skipping.")
                continue

            try:
                outputFilePath = f'{modelDirectory}/{speaker}.wav'
                tts.tts_with_vc_to_file(
                    text=chosenSentence,
                    file_path=outputFilePath,
                    speaker_wav=audioFile
                )
            except Exception as e:
                print(f"Failed to generate audio for speaker {speaker}: {e}")
                continue

            if not os.path.exists(outputFilePath):
                print(f"Warning: Output file {outputFilePath} was not created.")
                continue

            # Record the sentence only once its audio exists, so the CSV stays in
            # step with the files in the model directory.
            speakerAndGeneratedSentencesRepository['speakerId'].append(speaker)
            speakerAndGeneratedSentencesRepository['generatedSentence'].append(chosenSentence)

        # Save the generated sentences repository
        try:
            generatedDF = pd.DataFrame(speakerAndGeneratedSentencesRepository)
            generatedDF.to_csv(sentencesCsvPath, index=False)
        except Exception as e:
            print(f"Failed to save generated sentences: {e}")

        # Without the sentence list the audio cannot be matched to its text, so
        # discard the model's outputs only when the CSV really is missing.
        if not exists(sentencesCsvPath):
            shutil.rmtree(modelDirectory)

In [17]:
def main():
    """Run the pipeline: build per-speaker reference audio, then synthesize speech.

    Steps:
      1. Read the training CSV and collect its speaker ids.
      2. For every speaker without a file in ``../Data/concatenatedInputs``,
         build the concatenated reference recording.
      3. If ``../Data/ttsOutputs/`` is missing or empty, run ``generateAudio``.

    Both stages are resumable. The directories are created before any work is
    done, so their mere existence does not prove a stage finished; an
    interrupted run must not turn every later run into a no-op.
    """
    trainDF = readCsv('train')
    speakers = getSpeakers(trainDF)

    concatDirectory = '../Data/concatenatedInputs'
    os.makedirs(concatDirectory, exist_ok=True)
    for speaker in speakers:
        # concatenateAudio appends to an existing output file, so a speaker that
        # already has one is skipped to avoid duplicating its audio.
        if exists(f'{concatDirectory}/{speaker}.wav'):
            continue
        speakerDF = getFilesBySpeaker(trainDF,speaker)
        concatenateAudio(speaker,speakerDF)

    outputDirectory = '../Data/ttsOutputs/'
    os.makedirs(outputDirectory, exist_ok=True)
    # Synthesis is expensive; rerun it only when nothing has been produced yet.
    if not os.listdir(outputDirectory):
        generateAudio(speakers, trainDF)

    print("done")
    
    

In [18]:
# Entry point: run the full pipeline only when this code is executed as the
# top-level module, not when it is imported.
if __name__ == '__main__':
    main()


 Name format: type/language/dataset/model
 1: tts_models/multilingual/multi-dataset/xtts_v2
 2: tts_models/multilingual/multi-dataset/xtts_v1.1
 3: tts_models/multilingual/multi-dataset/your_tts
 4: tts_models/multilingual/multi-dataset/bark
 5: tts_models/bg/cv/vits
 6: tts_models/cs/cv/vits
 7: tts_models/da/cv/vits
 8: tts_models/et/cv/vits
 9: tts_models/ga/cv/vits
 10: tts_models/en/ek1/tacotron2 [already downloaded]
 11: tts_models/en/ljspeech/tacotron2-DDC [already downloaded]
 12: tts_models/en/ljspeech/tacotron2-DDC_ph [already downloaded]
 13: tts_models/en/ljspeech/glow-tts [already downloaded]
 14: tts_models/en/ljspeech/speedy-speech [already downloaded]
 15: tts_models/en/ljspeech/tacotron2-DCA [already downloaded]
 16: tts_models/en/ljspeech/vits [already downloaded]
 17: tts_models/en/ljspeech/vits--neon [already downloaded]
 18: tts_models/en/ljspeech/fast_pitch [already downloaded]
 19: tts_models/en/ljspeech/overflow [already downloaded]
 20: tts_models/en/ljspeech/

KeyboardInterrupt: 