In [1]:
import torch
import torchaudio
from utils import load_txt, split_text_balanced, analyze_chunks
from model_utils import Speaker

from IPython.display import Audio

from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
def torchConcat(path, fname, concat, save=False):
    """Concatenate audio clips along dim 1 and optionally save the result.

    Args:
        path: Sub-directory under ``tests/`` that receives the output file.
        fname: Name of the source file the audio was generated from. Its
            extension (of any length) is replaced by ``-TorchStory.wav`` to
            build the output file name.
        concat: Sequence of entries whose element 0 is a waveform tensor and
            whose element 1 is that clip's sample rate. Waveforms are joined
            with ``torch.cat(..., dim=1)``, so they must match in every other
            dimension (presumably ``(channels, frames)`` — confirm against
            ``Speaker.speak``). An empty sequence is rejected by ``torch.cat``.
        save: When truthy, write the joined track to
            ``tests/<path>/<stem>-TorchStory.wav`` via ``torchaudio.save``,
            creating the directory if it does not exist yet.

    Returns:
        Tuple ``(whole_track, sample_rate)``.

    Raises:
        ValueError: If the clips do not all share the same sample rate.
    """
    # Index-based access (rather than unpacking) keeps entries with extra
    # trailing fields working, exactly as before.
    whole_track = torch.cat(tuple(entry[0] for entry in concat), dim=1)

    # The whole track is stored/played with a single rate; mixing rates would
    # silently speed up or slow down some clips, so refuse to do it.
    sample_rate = concat[0][1]
    other_rates = [entry[1] for entry in concat if entry[1] != sample_rate]
    if other_rates:
        raise ValueError(
            f"All clips must share one sample rate; got {sample_rate} and {other_rates}"
        )

    if save:
        source = Path(fname)
        # Path.stem drops the real extension instead of assuming it is 4 chars;
        # source.parent preserves any sub-folder embedded in fname.
        outfile = Path("tests") / path / source.parent / f"{source.stem}-TorchStory.wav"
        outfile.parent.mkdir(parents=True, exist_ok=True)
        torchaudio.save(outfile, whole_track, sample_rate)
        print("--------------------------------------------------")
        print(f"DONE! Torch concatenated audio file: {outfile}")
    return (whole_track, sample_rate)

In [4]:
# Instantiate the three Speaker objects used throughout this notebook,
# in the same order as their identifiers are listed.
speaker_meditation, british_girl, novel_girl = (
    Speaker(voice_id) for voice_id in ("meditation", "british-02", "novel")
)

In [5]:
# Inspect the runtime class of one of the constructed speakers.
type(speaker_meditation)

model_utils.Speaker

In [13]:
# Story folder and source text file. Both are passed to load_txt and to
# torchConcat, which writes its output under tests/<path0>/.
path0 = "20-octopus-mantaray"
fname0 = "20-octopus-mantaray-pt2.txt"

In [14]:
# Load the story text with the project helper, split it into chunks (one
# speak() call is made per chunk later), then print chunk-length statistics.
sample_fable_raw = load_txt(path0, fname0)
sample_fable = split_text_balanced(sample_fable_raw)
analyze_chunks(sample_fable)

Number of chunks: 2
Length statistics:
  Min: 162
  Max: 283
  Average: 222.5
  Standard deviation: 60.5

Chunk lengths: [283, 162]

Chunk 1 (283 chars): Testing this. In Part 1, the cautious octopus faced its fear and approached the manta’s silent grace. Now, holding space between them, the octopus shares the radiant reef, an unspoken pact formed. Understanding grows, fear dims, yet respect remains—a balance of bravery and prudence.

Chunk 2 (162 chars): The giant’s majesty is not a threat but a presence to honor. The fable whispers: Familiarity fades fear, but wisdom keeps distance. Some shadows guide, not scare.


In [15]:
# One speaker per text chunk, in reading order. Edit this list to change the
# voice of individual chunks (candidates: speaker_meditation, british_girl,
# novel_girl).
chunk_speakers = [
    novel_girl,
    novel_girl,
]

# zip() stops at the shorter input, so too few speakers would silently drop
# the remaining chunks from the generated audio. Fail loudly instead.
if len(chunk_speakers) < len(sample_fable):
    raise ValueError(
        f"Need one speaker per chunk: {len(sample_fable)} chunks "
        f"but only {len(chunk_speakers)} speakers listed"
    )

# Tuple of (chunk_index, speaker) pairs; despite the name this is not a dict.
# Surplus speakers beyond the last chunk are ignored, as before.
dict_zipped = tuple(zip(range(len(sample_fable)), chunk_speakers))

In [16]:
# Show which speaker object is assigned to each chunk index.
for pairing in dict_zipped:
    print(*pairing)

0 <model_utils.Speaker object at 0x7f619e4d8b90>
1 <model_utils.Speaker object at 0x7f619e4d8b90>


In [17]:
# Generate one clip per chunk with its assigned speaker, keeping chunk order.
audio_store = [spkr.speak(sample_fable[i]) for i, spkr in dict_zipped]

Generating:  59%|███████████████████████████████████████████████████████████████████████████▋                                                    | 1530/2588 [00:27<00:19, 55.36it/s]
Generating:  34%|███████████████████████████████████████████▉                                                                                     | 882/2588 [00:15<00:30, 56.68it/s]


In [18]:
# Join the per-chunk clips into one track and, because save=True, also write
# it to disk. full_audio is the (waveform, sample_rate) tuple torchConcat returns.
full_audio = torchConcat(path0, fname0, audio_store, save=True)

--------------------------------------------------
DONE! Torch concatenated audio file: tests/20-octopus-mantaray/20-octopus-mantaray-pt2-TorchStory.wav


In [19]:
# Inline player for the combined track: element 0 is the waveform, element 1 the sample rate.
Audio(full_audio[0], rate=full_audio[1])

## ---

In [6]:
# Scratch test: generate a single short utterance with the meditation speaker.
a0 = speaker_meditation.speak("Taste the rainbow motherfucker! Now that we got that out of the way, lets focus on working smart. Go hard or go home.")

Generating:  25%|████████████████████████████████▋                                                                                                | 655/2588 [00:27<01:22, 23.40it/s]


In [7]:
# Test sentence used for the word-count and sentence-splitting checks.
char0 = "Taste the rainbow motherfucker! Now that we got that out of the way, let's focus on working smart. Go hard or go home."

In [8]:
# Number of whitespace-separated tokens in the test sentence.
len(char0.split())

23

In [9]:
# Naive sentence split on "." only: "!" is not treated as a boundary, pieces
# keep their leading space, and a trailing empty string is produced.
char0.split(".")

["Taste the rainbow motherfucker! Now that we got that out of the way, let's focus on working smart",
 ' Go hard or go home',
 '']

In [10]:
# Inline player for the test utterance: a0[0] is passed as the audio data, a0[1] as its rate.
Audio(a0[0], rate=a0[1])

## Split sentence into similar-length chunks

In [19]:
# Example: split a multi-sentence paragraph into chunks of similar length.
sample_text = """
Natural language processing is a field of artificial intelligence that focuses on the interaction between computers and human language. 
It involves developing algorithms and models that can understand, interpret, and generate human language in a valuable way. At least that is our hope. 
This technology powers many applications we use daily, including machine translation, chatbots, voice assistants, and sentiment analysis tools. 
The challenges in NLP are numerous and complex, ranging from handling ambiguity in language to understanding context and cultural nuances. 
Modern approaches often use deep learning techniques, particularly transformer architectures, which have revolutionized the field in recent years.
"""

# Split the stripped text. max_length / target_length look like per-chunk
# character limits (analyze_chunks reports lengths in chars) — TODO confirm in utils.
chunks = split_text_balanced(sample_text.strip(), max_length=256, target_length=200)

# Print the chunk count, length statistics, and a preview of every chunk.
analyze_chunks(chunks)


Number of chunks: 5
Length statistics:
  Min: 135
  Max: 150
  Average: 142.4
  Standard deviation: 5.4

Chunk lengths: [135, 150, 143, 138, 146]

Chunk 1 (135 chars): Natural language processing is a field of artificial intelligence that focuses on the interaction be...

Chunk 2 (150 chars): It involves developing algorithms and models that can understand, interpret, and generate human lang...

Chunk 3 (143 chars): This technology powers many applications we use daily, including machine translation, chatbots, voic...

Chunk 4 (138 chars): The challenges in NLP are numerous and complex, ranging from handling ambiguity in language to under...

Chunk 5 (146 chars): Modern approaches often use deep learning techniques, particularly transformer architectures, which ...


In [20]:
# Display the resulting list of chunk strings in full.
chunks

['Natural language processing is a field of artificial intelligence that focuses on the interaction between computers and human language.',
 'It involves developing algorithms and models that can understand, interpret, and generate human language in a valuable way. At least that is our hope.',
 'This technology powers many applications we use daily, including machine translation, chatbots, voice assistants, and sentiment analysis tools.',
 'The challenges in NLP are numerous and complex, ranging from handling ambiguity in language to understanding context and cultural nuances.',
 'Modern approaches often use deep learning techniques, particularly transformer architectures, which have revolutionized the field in recent years.']