In [1]:
!git clone "https://github.com/jik876/hifi-gan.git"

Cloning into 'hifi-gan'...
remote: Enumerating objects: 48, done.[K
remote: Total 48 (delta 0), reused 0 (delta 0), pack-reused 48 (from 1)[K
Receiving objects: 100% (48/48), 620.94 KiB | 13.21 MiB/s, done.
Resolving deltas: 100% (20/20), done.


In [2]:
cd hifi-gan

/kaggle/working/hifi-gan


In [3]:
# import os
# import torch
# import torchaudio
# import numpy as np
# from tqdm import tqdm

# # Config values (taken from your config.json)
# sampling_rate = 22050
# n_fft = 1024
# win_size = 1024
# hop_size = 256
# num_mels = 80
# fmin = 0
# fmax = 8000

# # Input/output dirs
# input_dir = "/kaggle/input/dir4-singfox/Directory 4/songs/Hindi"         # <-- Update this
# output_dir = "/kaggle/working/hifi-gan/test_mel_files/Hindi"     # <-- Update this
# os.makedirs(output_dir, exist_ok=True)

# # Mel Spectrogram extractor
# mel_transform = torchaudio.transforms.MelSpectrogram(
#     sample_rate=sampling_rate,
#     n_fft=n_fft,
#     win_length=win_size,
#     hop_length=hop_size,
#     f_min=fmin,
#     f_max=fmax,
#     n_mels=num_mels,
#     power=1.0,
#     normalized=False
# )

# # Function: 16kHz WAV → log-mel → .npy
# def wav_to_mel(wav_path):
#     waveform, orig_sr = torchaudio.load(wav_path)
    
#     # Mono
#     if waveform.size(0) > 1:
#         waveform = waveform.mean(dim=0, keepdim=True)

#     # Resample to 22.05 kHz
#     if orig_sr != sampling_rate:
#         waveform = torchaudio.functional.resample(waveform, orig_sr, sampling_rate)

#     # Mel spectrogram → log-mel
#     mel = mel_transform(waveform)
#     mel = torch.log(torch.clamp(mel, min=1e-5))  # avoid log(0)
#     return mel.squeeze(0).cpu().numpy()

# # Process all files
# for fname in tqdm(os.listdir(input_dir)):
#     if not fname.endswith('.wav'):
#         continue

#     wav_path = os.path.join(input_dir, fname)
#     mel = wav_to_mel(wav_path)
#     npy_path = os.path.join(output_dir, fname.replace('.wav', '.npy'))
#     np.save(npy_path, mel)
#     print(f"Saved: {npy_path}")


In [4]:
# import glob
# import os
# import numpy as np
# import json
# import torch
# from scipy.io.wavfile import write
# from env import AttrDict
# from meldataset import MAX_WAV_VALUE
# from models import Generator

# # --- Modify paths here ---
# input_mels_dir = "/kaggle/working/hifi-gan/test_mel_files/Hindi"  # <- folder of .npy mel files
# output_dir = "/kaggle/working/Hindi_gen"
# checkpoint_file = "/kaggle/input/other-hifi-models/generator_v3"
# config_file = "/kaggle/input/other-hifi-models/config (1).json"
# # --------------------------

# # Load config
# with open(config_file) as f:
#     json_config = json.load(f)
# h = AttrDict(json_config)

# # Set device
# torch.manual_seed(h.seed)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# if device.type == "cuda":
#     torch.cuda.manual_seed(h.seed)

# # Load Generator
# generator = Generator(h).to(device)
# checkpoint_dict = torch.load(checkpoint_file, map_location=device)
# generator.load_state_dict(checkpoint_dict['generator'])
# generator.eval()
# generator.remove_weight_norm()

# # Inference
# os.makedirs(output_dir, exist_ok=True)
# filelist = sorted(os.listdir(input_mels_dir))

# with torch.no_grad():
#     for filname in filelist:
#         if not filname.endswith(".npy"):
#             continue
#         mel_path = os.path.join(input_mels_dir, filname)
#         x = np.load(mel_path)
#         x = torch.FloatTensor(x).unsqueeze(0).to(device)  # shape: (1, 80, T)
#         y_g_hat = generator(x)
#         audio = y_g_hat.squeeze().cpu().numpy()
#         audio = audio * MAX_WAV_VALUE
#         audio = audio.astype('int16')
#         output_path = os.path.join(output_dir, os.path.splitext(filname)[0] + ".wav")
#         write(output_path, h.sampling_rate, audio)
#         print("Saved:", output_path)


In [5]:
# !zip -r Hindi_file.zip /kaggle/working/Hindi_gen
# from IPython.display import FileLink
# FileLink(r'Hindi_file.zip')

In [6]:
import os
import torch
import torchaudio
import numpy as np
import json
from tqdm import tqdm
from scipy.io.wavfile import write
from env import AttrDict
from meldataset import MAX_WAV_VALUE
from models import Generator
from IPython.display import FileLink

# === CONFIG ===
sampling_rate = 22050
n_fft = 1024
win_size = 1024
hop_size = 256
num_mels = 80
fmin = 0
fmax = 8000

# === PATHS ===
base_input_dir = "/kaggle/input/singfox-dataset/dir5_singfox/Directory 5/songs"
base_mel_dir = "/kaggle/working/hifi-gan/test_mel_files"
base_output_dir = "/kaggle/working/generated_audio"
checkpoint_file = "/kaggle/input/other-hifi-models/generator_v3"
config_file = "/kaggle/input/other-hifi-models/config (1).json"

# === LOAD MODEL CONFIG ===
with open(config_file) as f:
    json_config = json.load(f)
h = AttrDict(json_config)

# === DEVICE ===
torch.manual_seed(h.seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch.cuda.manual_seed(h.seed)

# === LOAD GENERATOR ===
generator = Generator(h).to(device)
checkpoint_dict = torch.load(checkpoint_file, map_location=device)
generator.load_state_dict(checkpoint_dict['generator'])
generator.eval()
generator.remove_weight_norm()

# === MEL TRANSFORM ===
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=sampling_rate,
    n_fft=n_fft,
    win_length=win_size,
    hop_length=hop_size,
    f_min=fmin,
    f_max=fmax,
    n_mels=num_mels,
    power=1.0,
    normalized=False
)

# === FUNCTION TO CONVERT WAV TO MEL ===
def wav_to_mel(wav_path):
    waveform, orig_sr = torchaudio.load(wav_path)
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if orig_sr != sampling_rate:
        waveform = torchaudio.functional.resample(waveform, orig_sr, sampling_rate)
    mel = mel_transform(waveform)
    mel = torch.log(torch.clamp(mel, min=1e-5))
    return mel.squeeze(0).cpu().numpy()

# === PROCESS LANGUAGES ===
languages = sorted([lang for lang in os.listdir(base_input_dir) if os.path.isdir(os.path.join(base_input_dir, lang))])
for language in tqdm(languages, desc="Processing Languages"):
    input_lang_dir = os.path.join(base_input_dir, language)
    mel_lang_dir = os.path.join(base_mel_dir, language)
    output_lang_dir = os.path.join(base_output_dir, language)

    os.makedirs(mel_lang_dir, exist_ok=True)
    os.makedirs(output_lang_dir, exist_ok=True)

    # === MEL EXTRACTION ===
    wav_files = [f for f in os.listdir(input_lang_dir) if f.endswith('.wav')]
    for fname in tqdm(wav_files, desc=f"Mel → {language}", leave=False):
        wav_path = os.path.join(input_lang_dir, fname)
        mel = wav_to_mel(wav_path)
        np.save(os.path.join(mel_lang_dir, fname.replace('.wav', '.npy')), mel)

    # === INFERENCE ===
    mel_files = sorted([f for f in os.listdir(mel_lang_dir) if f.endswith('.npy')])
    with torch.no_grad():
        for fname in tqdm(mel_files, desc=f"HiFi-GAN → {language}", leave=False):
            mel_path = os.path.join(mel_lang_dir, fname)
            x = np.load(mel_path)
            x = torch.FloatTensor(x).unsqueeze(0).to(device)
            y_g_hat = generator(x)
            audio = y_g_hat.squeeze().cpu().numpy()
            audio = (audio * MAX_WAV_VALUE).astype('int16')
            output_path = os.path.join(output_lang_dir, fname.replace('.npy', '.wav'))
            write(output_path, h.sampling_rate, audio)

    # === ZIP OUTPUT ===
    zip_path = f"/kaggle/working/{language}_generated.zip"
    os.system(f"zip -r -q {zip_path} {output_lang_dir}")  # -q for quiet zip
    print(f"\n✅ Zipped: {zip_path}")
    display(FileLink(zip_path))


  WeightNorm.apply(module, name, dim)


Removing weight norm...


Processing Languages:   0%|          | 0/20 [00:00<?, ?it/s]
Mel → Bengali:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Bengali:   0%|          | 1/500 [00:00<01:38,  5.07it/s][A
Mel → Bengali:   1%|▏         | 7/500 [00:00<00:18, 26.80it/s][A
Mel → Bengali:   3%|▎         | 14/500 [00:00<00:11, 40.94it/s][A
Mel → Bengali:   4%|▍         | 20/500 [00:00<00:10, 46.29it/s][A
Mel → Bengali:   5%|▌         | 26/500 [00:00<00:09, 47.90it/s][A
Mel → Bengali:   6%|▋         | 32/500 [00:00<00:09, 48.57it/s][A
Mel → Bengali:   8%|▊         | 38/500 [00:00<00:09, 51.09it/s][A
Mel → Bengali:   9%|▉         | 44/500 [00:00<00:09, 50.00it/s][A
Mel → Bengali:  10%|█         | 50/500 [00:01<00:09, 48.63it/s][A
Mel → Bengali:  11%|█         | 55/500 [00:01<00:09, 48.19it/s][A
Mel → Bengali:  12%|█▏        | 61/500 [00:01<00:08, 49.27it/s][A
Mel → Bengali:  13%|█▎        | 67/500 [00:01<00:08, 50.99it/s][A
Mel → Bengali:  15%|█▍        | 73/500 [00:01<00:08, 51.65it/s][A
Mel → Bengal


✅ Zipped: /kaggle/working/Bengali_generated.zip


Processing Languages:   5%|▌         | 1/20 [00:14<04:30, 14.21s/it]
Mel → English:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → English:   1%|▏         | 7/500 [00:00<00:07, 69.69it/s][A
Mel → English:   3%|▎         | 14/500 [00:00<00:07, 67.84it/s][A
Mel → English:   4%|▍         | 21/500 [00:00<00:07, 67.94it/s][A
Mel → English:   6%|▌         | 28/500 [00:00<00:07, 65.09it/s][A
Mel → English:   7%|▋         | 35/500 [00:00<00:07, 63.97it/s][A
Mel → English:   8%|▊         | 42/500 [00:00<00:07, 64.32it/s][A
Mel → English:  10%|▉         | 49/500 [00:00<00:07, 62.12it/s][A
Mel → English:  11%|█         | 56/500 [00:00<00:07, 60.76it/s][A
Mel → English:  13%|█▎        | 63/500 [00:01<00:07, 57.42it/s][A
Mel → English:  14%|█▍        | 69/500 [00:01<00:07, 56.05it/s][A
Mel → English:  15%|█▌        | 75/500 [00:01<00:07, 56.90it/s][A
Mel → English:  16%|█▌        | 81/500 [00:01<00:07, 56.38it/s][A
Mel → English:  18%|█▊        | 88/500 [00:01<00:07, 58.29it/s][A
Mel


✅ Zipped: /kaggle/working/English_generated.zip


Processing Languages:  10%|█         | 2/20 [00:26<03:56, 13.13s/it]
Mel → French:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → French:   1%|          | 6/500 [00:00<00:08, 57.26it/s][A
Mel → French:   2%|▏         | 12/500 [00:00<00:09, 51.40it/s][A
Mel → French:   4%|▎         | 18/500 [00:00<00:09, 50.71it/s][A
Mel → French:   5%|▍         | 24/500 [00:00<00:09, 50.44it/s][A
Mel → French:   6%|▌         | 30/500 [00:00<00:08, 52.29it/s][A
Mel → French:   7%|▋         | 36/500 [00:00<00:08, 51.59it/s][A
Mel → French:   8%|▊         | 42/500 [00:00<00:08, 52.99it/s][A
Mel → French:  10%|▉         | 48/500 [00:00<00:08, 51.15it/s][A
Mel → French:  11%|█         | 54/500 [00:01<00:08, 53.08it/s][A
Mel → French:  12%|█▏        | 60/500 [00:01<00:08, 54.49it/s][A
Mel → French:  14%|█▎        | 68/500 [00:01<00:07, 60.37it/s][A
Mel → French:  15%|█▌        | 76/500 [00:01<00:06, 63.72it/s][A
Mel → French:  17%|█▋        | 83/500 [00:01<00:06, 65.30it/s][A
Mel → French:  18


✅ Zipped: /kaggle/working/French_generated.zip


Processing Languages:  15%|█▌        | 3/20 [00:39<03:39, 12.94s/it]
Mel → German:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → German:   2%|▏         | 9/500 [00:00<00:06, 80.35it/s][A
Mel → German:   4%|▎         | 18/500 [00:00<00:06, 75.48it/s][A
Mel → German:   5%|▌         | 26/500 [00:00<00:06, 72.07it/s][A
Mel → German:   7%|▋         | 34/500 [00:00<00:06, 67.99it/s][A
Mel → German:   8%|▊         | 41/500 [00:00<00:07, 63.66it/s][A
Mel → German:  10%|▉         | 48/500 [00:00<00:07, 60.23it/s][A
Mel → German:  11%|█         | 55/500 [00:00<00:07, 59.32it/s][A
Mel → German:  12%|█▏        | 62/500 [00:00<00:07, 60.69it/s][A
Mel → German:  14%|█▍        | 69/500 [00:01<00:07, 60.86it/s][A
Mel → German:  15%|█▌        | 76/500 [00:01<00:06, 63.21it/s][A
Mel → German:  17%|█▋        | 84/500 [00:01<00:06, 64.96it/s][A
Mel → German:  18%|█▊        | 92/500 [00:01<00:05, 68.23it/s][A
Mel → German:  20%|██        | 100/500 [00:01<00:05, 70.38it/s][A
Mel → German:  2


✅ Zipped: /kaggle/working/German_generated.zip


Processing Languages:  20%|██        | 4/20 [00:51<03:21, 12.60s/it]
Mel → Hindi:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Hindi:   2%|▏         | 8/500 [00:00<00:06, 74.54it/s][A
Mel → Hindi:   3%|▎         | 16/500 [00:00<00:06, 71.76it/s][A
Mel → Hindi:   5%|▍         | 24/500 [00:00<00:06, 71.78it/s][A
Mel → Hindi:   6%|▋         | 32/500 [00:00<00:06, 71.11it/s][A
Mel → Hindi:   8%|▊         | 40/500 [00:00<00:06, 68.27it/s][A
Mel → Hindi:   9%|▉         | 47/500 [00:00<00:06, 67.19it/s][A
Mel → Hindi:  11%|█         | 54/500 [00:00<00:06, 64.79it/s][A
Mel → Hindi:  12%|█▏        | 62/500 [00:00<00:06, 67.93it/s][A
Mel → Hindi:  14%|█▍        | 69/500 [00:01<00:06, 68.03it/s][A
Mel → Hindi:  15%|█▌        | 76/500 [00:01<00:06, 67.59it/s][A
Mel → Hindi:  17%|█▋        | 83/500 [00:01<00:06, 65.49it/s][A
Mel → Hindi:  18%|█▊        | 90/500 [00:01<00:06, 64.95it/s][A
Mel → Hindi:  19%|█▉        | 97/500 [00:01<00:06, 63.55it/s][A
Mel → Hindi:  21%|██        | 1


✅ Zipped: /kaggle/working/Hindi_generated.zip


Processing Languages:  25%|██▌       | 5/20 [01:03<03:07, 12.51s/it]
Mel → Indonesian:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Indonesian:   2%|▏         | 9/500 [00:00<00:06, 80.56it/s][A
Mel → Indonesian:   4%|▎         | 18/500 [00:00<00:06, 80.14it/s][A
Mel → Indonesian:   5%|▌         | 27/500 [00:00<00:06, 75.99it/s][A
Mel → Indonesian:   7%|▋         | 35/500 [00:00<00:06, 68.53it/s][A
Mel → Indonesian:   9%|▊         | 43/500 [00:00<00:06, 70.56it/s][A
Mel → Indonesian:  10%|█         | 51/500 [00:00<00:06, 70.96it/s][A
Mel → Indonesian:  12%|█▏        | 59/500 [00:00<00:06, 70.46it/s][A
Mel → Indonesian:  13%|█▎        | 67/500 [00:00<00:06, 68.28it/s][A
Mel → Indonesian:  15%|█▍        | 74/500 [00:01<00:06, 68.19it/s][A
Mel → Indonesian:  16%|█▌        | 81/500 [00:01<00:06, 66.70it/s][A
Mel → Indonesian:  18%|█▊        | 89/500 [00:01<00:06, 67.04it/s][A
Mel → Indonesian:  19%|█▉        | 96/500 [00:01<00:06, 67.04it/s][A
Mel → Indonesian:  21%|██      


✅ Zipped: /kaggle/working/Indonesian_generated.zip


Processing Languages:  30%|███       | 6/20 [01:15<02:52, 12.34s/it]
Mel → Italian:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Italian:   2%|▏         | 8/500 [00:00<00:06, 73.17it/s][A
Mel → Italian:   3%|▎         | 16/500 [00:00<00:07, 68.48it/s][A
Mel → Italian:   5%|▍         | 23/500 [00:00<00:06, 68.19it/s][A
Mel → Italian:   6%|▌         | 30/500 [00:00<00:07, 64.29it/s][A
Mel → Italian:   8%|▊         | 38/500 [00:00<00:06, 67.45it/s][A
Mel → Italian:   9%|▉         | 45/500 [00:00<00:06, 65.89it/s][A
Mel → Italian:  11%|█         | 53/500 [00:00<00:06, 66.54it/s][A
Mel → Italian:  12%|█▏        | 61/500 [00:00<00:06, 69.20it/s][A
Mel → Italian:  14%|█▍        | 69/500 [00:01<00:06, 68.50it/s][A
Mel → Italian:  15%|█▌        | 76/500 [00:01<00:06, 68.17it/s][A
Mel → Italian:  17%|█▋        | 83/500 [00:01<00:06, 65.75it/s][A
Mel → Italian:  18%|█▊        | 90/500 [00:01<00:06, 63.55it/s][A
Mel → Italian:  19%|█▉        | 97/500 [00:01<00:06, 59.24it/s][A
Mel


✅ Zipped: /kaggle/working/Italian_generated.zip


Processing Languages:  35%|███▌      | 7/20 [01:28<02:40, 12.36s/it]
Mel → Japanese:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Japanese:   2%|▏         | 8/500 [00:00<00:06, 76.13it/s][A
Mel → Japanese:   3%|▎         | 16/500 [00:00<00:06, 77.11it/s][A
Mel → Japanese:   5%|▍         | 24/500 [00:00<00:06, 78.25it/s][A
Mel → Japanese:   6%|▋         | 32/500 [00:00<00:06, 67.81it/s][A
Mel → Japanese:   8%|▊         | 39/500 [00:00<00:07, 64.24it/s][A
Mel → Japanese:   9%|▉         | 46/500 [00:00<00:07, 61.04it/s][A
Mel → Japanese:  11%|█         | 53/500 [00:00<00:07, 63.57it/s][A
Mel → Japanese:  12%|█▏        | 60/500 [00:00<00:06, 64.59it/s][A
Mel → Japanese:  13%|█▎        | 67/500 [00:01<00:06, 64.24it/s][A
Mel → Japanese:  15%|█▍        | 74/500 [00:01<00:06, 63.46it/s][A
Mel → Japanese:  16%|█▌        | 81/500 [00:01<00:06, 63.28it/s][A
Mel → Japanese:  18%|█▊        | 88/500 [00:01<00:06, 64.02it/s][A
Mel → Japanese:  19%|█▉        | 95/500 [00:01<00:06, 65.


✅ Zipped: /kaggle/working/Japanese_generated.zip


Processing Languages:  40%|████      | 8/20 [01:40<02:28, 12.35s/it]
Mel → Korean:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Korean:   2%|▏         | 8/500 [00:00<00:06, 71.52it/s][A
Mel → Korean:   3%|▎         | 16/500 [00:00<00:06, 69.94it/s][A
Mel → Korean:   5%|▍         | 23/500 [00:00<00:07, 63.18it/s][A
Mel → Korean:   6%|▌         | 31/500 [00:00<00:07, 65.35it/s][A
Mel → Korean:   8%|▊         | 39/500 [00:00<00:06, 68.61it/s][A
Mel → Korean:   9%|▉         | 47/500 [00:00<00:06, 69.54it/s][A
Mel → Korean:  11%|█         | 55/500 [00:00<00:06, 70.83it/s][A
Mel → Korean:  13%|█▎        | 63/500 [00:00<00:06, 71.56it/s][A
Mel → Korean:  14%|█▍        | 71/500 [00:01<00:06, 66.77it/s][A
Mel → Korean:  16%|█▌        | 78/500 [00:01<00:06, 64.41it/s][A
Mel → Korean:  17%|█▋        | 85/500 [00:01<00:06, 63.91it/s][A
Mel → Korean:  19%|█▊        | 93/500 [00:01<00:06, 66.10it/s][A
Mel → Korean:  20%|██        | 101/500 [00:01<00:05, 68.12it/s][A
Mel → Korean:  2


✅ Zipped: /kaggle/working/Korean_generated.zip


Processing Languages:  45%|████▌     | 9/20 [01:52<02:14, 12.22s/it]
Mel → Mandarin Chinese:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Mandarin Chinese:   1%|▏         | 7/500 [00:00<00:07, 68.12it/s][A
Mel → Mandarin Chinese:   3%|▎         | 14/500 [00:00<00:07, 62.47it/s][A
Mel → Mandarin Chinese:   4%|▍         | 22/500 [00:00<00:06, 69.27it/s][A
Mel → Mandarin Chinese:   6%|▌         | 29/500 [00:00<00:06, 68.53it/s][A
Mel → Mandarin Chinese:   7%|▋         | 36/500 [00:00<00:06, 67.66it/s][A
Mel → Mandarin Chinese:   9%|▊         | 43/500 [00:00<00:07, 64.51it/s][A
Mel → Mandarin Chinese:  10%|█         | 50/500 [00:00<00:07, 63.72it/s][A
Mel → Mandarin Chinese:  11%|█▏        | 57/500 [00:00<00:06, 63.78it/s][A
Mel → Mandarin Chinese:  13%|█▎        | 64/500 [00:00<00:06, 62.95it/s][A
Mel → Mandarin Chinese:  14%|█▍        | 71/500 [00:01<00:07, 57.91it/s][A
Mel → Mandarin Chinese:  15%|█▌        | 77/500 [00:01<00:07, 56.22it/s][A
Mel → Mandarin Chinese:  17%|


zip error: Nothing to do! (try: zip -r -q /kaggle/working/Mandarin . -i Chinese_generated.zip /kaggle/working/generated_audio/Mandarin Chinese)

✅ Zipped: /kaggle/working/Mandarin Chinese_generated.zip


Processing Languages:  50%|█████     | 10/20 [02:03<01:58, 11.82s/it]
Mel → Marathi:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Marathi:   2%|▏         | 9/500 [00:00<00:06, 77.85it/s][A
Mel → Marathi:   3%|▎         | 17/500 [00:00<00:07, 68.46it/s][A
Mel → Marathi:   5%|▍         | 24/500 [00:00<00:07, 66.22it/s][A
Mel → Marathi:   6%|▋         | 32/500 [00:00<00:06, 69.06it/s][A
Mel → Marathi:   8%|▊         | 39/500 [00:00<00:06, 67.88it/s][A
Mel → Marathi:   9%|▉         | 47/500 [00:00<00:06, 69.57it/s][A
Mel → Marathi:  11%|█         | 55/500 [00:00<00:06, 71.22it/s][A
Mel → Marathi:  13%|█▎        | 63/500 [00:00<00:05, 73.41it/s][A
Mel → Marathi:  14%|█▍        | 71/500 [00:01<00:06, 69.73it/s][A
Mel → Marathi:  16%|█▌        | 79/500 [00:01<00:06, 66.70it/s][A
Mel → Marathi:  17%|█▋        | 86/500 [00:01<00:06, 64.61it/s][A
Mel → Marathi:  19%|█▊        | 93/500 [00:01<00:06, 65.46it/s][A
Mel → Marathi:  20%|██        | 100/500 [00:01<00:06, 65.79it/s][A
M


✅ Zipped: /kaggle/working/Marathi_generated.zip


Processing Languages:  55%|█████▌    | 11/20 [02:15<01:46, 11.88s/it]
Mel → Portuguese:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Portuguese:   1%|▏         | 7/500 [00:00<00:07, 63.08it/s][A
Mel → Portuguese:   3%|▎         | 14/500 [00:00<00:07, 65.64it/s][A
Mel → Portuguese:   4%|▍         | 22/500 [00:00<00:06, 69.62it/s][A
Mel → Portuguese:   6%|▌         | 29/500 [00:00<00:07, 66.59it/s][A
Mel → Portuguese:   7%|▋         | 36/500 [00:00<00:07, 63.71it/s][A
Mel → Portuguese:   9%|▊         | 43/500 [00:00<00:07, 62.44it/s][A
Mel → Portuguese:  10%|█         | 50/500 [00:00<00:07, 61.99it/s][A
Mel → Portuguese:  11%|█▏        | 57/500 [00:00<00:07, 62.44it/s][A
Mel → Portuguese:  13%|█▎        | 64/500 [00:01<00:06, 62.32it/s][A
Mel → Portuguese:  14%|█▍        | 71/500 [00:01<00:07, 60.93it/s][A
Mel → Portuguese:  16%|█▌        | 78/500 [00:01<00:06, 62.33it/s][A
Mel → Portuguese:  17%|█▋        | 86/500 [00:01<00:06, 65.29it/s][A
Mel → Portuguese:  19%|█▉     


✅ Zipped: /kaggle/working/Portuguese_generated.zip


Processing Languages:  60%|██████    | 12/20 [02:27<01:36, 12.03s/it]
Mel → Punjabi:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Punjabi:   2%|▏         | 8/500 [00:00<00:06, 75.68it/s][A
Mel → Punjabi:   3%|▎         | 16/500 [00:00<00:06, 77.19it/s][A
Mel → Punjabi:   5%|▍         | 24/500 [00:00<00:06, 74.16it/s][A
Mel → Punjabi:   6%|▋         | 32/500 [00:00<00:06, 73.70it/s][A
Mel → Punjabi:   8%|▊         | 40/500 [00:00<00:06, 75.17it/s][A
Mel → Punjabi:  10%|▉         | 48/500 [00:00<00:06, 72.42it/s][A
Mel → Punjabi:  11%|█         | 56/500 [00:00<00:06, 70.35it/s][A
Mel → Punjabi:  13%|█▎        | 64/500 [00:00<00:06, 69.70it/s][A
Mel → Punjabi:  14%|█▍        | 71/500 [00:00<00:06, 67.92it/s][A
Mel → Punjabi:  16%|█▌        | 78/500 [00:01<00:06, 67.29it/s][A
Mel → Punjabi:  17%|█▋        | 85/500 [00:01<00:06, 66.38it/s][A
Mel → Punjabi:  19%|█▊        | 93/500 [00:01<00:06, 67.69it/s][A
Mel → Punjabi:  20%|██        | 100/500 [00:01<00:06, 65.53it/s][A
M


✅ Zipped: /kaggle/working/Punjabi_generated.zip


Processing Languages:  65%|██████▌   | 13/20 [02:40<01:25, 12.16s/it]
Mel → Russian:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Russian:   1%|▏         | 7/500 [00:00<00:07, 62.21it/s][A
Mel → Russian:   3%|▎         | 15/500 [00:00<00:07, 67.88it/s][A
Mel → Russian:   5%|▍         | 23/500 [00:00<00:06, 69.70it/s][A
Mel → Russian:   6%|▌         | 30/500 [00:00<00:06, 67.31it/s][A
Mel → Russian:   7%|▋         | 37/500 [00:00<00:06, 66.60it/s][A
Mel → Russian:   9%|▉         | 45/500 [00:00<00:06, 67.79it/s][A
Mel → Russian:  10%|█         | 52/500 [00:00<00:06, 67.60it/s][A
Mel → Russian:  12%|█▏        | 59/500 [00:00<00:06, 68.05it/s][A
Mel → Russian:  13%|█▎        | 67/500 [00:00<00:06, 68.45it/s][A
Mel → Russian:  15%|█▌        | 75/500 [00:01<00:05, 71.13it/s][A
Mel → Russian:  17%|█▋        | 83/500 [00:01<00:05, 72.98it/s][A
Mel → Russian:  18%|█▊        | 91/500 [00:01<00:05, 74.52it/s][A
Mel → Russian:  20%|█▉        | 99/500 [00:01<00:05, 73.73it/s][A
Me


✅ Zipped: /kaggle/working/Russian_generated.zip


Processing Languages:  70%|███████   | 14/20 [02:52<01:13, 12.22s/it]
Mel → Spanish:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Spanish:   2%|▏         | 8/500 [00:00<00:06, 75.21it/s][A
Mel → Spanish:   3%|▎         | 16/500 [00:00<00:06, 74.18it/s][A
Mel → Spanish:   5%|▍         | 24/500 [00:00<00:07, 66.11it/s][A
Mel → Spanish:   6%|▌         | 31/500 [00:00<00:07, 63.50it/s][A
Mel → Spanish:   8%|▊         | 38/500 [00:00<00:07, 65.37it/s][A
Mel → Spanish:   9%|▉         | 45/500 [00:00<00:06, 65.50it/s][A
Mel → Spanish:  11%|█         | 53/500 [00:00<00:06, 69.76it/s][A
Mel → Spanish:  12%|█▏        | 61/500 [00:00<00:06, 70.28it/s][A
Mel → Spanish:  14%|█▍        | 69/500 [00:00<00:05, 72.20it/s][A
Mel → Spanish:  15%|█▌        | 77/500 [00:01<00:05, 72.66it/s][A
Mel → Spanish:  17%|█▋        | 85/500 [00:01<00:06, 68.69it/s][A
Mel → Spanish:  18%|█▊        | 92/500 [00:01<00:06, 65.75it/s][A
Mel → Spanish:  20%|█▉        | 99/500 [00:01<00:06, 64.66it/s][A
Me


✅ Zipped: /kaggle/working/Spanish_generated.zip


Processing Languages:  75%|███████▌  | 15/20 [03:04<01:01, 12.20s/it]
Mel → Standard Arabic:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Standard Arabic:   2%|▏         | 8/500 [00:00<00:06, 76.31it/s][A
Mel → Standard Arabic:   3%|▎         | 16/500 [00:00<00:07, 63.33it/s][A
Mel → Standard Arabic:   5%|▍         | 23/500 [00:00<00:07, 62.12it/s][A
Mel → Standard Arabic:   6%|▌         | 30/500 [00:00<00:07, 61.71it/s][A
Mel → Standard Arabic:   7%|▋         | 37/500 [00:00<00:08, 57.51it/s][A
Mel → Standard Arabic:   9%|▊         | 43/500 [00:00<00:07, 58.08it/s][A
Mel → Standard Arabic:  10%|█         | 50/500 [00:00<00:07, 58.32it/s][A
Mel → Standard Arabic:  11%|█         | 56/500 [00:00<00:07, 58.05it/s][A
Mel → Standard Arabic:  12%|█▏        | 62/500 [00:01<00:07, 57.69it/s][A
Mel → Standard Arabic:  14%|█▎        | 68/500 [00:01<00:07, 56.42it/s][A
Mel → Standard Arabic:  15%|█▍        | 74/500 [00:01<00:07, 56.01it/s][A
Mel → Standard Arabic:  16%|█▌        | 


zip error: Nothing to do! (try: zip -r -q /kaggle/working/Standard . -i Arabic_generated.zip /kaggle/working/generated_audio/Standard Arabic)

✅ Zipped: /kaggle/working/Standard Arabic_generated.zip


Processing Languages:  80%|████████  | 16/20 [03:15<00:47, 11.78s/it]
Mel → Tamil:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Tamil:   1%|▏         | 7/500 [00:00<00:07, 69.00it/s][A
Mel → Tamil:   3%|▎         | 15/500 [00:00<00:06, 71.86it/s][A
Mel → Tamil:   5%|▍         | 23/500 [00:00<00:06, 74.03it/s][A
Mel → Tamil:   6%|▌         | 31/500 [00:00<00:06, 70.92it/s][A
Mel → Tamil:   8%|▊         | 39/500 [00:00<00:06, 69.40it/s][A
Mel → Tamil:   9%|▉         | 46/500 [00:00<00:06, 68.43it/s][A
Mel → Tamil:  11%|█         | 53/500 [00:00<00:06, 66.95it/s][A
Mel → Tamil:  12%|█▏        | 60/500 [00:00<00:06, 64.94it/s][A
Mel → Tamil:  13%|█▎        | 67/500 [00:00<00:06, 64.97it/s][A
Mel → Tamil:  15%|█▍        | 74/500 [00:01<00:06, 65.68it/s][A
Mel → Tamil:  16%|█▌        | 81/500 [00:01<00:06, 66.33it/s][A
Mel → Tamil:  18%|█▊        | 88/500 [00:01<00:06, 60.89it/s][A
Mel → Tamil:  19%|█▉        | 95/500 [00:01<00:06, 63.24it/s][A
Mel → Tamil:  20%|██        | 


✅ Zipped: /kaggle/working/Tamil_generated.zip


Processing Languages:  85%|████████▌ | 17/20 [03:28<00:36, 12.01s/it]
Mel → Telugu:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Telugu:   2%|▏         | 8/500 [00:00<00:06, 72.61it/s][A
Mel → Telugu:   3%|▎         | 16/500 [00:00<00:06, 70.96it/s][A
Mel → Telugu:   5%|▍         | 24/500 [00:00<00:07, 62.72it/s][A
Mel → Telugu:   6%|▌         | 31/500 [00:00<00:07, 59.80it/s][A
Mel → Telugu:   8%|▊         | 39/500 [00:00<00:07, 63.65it/s][A
Mel → Telugu:   9%|▉         | 46/500 [00:00<00:06, 65.29it/s][A
Mel → Telugu:  11%|█         | 53/500 [00:00<00:07, 63.12it/s][A
Mel → Telugu:  12%|█▏        | 61/500 [00:00<00:06, 65.80it/s][A
Mel → Telugu:  14%|█▍        | 69/500 [00:01<00:06, 67.85it/s][A
Mel → Telugu:  15%|█▌        | 77/500 [00:01<00:06, 69.37it/s][A
Mel → Telugu:  17%|█▋        | 85/500 [00:01<00:05, 71.53it/s][A
Mel → Telugu:  19%|█▊        | 93/500 [00:01<00:05, 70.04it/s][A
Mel → Telugu:  20%|██        | 101/500 [00:01<00:05, 71.39it/s][A
Mel → Telugu:  


✅ Zipped: /kaggle/working/Telugu_generated.zip


Processing Languages:  90%|█████████ | 18/20 [03:40<00:24, 12.11s/it]
Mel → Turkish:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Turkish:   2%|▏         | 8/500 [00:00<00:06, 76.80it/s][A
Mel → Turkish:   3%|▎         | 17/500 [00:00<00:06, 79.39it/s][A
Mel → Turkish:   5%|▌         | 25/500 [00:00<00:06, 76.38it/s][A
Mel → Turkish:   7%|▋         | 33/500 [00:00<00:06, 72.75it/s][A
Mel → Turkish:   8%|▊         | 41/500 [00:00<00:06, 72.90it/s][A
Mel → Turkish:  10%|▉         | 49/500 [00:00<00:06, 72.86it/s][A
Mel → Turkish:  11%|█▏        | 57/500 [00:00<00:06, 73.33it/s][A
Mel → Turkish:  13%|█▎        | 65/500 [00:00<00:05, 73.32it/s][A
Mel → Turkish:  15%|█▍        | 73/500 [00:01<00:06, 69.46it/s][A
Mel → Turkish:  16%|█▌        | 81/500 [00:01<00:05, 70.27it/s][A
Mel → Turkish:  18%|█▊        | 89/500 [00:01<00:05, 69.60it/s][A
Mel → Turkish:  19%|█▉        | 96/500 [00:01<00:05, 67.67it/s][A
Mel → Turkish:  21%|██        | 103/500 [00:01<00:05, 66.38it/s][A
M


✅ Zipped: /kaggle/working/Turkish_generated.zip


Processing Languages:  95%|█████████▌| 19/20 [03:52<00:12, 12.12s/it]
Mel → Vietnamese:   0%|          | 0/500 [00:00<?, ?it/s][A
Mel → Vietnamese:   1%|▏         | 7/500 [00:00<00:07, 65.36it/s][A
Mel → Vietnamese:   3%|▎         | 15/500 [00:00<00:06, 70.06it/s][A
Mel → Vietnamese:   5%|▍         | 23/500 [00:00<00:07, 63.64it/s][A
Mel → Vietnamese:   6%|▌         | 30/500 [00:00<00:07, 60.96it/s][A
Mel → Vietnamese:   7%|▋         | 37/500 [00:00<00:07, 60.91it/s][A
Mel → Vietnamese:   9%|▉         | 44/500 [00:00<00:07, 63.58it/s][A
Mel → Vietnamese:  10%|█         | 51/500 [00:00<00:06, 64.73it/s][A
Mel → Vietnamese:  12%|█▏        | 58/500 [00:00<00:06, 64.60it/s][A
Mel → Vietnamese:  13%|█▎        | 65/500 [00:01<00:07, 61.53it/s][A
Mel → Vietnamese:  14%|█▍        | 72/500 [00:01<00:07, 61.02it/s][A
Mel → Vietnamese:  16%|█▌        | 79/500 [00:01<00:06, 61.78it/s][A
Mel → Vietnamese:  17%|█▋        | 87/500 [00:01<00:06, 62.87it/s][A
Mel → Vietnamese:  19%|█▉     


✅ Zipped: /kaggle/working/Vietnamese_generated.zip


Processing Languages: 100%|██████████| 20/20 [04:04<00:00, 12.25s/it]


In [7]:
print("Hi!!!")

Hi!!!
