In [None]:
import torch
import torch.nn as nn
import torchaudio
import torchaudio.transforms as T
from scipy.fftpack import dct 
import librosa
import numpy as np
import pandas as pd
import os
import soundfile as sf
import matplotlib.pyplot as plt


device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'Using device: {device}')

Using device: cuda


### GENERAZIONE DEI DISARTRICI 

In [None]:
#struttra del generatore
class ResidualSumBlock(nn.Module):
    """
    Residual block che somma i flussi
    """
    def __init__(self, in_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
        self.bn2 = nn.BatchNorm2d(in_channels)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        return self.relu(out)

class Generator(nn.Module):
    def __init__(self, in_channels=1, base_channels=8, num_res_blocks=3, residual_mode='sum'): # residual_mode = 'sum' or 'concat'
        super().__init__()

        self.input_conv = nn.Sequential(
            nn.Conv2d(in_channels, base_channels, 3, 1, 1),
            nn.ReLU(inplace=True)
        )
    
        self.res_blocks = nn.Sequential(*[ResidualSumBlock(base_channels) for _ in range(num_res_blocks)])
        self.output_conv = nn.Conv2d(base_channels, in_channels, 3, 1, 1)

    def forward(self, x):
        out = self.input_conv(x)
        out = self.res_blocks(out)
        out = self.output_conv(out)
        return out

In [None]:
input_path = "/home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/"
generator_path = "/home/deepfake/DysarthricGAN/M04/results_audio/004/generated_tensors/generated_melspec_495/generator_495.pth" # path del miglior generatore

In [4]:
#carico i pesi del generatore
generator = Generator(in_channels=1, residual_mode='sum').to(device) 
state_dict = torch.load(generator_path, map_location=device)
generator.load_state_dict(state_dict)
generator.eval()

  state_dict = torch.load(generator_path, map_location=device)


Generator(
  (input_conv): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (res_blocks): Sequential(
    (0): ResidualSumBlock(
      (conv1): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualSumBlock(
      (conv1): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): Res

In [None]:

for folder_name in os.listdir(input_path):
    folder_path = os.path.join(input_path, folder_name)
    out_folder = os.path.join(input_path, folder_name)

    print(f"\nElaborazione codice: {folder_name}")

    if not os.path.isdir(folder_path):
        print(f"Cartella non trovata: {folder_path}. Salto.")
        continue

    # Gestione di sano.pth ---
    sano_file = os.path.join(folder_path, "sano.pth")
    if os.path.exists(sano_file):
        try:
            print(f"  -> Caricamento: {sano_file}")
            # Carica il tensore e lo sposta sul device specificato
            sano_tensor = torch.load(sano_file, map_location=device)
            
            # Assicura che il tensore abbia almeno 4 dimensioni (Batch, Canali, Altezza, Larghezza)
            if sano_tensor.dim() == 3:
                sano_tensor = sano_tensor.unsqueeze(0) # Aggiunge dimensione batch
            
            # Esecuzione del generatore in modalità senza calcolo dei gradienti
            with torch.no_grad():
                gen_tensor = generator(sano_tensor)
            
            # Crea cartella di output se non esiste
            os.makedirs(out_folder, exist_ok=True)
            
            # Salva il tensore generato (riporta su CPU prima di salvare)
            out_file = os.path.join(out_folder, "dis_gen.pth")
            torch.save(gen_tensor.cpu(), out_file)
            print(f"Generato e salvato: {out_file}")
            
        except Exception as e:
            print(f"Errore elaborazione sano.pth in {folder_name}: {e}")
    else:
        print(f"File sano.pth non trovato in {folder_name}. Salto.")

print("\n Elaborazione completata per tutti i codici selezionati.")


⚙️ Elaborazione codice: B2_UW42
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B2_UW42/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B2_UW42/dis_gen.pth

⚙️ Elaborazione codice: B3_C12
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_C12/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_C12/dis_gen.pth

⚙️ Elaborazione codice: B1_UW51
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B1_UW51/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B1_UW51/dis_gen.pth

⚙️ Elaborazione codice: B1_UW63
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B1_UW63/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B1_UW63/dis_gen.pth

⚙️ Elaborazione codice: B3_UW61
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_UW61/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_UW61/dis_g

  sano_tensor = torch.load(sano_file, map_location=device)


✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B2_C13/dis_gen.pth

⚙️ Elaborazione codice: B3_CW96
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_CW96/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_CW96/dis_gen.pth

⚙️ Elaborazione codice: B3_LV
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_LV/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_LV/dis_gen.pth

⚙️ Elaborazione codice: B3_UW13
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_UW13/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B3_UW13/dis_gen.pth

⚙️ Elaborazione codice: B2_CW10
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B2_CW10/sano.pth
✅ Generato e salvato: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B2_CW10/dis_gen.pth

⚙️ Elaborazione codice: B1_CW65
  -> Caricamento: /home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC/B1_CW65/sano.p

###  VOCODING (WAVEGLOW)

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torchaudio
import torchaudio.transforms as T
import librosa
import numpy as np
import os
import soundfile as sf
import matplotlib.pyplot as plt
import sys
import glob
sys.path.append('/home/deepfake/DysarthricGAN')
from denoiser import Denoiser



device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Sampling rate del WAV originale
sr_orig = 22050

# Parametri mel-spectrogram generator
n_fft = 1024
hop_length = 256
win_length = 1024
n_mels = 80

# Parametri normalizzazione generator
mu_spk = -8.4277
sigma_spk = 3.399



In [None]:
input_path = "/home/deepfake/DysarthricGAN/M04/M04_MEL_SPEC" 
output_dir = "/home/deepfake/DysarthricGAN/patients_gen/M04_new_2" # La cartella dove salvare l'output WAV
os.makedirs(output_dir, exist_ok=True)

In [None]:
print("Caricamento WaveGlow...")
waveglow = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_waveglow',
    model_math='fp32'
).to(device).eval()
waveglow = waveglow.remove_weightnorm(waveglow)
denoiser = Denoiser(waveglow).to(device)
print("WaveGlow caricato e pronto.")

Caricamento WaveGlow...


Using cache found in /home/deepfake/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
  WeightNorm.apply(module, name, dim)


✅ WaveGlow caricato e pronto.


In [None]:
for folder_name in os.listdir(input_path):
    folder_path = os.path.join(input_path, folder_name)

    if os.path.isdir(folder_path):
        
        # Definisci i percorsi I/O specifici
        mel_input_file = "dis_gen.pth"
        mel_input_path = os.path.join(folder_path, mel_input_file)
         
        
        output_wav_name = f"{folder_name}.wav" 
        output_wav_path = os.path.join(output_dir, output_wav_name)
        
    

        if not os.path.exists(mel_input_path):
            print(f"File {mel_input_file} non trovato in {folder_name}. Salto.")
            continue
        
        print(f"Elaborazione {folder_name} -> {output_wav_name} ---")

        # Carica il mel-spectrogram
        try:
            mel_generated = torch.load(mel_input_path, map_location=device)
        except Exception as e:
            print(f"Errore nel caricamento di {mel_input_path}: {e}")
            continue
        
        # Denormalizzazione 
        mel_generated = (mel_generated * sigma_spk) + mu_spk 
        
        # Adatta il tensor alla forma richiesta da WaveGlow
        mel_generated = mel_generated.squeeze(0).to(device)
        
        # Sintesi audio con WaveGlow
        with torch.no_grad():
            y_gen = waveglow.infer(mel_generated)
            # Applicazione Denoiser
            y_gen = denoiser(y_gen, strength=0.05)

        
        # Estrae il canale [0, 0, :] e converte in NumPy float32 1D
        y_gen = y_gen[0, 0, :].cpu().numpy().astype(np.float32)

        # Normalizzazione (clipping a 1.0)
        max_val = np.max(np.abs(y_gen))
        if max_val > 1.0:
            y_gen = y_gen / max_val
            
        # Salvataggio WAV
        sf.write(output_wav_path, y_gen, sr_orig)
        print(f" File WAV generato e salvato in: {output_wav_path}")

print("\nElaborazione batch completata")

--- Elaborazione B2_UW42 -> B2_UW42.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/B2_UW42.wav
--- Elaborazione B3_C12 -> B3_C12.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/B3_C12.wav
--- Elaborazione B1_UW51 -> B1_UW51.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/B1_UW51.wav
--- Elaborazione B1_UW63 -> B1_UW63.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/B1_UW63.wav
--- Elaborazione B3_UW61 -> B3_UW61.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/B3_UW61.wav
--- Elaborazione B2_UW23 -> B2_UW23.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/B2_UW23.wav
--- Elaborazione B3_UW67 -> B3_UW67.wav ---
   ✅ File WAV generato e salvato in: /home/deepfake/DysarthricGAN/patients_gen/M04_new_2/