In [1]:
import torch

from Modules import LoadingModule
from Modules import Features_encoder
from Modules import quantizationModule
from Modules import wav2vec_transformer
from Modules import ContrastiveLoss

from Modules import TempLibriSpeech

In [2]:
"""

#data loader module init
StandardScalerTransform = LoadingModule.StandardScalerTransform
LargeDataModule = LoadingModule.LargeDataModule("./data/Librispeech", batch_size=16, num_workers=1, transform=StandardScalerTransform)
"""

'\n\n#data loader module init\nStandardScalerTransform = LoadingModule.StandardScalerTransform\nLargeDataModule = LoadingModule.LargeDataModule("./data/Librispeech", batch_size=16, num_workers=1, transform=StandardScalerTransform)\n'

In [9]:
# Temporary, manual data loading.
# TODO: make this PyTorch Lightning compatible once the GPU is available.
from torch.utils.data import DataLoader

# Training split; `target_length` and `device` are forwarded to the project's
# LibriSpeech wrapper unchanged.
dataset = TempLibriSpeech.LibriSpeech(
    split="train-clean-100",
    target_length=48000,
    device='cuda',
)

# `DataLoader` is the same class as `torch.utils.data.DataLoader`, imported above.
data_loader = DataLoader(dataset, shuffle=True, batch_size=8)

In [10]:
# Print the first two batches from `data_loader` as a quick sanity check.
separator = "-" * 50
for batch_no, (audio, text) in enumerate(data_loader):
    print(f"Exemple {batch_no+1}")
    print(f"Audio shape: {audio.shape}")
    print(f"Texte: {text}")
    print(separator)
    # Stop right after the second batch so that no third batch is fetched.
    if batch_no == 1:
        break

Exemple 1
Audio shape: torch.Size([8, 48000])
Texte: ('ALL THIS CONSIDERED THERE IS A JUDGMENT OF HEAVEN UPON THIS NATION IF THESE DISTRACTIONS CONTINUE GOD SO DEAL WITH ME AND MINE AS ALL MY THOUGHTS AND INTENTIONS ARE UPRIGHT FOR THE MAINTENANCE OF THE TRUE PROTESTANT PROFESSION', 'AS ONE JAPANESE HAS WRITTEN OUR REFLECTION BROUGHT INTO PROMINENCE NOT SO MUCH THE MORAL AS THE NATIONAL CONSCIOUSNESS OF THE INDIVIDUAL TO US THE COUNTRY IS MORE THAN LAND AND SOIL FROM WHICH TO MINE GOLD OR REAP GRAIN', 'THEY HAD SUNSHINE RAIN HAIL SNOW AND A TORNADO AND THEN RAIN AGAIN AND MORE SUNSHINE SUNNY ITALY SEEMED A MISNOMER THAT DAY AS INDEED IT DOES MANY DAYS IN WINTER AND SPRING', 'BUT NO ONE WAS MORE DELIGHTED TO MEET ELSIE THAN HERBERT AND SHE WAS VERY GLAD TO LEARN THAT HIS HEALTH WAS GRADUALLY IMPROVING HE WAS NOT HOWEVER AT ALL STRONG EVEN YET AND HIS MOTHER THOUGHT IT BEST FOR HIM TO LIE DOWN AND REST A LITTLE AFTER HIS RIDE', 'EVEN SHOULD I BREAK ONE OF THEM WITH MY FIRST BLOW FOR I FI

In [5]:
### Model dev ###

In [6]:
import torch
import torch.nn as nn

class Model_W2V(nn.Module):
    """Model wiring together the project's encoder, quantizer, masking,
    transformer block and loss modules.

    The forward pass runs, in order: ``Features_encoder.FeatureEncoder``,
    ``quantizationModule.QuantizationModule``,
    ``wav2vec_transformer.MaskingWithLearnableEmbedding``,
    ``wav2vec_transformer.TransformerBlockW`` and
    ``ContrastiveLoss.LossW2V.compute_loss``.

    Args:
        embed_size: Stored on the instance; not used by any sub-module built here.
        num_heads: Forwarded to ``TransformerBlockW``.
        dropout: Forwarded to ``TransformerBlockW``.
        forward_expansion: Forwarded to ``TransformerBlockW``.
        kernel_size: Stored on the instance; not used by any sub-module built here.
        groups: Stored on the instance; not used by any sub-module built here.
        d_model: Forwarded to ``TransformerBlockW`` as its first argument.
        num_layers: Stored on the instance only; a single transformer block
            is created regardless of this value.
        max_relative_position: Stored on the instance; not used by any
            sub-module built here.
        batch_size: Batch size handed to ``LossW2V.compute_loss`` on every
            forward pass and exposed as ``self.batch_size``. When omitted,
            the constructor falls back to a module-level variable named
            ``batch_size`` (the behaviour of earlier revisions).

    Raises:
        NameError: If ``batch_size`` is neither passed nor defined at module
            level.
    """

    def __init__(self, embed_size, num_heads, dropout, forward_expansion, kernel_size,
                 groups, d_model, num_layers, max_relative_position, batch_size=None):
        # nn.Module has to be initialised before attributes are assigned on the child.
        super().__init__()

        if batch_size is None:
            # Legacy path: earlier revisions silently read a notebook-level
            # global. Prefer passing `batch_size` explicitly.
            if "batch_size" not in globals():
                raise NameError(
                    "batch_size must be passed to Model_W2V(...) or defined as a "
                    "module-level variable before the model is built"
                )
            batch_size = globals()["batch_size"]

        self.batch_size = batch_size
        self.embed_size = embed_size
        # NOTE(review): a mask probability of 0.0 is handed to the masking
        # module - presumably nothing gets masked; confirm this is intended.
        self.mask_prob = 0.00
        self.mask_length = 1
        self.num_heads = num_heads
        self.dropout = dropout
        self.forward_expansion = forward_expansion
        self.kernel_size = kernel_size
        self.groups = groups
        self.d_model = d_model
        self.num_layers = num_layers

        # Quantizer settings, passed positionally to QuantizationModule below.
        self.num_codebooks = 2
        self.num_codes = 320
        self.code_dim = 256
        self.output_dim = 512
        self.temperature = 0.07

        self.max_relative_position = max_relative_position

        self.FeaturesEncoder = Features_encoder.FeatureEncoder(input_channels=1, feature_dim=512)
        self.masking = wav2vec_transformer.MaskingWithLearnableEmbedding()
        # TransformerBlockW(d_model, num_heads, dropout, forward_expansion)
        self.TranformerBlock = wav2vec_transformer.TransformerBlockW(
            self.d_model, self.num_heads, self.dropout, self.forward_expansion
        )
        self.quantization = quantizationModule.QuantizationModule(
            self.num_codebooks, self.num_codes, self.code_dim, self.output_dim, self.temperature
        )
        # The meaning of the argument 20 is defined by LossW2V and is not visible here.
        self.LossItem = ContrastiveLoss.LossW2V(20)

    def forward(self, x):
        """Run the full pipeline on a batch and compute the loss.

        Args:
            x: Input tensor; it is moved to the model's device and gets an
                extra dimension inserted at index 1 before encoding
                (assumes a ``(batch, samples)`` waveform batch - TODO confirm).

        Returns:
            A 3-tuple ``(features, contextualized_reps, loss)`` where
            ``features`` is the output of the feature encoder (not the
            quantized representation), ``contextualized_reps`` is the output
            of the transformer block and ``loss`` is the value returned by
            ``LossW2V.compute_loss``.
        """
        # Use whichever device the parameters live on so callers do not have
        # to move the batch themselves.
        x = x.to(next(self.parameters()).device)
        x = x.unsqueeze(1)

        x = self.FeaturesEncoder(x)

        # The quantizer sees the unmasked encoder output.
        quantized_repr = self.quantization(x)

        masked_reps, mask = self.masking(x, self.mask_prob, self.mask_length)

        # TransformerBlockW arguments: value, key, query, mask.
        contextualized_reps = self.TranformerBlock(masked_reps, masked_reps, masked_reps, mask)

        loss = self.LossItem.compute_loss(contextualized_reps, quantized_repr, mask, self.batch_size)

        return x, contextualized_reps, loss
    

In [7]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm

def train_model(model, dataset, epochs, learning_rate, device):
    """Train ``model`` on ``dataset`` with Adam, printing the loss as it goes.

    Args:
        model: Module exposing a ``batch_size`` attribute and whose call
            returns a 3-tuple with the loss tensor as its last element.
        dataset: Dataset yielding ``(inputs, target)`` pairs; the second
            element is ignored.
        epochs: Number of passes over ``dataset``.
        learning_rate: Learning rate given to ``optim.Adam``.
        device: Device the model is moved to before training.

    Raises:
        ValueError: If ``dataset`` does not contain at least one full batch.
    """
    # drop_last=True discards a trailing incomplete batch. This replaces the
    # previous "skip the final iteration" logic, which also threw away a full
    # batch whenever the dataset size was a multiple of the batch size.
    dataloader = DataLoader(dataset, batch_size=model.batch_size, shuffle=True, drop_last=True)
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError(
            f"dataset must contain at least one full batch of {model.batch_size} samples"
        )

    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0

        # Iterate over the loader built from the `dataset` argument, not over
        # a loader that happens to exist in the notebook namespace.
        progress = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for batch_idx, (inputs, _) in enumerate(progress):
            optimizer.zero_grad()

            _, _, loss = model(inputs)

            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            total_loss += batch_loss

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Step [{batch_idx+1}/{num_batches}], Loss: {batch_loss:.4f}")

        avg_loss = total_loss / num_batches
        print(f"Epoch [{epoch+1}/{epochs}] Average Loss: {avg_loss:.4f}")


In [8]:
# Hyperparameters for the training run.
batch_size = 8
seq_length = 151  # not referenced by the model or training code visible in this notebook
embed_size = 512
num_heads = 8
dropout = 0.1
forward_expansion = 4
kernel_size = 7
groups = 2
d_model = 512
num_layers = 12

max_relative_position = 128

# Anomaly detection is a debugging aid that slows execution; keep it off for
# the full training run and set this to True only while tracking down a
# failing backward pass.
DETECT_ANOMALY = False
torch.autograd.set_detect_anomaly(DETECT_ANOMALY)

# Use the GPU when one is present instead of failing outright without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Model_W2V(
    embed_size,
    num_heads,
    dropout,
    forward_expansion,
    kernel_size,
    groups,
    d_model,
    num_layers,
    max_relative_position,
).to(device)

train_model(model, dataset, epochs=250, learning_rate=1e-5, device=device)


  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()
  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()
  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._call

OutOfMemoryError: CUDA out of memory. Tried to allocate 11.13 GiB. GPU 0 has a total capacity of 47.43 GiB of which 324.31 MiB is free. Including non-PyTorch memory, this process has 0 bytes memory in use. Of the allocated memory 35.60 GiB is allocated by PyTorch, and 11.20 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
model.eval()

# Build a DataLoader over the test split.
# NOTE(review): target_length here (480000) is ten times the 48000 used for
# the training dataset - confirm this is intentional.
dataset = TempLibriSpeech.LibriSpeech(split="test-clean", target_length=480000, device='cuda')

# The model hands its fixed `model.batch_size` to the loss on every forward
# pass, so the loader must deliver batches of exactly that size: reuse the
# model's batch size and drop a trailing incomplete batch.
data_loader = DataLoader(dataset, batch_size=model.batch_size, shuffle=False, drop_last=True)
if len(data_loader) == 0:
    raise ValueError(
        f"test dataset must contain at least one full batch of {model.batch_size} samples"
    )

# Send inputs to wherever the model actually lives rather than assuming CUDA.
model_device = next(model.parameters()).device

all_quantized_reps = []
all_contextualized_reps = []

# Accumulate the loss returned by the model over the whole test loader.
total_loss = 0.0
with torch.no_grad():
    for inputs, _ in data_loader:  # the second element (transcript) is not used
        inputs = inputs.to(model_device)

        # Forward pass.
        # NOTE(review): the first value returned by Model_W2V.forward is the
        # feature-encoder output, not the quantizer output, so
        # `quantized_repr` is a misleading name - confirm which one is wanted.
        quantized_repr, contextualized_reps, loss = model(inputs)

        # Keep the representations of every batch on the CPU as NumPy arrays.
        all_quantized_reps.append(quantized_repr.cpu().numpy())
        all_contextualized_reps.append(contextualized_reps.cpu().numpy())

        total_loss += loss.item()

# Mean loss per batch over the evaluated batches.
average_loss = total_loss / len(data_loader)

# Report the result.
print(f"Average Loss: {average_loss:.4f}")


In [None]:
# Mean loss per batch: accumulated loss divided by the number of batches in the loader.
num_eval_batches = len(data_loader)
average_loss = total_loss / num_eval_batches

# Report the result.
print(f"Average Loss: {average_loss:.4f}")

In [None]:
# Display the `inputs` batch currently held in the notebook namespace.
inputs