In [1]:
import pandas as pd
from torch.utils.data import Dataset
import os
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import numpy as np
from torch.utils.data import DataLoader, Subset


# Création du Dataset de train
path_global_5s = "../../Dataset acoustique insectes/CSVs morceaux audio 5s/Audible/train_audible_recording_chunks.csv"
df = pd.read_csv(path_global_5s)
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
nb_classes = len(label_encoder.classes_)

class CustomImageDataset(Dataset):
    def __init__(self, directory, pd_directory, transform=None):
        self.directory = directory
        self.transform = transform
        self.images = os.listdir(directory)
        df = pd.read_csv(pd_directory)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.directory, self.images[idx])
        img_name = self.images[idx]

        image = Image.open(img_path).convert('RGB')
        label = np.zeros(nb_classes)

        a = img_name.split("_")
        chunk_initial_time = a[-2]
        code_unique = "_".join(a[:-3])
        
        labels = df[(df["code_unique"] == code_unique) & (df["chunk_initial_time"] == int(chunk_initial_time))]["label"]
        label[label_encoder.transform(labels)] = 1

        if self.transform:
            image = self.transform(image)

        return image, label



In [2]:
from torchvision import transforms

# Transformation pour normaliser les données et potentiellement redimensionner les images
transform = transforms.Compose([
    transforms.Resize((775, 308)),  # Si vos images ne sont pas de la taille requise
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [3]:
train_dataset_path = "../../Dataset acoustique insectes/Sélection morceaux audio 5s/Audible/train_spectro"
train_pd_directory = "../../Dataset acoustique insectes/CSVs morceaux audio 5s/Audible/train_audible_recording_chunks.csv"
train_dataset = CustomImageDataset(train_dataset_path, train_pd_directory, transform=transform)
train_dataset = Subset(train_dataset, range(10))
train_data_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

test_dataset_path = "../../Dataset acoustique insectes/Sélection morceaux audio 5s/Audible/train_spectro"
test_pd_directory = "../../Dataset acoustique insectes/CSVs morceaux audio 5s/Audible/train_audible_recording_chunks.csv"
test_dataset = CustomImageDataset(test_dataset_path, test_pd_directory, transform=transform)
test_dataset = Subset(test_dataset, range(10))
test_data_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from tqdm import tqdm

# Vérifiez si un GPU est disponible et définissez le device en conséquence
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class CustomCNN(nn.Module):
    def __init__(self, num_classes=70):
        super(CustomCNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        # Adaptive pooling permet d'avoir une taille fixe de sortie pour le FC layer, indépendamment de la taille d'entrée
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        
        # Dropout pour réduire l'overfitting
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.adaptive_pool(x)
        x = x.view(-1, 128 * 7 * 7)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

# Initialisation du modèle, perte et optimiseur
model = CustomCNN(num_classes=70).to(device)  # Déplacez le modèle sur le GPU si disponible
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10  # Définissez le nombre d'époques souhaité

for epoch in tqdm(range(num_epochs)):
    model.train()  # Mode entraînement
    train_loss = 0
    for images, labels in tqdm(train_data_loader):
        images, labels = images.to(device), labels.to(device)  # Déplacez les données et les cibles sur le GPU si disponible
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    train_loss /= len(train_data_loader)
    
    model.eval()  # Mode évaluation
    test_loss = 0
    with torch.no_grad():
        for images, labels in test_data_loader:
            images, labels = images.to(device), labels.to(device)  # Déplacez les données et les cibles sur le GPU si disponible
            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
    
    test_loss /= len(test_data_loader)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')


100%|██████████| 1/1 [00:04<00:00,  4.65s/it]
 10%|█         | 1/10 [00:05<00:53,  5.96s/it]

Epoch [1/10], Train Loss: 8.4987, Test Loss: 4.5111


100%|██████████| 1/1 [00:02<00:00,  2.96s/it]
 20%|██        | 2/10 [00:10<00:40,  5.10s/it]

Epoch [2/10], Train Loss: 4.5558, Test Loss: 1.4317


  0%|          | 0/1 [00:01<?, ?it/s]
 20%|██        | 2/10 [00:11<00:46,  5.77s/it]


KeyboardInterrupt: 