In [6]:
path = "/kaggle/input/insectes-acoustique/"

In [7]:
import pandas as pd
from torch.utils.data import Dataset
import os
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import numpy as np
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm


# Création du Dataset de train
path_global_5s = path + "Dataset acoustique insectes/CSVs morceaux audio 5s/Audible/train_audible_recording_chunks.csv"
df = pd.read_csv(path_global_5s)
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
nb_classes = len(label_encoder.classes_)

class CustomImageDataset(Dataset):
    def __init__(self, directory, pd_directory, transform=None):
        self.directory = directory
        self.transform = transform
        self.images_name = os.listdir(self.directory)
        
        self.images = []
        for image_path in tqdm(os.listdir(self.directory)):
            img_path = os.path.join(self.directory, image_path)
            self.images.append(Image.open(img_path).convert('RGB'))
        
        df = pd.read_csv(pd_directory)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images_name[idx]
        image = self.images[idx]
        label = np.zeros(nb_classes)

        a = img_name.split("_")
        chunk_initial_time = a[-2]
        code_unique = "_".join(a[:-3])
        
        labels = df[(df["code_unique"] == code_unique) & (df["chunk_initial_time"] == int(chunk_initial_time))]["label"]
        label[label_encoder.transform(labels)] = 1

        if self.transform:
            image = self.transform(image)

        return image, label



In [8]:
from torchvision import transforms

# Transformation pour normaliser les données et potentiellement redimensionner les images
transform = transforms.Compose([
    transforms.Resize((775, 308)),  # Si vos images ne sont pas de la taille requise
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [9]:
train_dataset_path = path + "Dataset acoustique insectes/S�lection morceaux audio 5s/Audible/train_spectro"
train_pd_directory = path + "Dataset acoustique insectes/CSVs morceaux audio 5s/Audible/train_audible_recording_chunks.csv"
train_dataset = CustomImageDataset(train_dataset_path, train_pd_directory, transform=transform)
train_data_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

test_dataset_path = path + "Dataset acoustique insectes/S�lection morceaux audio 5s/Audible/train_spectro"
test_pd_directory = path + "Dataset acoustique insectes/CSVs morceaux audio 5s/Audible/train_audible_recording_chunks.csv"
test_dataset = CustomImageDataset(test_dataset_path, test_pd_directory, transform=transform)
test_data_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)

100%|██████████| 5003/5003 [00:39<00:00, 125.68it/s]
100%|██████████| 5003/5003 [00:37<00:00, 131.83it/s]


In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from tqdm import tqdm

# Vérifiez si un GPU est disponible et définissez le device en conséquence
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class CustomCNN(nn.Module):
    def __init__(self, num_classes=70):
        super(CustomCNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        # Adaptive pooling permet d'avoir une taille fixe de sortie pour le FC layer, indépendamment de la taille d'entrée
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        
        # Dropout pour réduire l'overfitting
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.adaptive_pool(x)
        x = x.view(-1, 128 * 7 * 7)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        #x = F.softmax(x, dim=1)
        return x

num_classes = 70
weight = np.zeros(num_classes)

for _, labels in tqdm(train_data_loader, position=0):
    weight += labels.numpy().sum(axis=0)

# Initialisation du modèle, perte et optimiseur
model = CustomCNN(num_classes=num_classes).to(device)  # Déplacez le modèle sur le GPU si disponible
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.from_numpy(weight).to(device))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10  # Définissez le nombre d'époques souhaité

for epoch in tqdm(range(num_epochs)):
    model.train()  # Mode entraînement
    train_loss = 0
    print(f'{epoch}, training :')
    for images, labels in tqdm(train_data_loader, position=0):
        images, labels = images.to(device), labels.to(device)  # Déplacez les données et les cibles sur le GPU si disponible
        
        outputs = model(images)
        loss = criterion(outputs.float(), labels.float())
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    train_loss /= len(train_data_loader)
    
    """model.eval()  # Mode évaluation
    test_loss = 0
    with torch.no_grad():
        for images, labels in tqdm(test_data_loader, position=0):
            images, labels = images.to(device), labels.to(device)  # Déplacez les données et les cibles sur le GPU si disponible
            outputs = model(images)
            loss = criterion(outputs.float(), labels.float())
            test_loss += loss.item()
    
    test_loss /= len(test_data_loader)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')"""
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}')


100%|██████████| 157/157 [00:42<00:00,  3.68it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

0, training :


100%|██████████| 157/157 [01:21<00:00,  1.92it/s]
 10%|█         | 1/10 [01:21<12:14, 81.62s/it]

Epoch [1/10], Train Loss: 1.9190
1, training :


100%|██████████| 157/157 [01:21<00:00,  1.94it/s]
 20%|██        | 2/10 [02:42<10:50, 81.30s/it]

Epoch [2/10], Train Loss: 1.5378
2, training :


100%|██████████| 157/157 [01:20<00:00,  1.94it/s]
 30%|███       | 3/10 [04:03<09:27, 81.10s/it]

Epoch [3/10], Train Loss: 1.3932
3, training :


100%|██████████| 157/157 [01:20<00:00,  1.94it/s]
 40%|████      | 4/10 [05:24<08:05, 81.00s/it]

Epoch [4/10], Train Loss: 1.2756
4, training :


100%|██████████| 157/157 [01:20<00:00,  1.95it/s]
 50%|█████     | 5/10 [06:45<06:44, 80.87s/it]

Epoch [5/10], Train Loss: 1.1791
5, training :


100%|██████████| 157/157 [01:20<00:00,  1.95it/s]
 60%|██████    | 6/10 [08:05<05:23, 80.79s/it]

Epoch [6/10], Train Loss: 1.0894
6, training :


100%|██████████| 157/157 [01:20<00:00,  1.95it/s]
 70%|███████   | 7/10 [09:26<04:02, 80.72s/it]

Epoch [7/10], Train Loss: 1.0273
7, training :


100%|██████████| 157/157 [01:20<00:00,  1.94it/s]
 80%|████████  | 8/10 [10:47<02:41, 80.75s/it]

Epoch [8/10], Train Loss: 0.9620
8, training :


100%|██████████| 157/157 [01:20<00:00,  1.94it/s]
 90%|█████████ | 9/10 [12:07<01:20, 80.76s/it]

Epoch [9/10], Train Loss: 0.9067
9, training :


100%|██████████| 157/157 [01:20<00:00,  1.95it/s]
100%|██████████| 10/10 [13:28<00:00, 80.85s/it]

Epoch [10/10], Train Loss: 0.8593





In [107]:
# Initialiser pour calculer l'accuracy
correct_predictions = 0
incorrect_predictions = 0
total_predictions = 0
total_predictions_made = 0

nb_good_prediction = np.zeros(num_classes) #Nombre de fois où il devait être predit et où il a été prédit
nb_bad_prediction = np.zeros(num_classes) #Nombre de fois où il devait pas être predit et où il a été prédit
nb_no_prediction = np.zeros(num_classes) #Nombre de fois où il devait être predit et où il a pas été prédit
nb_must_be_predited = np.zeros(num_classes) # Nombre de fois où il devait être predit
nb_must_not_be_predited = np.zeros(num_classes) # Nombre de fois où il devait pas être predit
nb_is_predicted = np.zeros(num_classes)
nb_prediction = 0

with torch.no_grad():  # Désactive le calcul du gradient pour économiser de la mémoire et accélérer
    for images, labels in tqdm(test_data_loader, position=0):
        images, labels = images.to(device), labels.to(device)
        
        # Obtenez les prédictions du modèle
        outputs = model(images)
        
        # Appliquez un seuil pour convertir les probabilités en prédictions binaires
        # Ici, on utilise 0.5 comme seuil, mais cela pourrait être ajusté en fonction de votre cas d'utilisation
        predicted = (outputs > 0.99).float()
        
        nb_must_be_predited += labels.cpu().numpy().sum(axis=0)
        nb_must_not_be_predited += (-labels + 1).cpu().numpy().sum(axis=0)
        nb_good_prediction += ((predicted == 1) & (labels == 1)).cpu().numpy().sum(axis=0)
        nb_bad_prediction += ((predicted == 1) & (labels == 0)).cpu().numpy().sum(axis=0)
        nb_no_prediction += ((predicted == 0) & (labels == 1)).cpu().numpy().sum(axis=0)
        nb_is_predicted += predicted.cpu().numpy().sum(axis=0)
        nb_prediction += predicted.shape[0]
        
"""        # Calculer le nombre de prédictions correctes
        # Note: Les opérations sont effectuées en booléen, puis converties en float pour le calcul de la moyenne
        correct_predictions += (predicted == labels).float().sum()
        incorrect_predictions += (predicted != labels).float().sum()
        total_predictions += torch.numel(labels)
        total_predictions_made += torch.numel(predicted)

# Calcul de l'accuracy moyenne
accuracy = correct_predictions / total_predictions
error_rate  = incorrect_predictions / total_predictions
print(f'Accuracy: {accuracy.item()}, Error_rate : {error_rate.item()}')"""

100%|██████████| 157/157 [00:58<00:00,  2.67it/s]


"        # Calculer le nombre de prédictions correctes\n        # Note: Les opérations sont effectuées en booléen, puis converties en float pour le calcul de la moyenne\n        correct_predictions += (predicted == labels).float().sum()\n        incorrect_predictions += (predicted != labels).float().sum()\n        total_predictions += torch.numel(labels)\n        total_predictions_made += torch.numel(predicted)\n\n# Calcul de l'accuracy moyenne\naccuracy = correct_predictions / total_predictions\nerror_rate  = incorrect_predictions / total_predictions\nprint(f'Accuracy: {accuracy.item()}, Error_rate : {error_rate.item()}')"

In [109]:
a = nb_good_prediction / nb_must_be_predited
b = nb_bad_prediction / nb_must_not_be_predited
c = nb_no_prediction / nb_must_be_predited
d = nb_is_predicted / nb_must_be_predited

print("predit-label")
print("classe \t 1-1 \t 1-0 \t 0-1 \t nb de fois predite / nb fois où il doit l'être")
for i in range(len(nb_must_be_predited)):
    print(f'{i} \t {a[i]:.2f} \t {b[i]:.2f} \t {c[i]:.2f} \t {d[i]:.2f}')

predit-label
classe 	 1-1 	 1-0 	 0-1 	 nb de fois predite / nb fois où il doit l'être
0 	 0.88 	 0.02 	 0.12 	 2.86
1 	 0.89 	 0.23 	 0.11 	 9.60
2 	 0.14 	 0.00 	 0.86 	 0.29
3 	 1.00 	 0.03 	 0.00 	 2.01
4 	 0.97 	 0.59 	 0.03 	 10.88
5 	 0.67 	 0.06 	 0.33 	 6.63
6 	 0.89 	 0.13 	 0.11 	 8.57
7 	 0.43 	 0.03 	 0.57 	 3.00
8 	 0.46 	 0.05 	 0.54 	 4.85
9 	 0.94 	 0.00 	 0.06 	 1.17
10 	 1.00 	 0.38 	 0.00 	 10.29
11 	 0.79 	 0.04 	 0.21 	 4.35
12 	 0.83 	 0.29 	 0.17 	 12.09
13 	 0.37 	 0.02 	 0.63 	 2.35
14 	 0.80 	 0.07 	 0.20 	 4.96
15 	 0.96 	 0.11 	 0.04 	 4.23
16 	 0.89 	 0.11 	 0.11 	 7.27
17 	 0.98 	 0.47 	 0.02 	 9.91
18 	 0.57 	 0.03 	 0.43 	 4.40
19 	 0.91 	 0.15 	 0.09 	 8.37
20 	 0.59 	 0.02 	 0.41 	 3.59
21 	 0.61 	 0.03 	 0.39 	 3.10
22 	 0.99 	 0.15 	 0.01 	 4.24
23 	 0.53 	 0.04 	 0.47 	 3.93
24 	 0.82 	 0.05 	 0.18 	 4.72
25 	 0.99 	 0.02 	 0.01 	 1.83
26 	 0.95 	 0.03 	 0.05 	 2.18
27 	 0.99 	 0.17 	 0.01 	 3.16
28 	 0.99 	 0.08 	 0.01 	 4.88
29 	 0.76 	 0.02 	 0.