<a href="https://colab.research.google.com/github/MuhammedShiway/AICrowd-Mosquito/blob/main/%5B06%5D_Classification_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.0.145-py3-none-any.whl (605 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.6/605.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ultralytics
Successfully installed ultralytics-8.0.145




---

# <center> PATHS </center>


---



In [3]:
# IMAGE PATHS
train_path = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Original /train_images'
test_path = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Original /test_images_phase1'

# CSV PATHS
train_csv_path = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Original /train.csv'
test_csv_path = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Original /test_phase1_v2.csv'

# YOLO PATHS
yolo_train = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/YOLO/Train'
yolo_test = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/YOLO/Test'

# MODEL PATHS
model_path = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/YOLO/Detection Models/RUN 7.pt'

# YOLO CSV PATH
yolo_csv_path = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Models/yolo_csv.csv'

# CROPPED PATHS
train_csv_crop = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Cropped/Train_CSV'
train_yolo_crop = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Cropped/Train_YOLO'
test_crop = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset/Cropped/Test'


csv_file = train_csv_path



---

# <center> IMPORT MODULES AND LIBRARIES </center>


---



In [18]:
import os
import random
import shutil
import csv
import torch
import pandas as pd
from tqdm import tqdm
from PIL import Image
%matplotlib inline
from pprint import pprint
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torchvision.models as models
from sklearn.metrics import f1_score
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, random_split




---

# <center> FOLDER CONTENT DELETER </center>


---



In [None]:
def delete_folder_contents(folder_path):
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))






---

# <center> CSV FILE </center>


---





In [5]:
# Load the CSV data into a pandas DataFrame
df = pd.read_csv(csv_file)

df.head()

Unnamed: 0,img_fName,img_w,img_h,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr,class_label
0,92715872-3287-4bff-aa61-7047973e5c02.jpeg,2448,3264,1301,1546,1641,2096,albopictus
1,b0f7cc74-2272-4756-a387-38bcaf6965c8.jpeg,3024,4032,900,1897,1950,2990,albopictus
2,82df4b68-0f45-4afe-9215-48488bf3720e.jpeg,768,1024,220,58,659,808,albopictus
3,331ad30a-7564-4478-b863-7bc760adf5a1.jpeg,3456,4608,1169,2364,1586,2826,albopictus
4,1a46dbfb-104e-466b-88d7-98958d7b1fe9.jpeg,1024,1365,129,231,697,1007,culex


In [6]:
df['class_label'].unique()

array(['albopictus', 'culex', 'anopheles', 'culiseta',
       'japonicus/koreicus', 'aegypti'], dtype=object)

In [7]:
df['class_label'].value_counts()

albopictus            3567
culex                 3544
culiseta               492
japonicus/koreicus     321
anopheles               63
aegypti                 38
Name: class_label, dtype: int64



---

# <center> TRANSFORMS </center>


---



In [8]:
img_names = df['img_fName'].tolist()
general_labels = df['class_label'].tolist()

In [43]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2,
                           contrast=0.2,
                           saturation=0.2,
                           hue=0.1),
    transforms.RandomAffine(degrees=0,
                            translate=(0.1, 0.1),
                            scale=(0.8, 1.2),
                            shear=10),
    transforms.Resize((384,384)),
    transforms.RandomCrop((384,384), padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

In [44]:
all_labels = ['albopictus', 'culex', 'anopheles', 'culiseta', 'japonicus/koreicus', 'aegypti']
label_to_int = {label: idx for idx, label in enumerate(all_labels)}

In [45]:
label_to_int

{'albopictus': 0,
 'culex': 1,
 'anopheles': 2,
 'culiseta': 3,
 'japonicus/koreicus': 4,
 'aegypti': 5}



---

# <center> CUSTOM DATALOADER </center>


---



In [46]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, img_names, labels, root_dir, transform=None):
        self.img_names = img_names
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.img_names[idx])
        image = Image.open(img_path)
        label_str = self.labels[idx]
        label = label_to_int[label_str]
        # this label returns the integer (0 to 5) corresponding to the species

        if self.transform:
            image = self.transform(image)

        return image, label


In [47]:
dataset = CustomDataset(img_names, general_labels, root_dir=train_csv_crop, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])


In [48]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)



---

# <center> DEFINING / SELECTING MODEL </center>


---



In [49]:
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(set(general_labels)))

In [50]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
pprint(device)

device(type='cuda', index=0)


In [51]:
import torch.optim as optim

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [30]:
for inputs, labels in train_loader:
    print(type(inputs), type(labels))
    print(inputs.shape, labels.shape)
    break

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([32, 3, 384, 384]) torch.Size([32])




---

# <center> MODEL TRAINING </center>


---



In [52]:
def train_model(model, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                dataloader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                dataloader = val_loader

            running_loss = 0.0
            correct_preds = 0

            pbar = tqdm(dataloader, desc=f"{phase} progress", unit="batch")

            for inputs, labels in pbar:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                correct_preds += torch.sum(preds == labels.data)

                pbar.set_postfix(loss=loss.item())

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = correct_preds.double() / len(dataloader.dataset)

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
        print()

    return model


In [31]:
trained_model = train_model(model, criterion, optimizer, num_epochs=25)

Epoch 1/25
----------


train progress:   0%|          | 1/201 [00:31<1:45:57, 31.79s/batch, loss=1.11]


KeyboardInterrupt: ignored

In [53]:
def train_model(model, criterion, optimizer, num_epochs=25):

    history = {
        'train_loss': [],
        'train_accuracy': [],
        'train_macro_f1': [],
        'val_loss': [],
        'val_accuracy': [],
        'val_macro_f1': []
    }

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                dataloader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                dataloader = val_loader

            running_loss = 0.0
            correct_preds = 0
            all_labels = []
            all_preds = []

            pbar = tqdm(dataloader, desc=f"{phase} progress", unit="batch")

            for inputs, labels in pbar:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                correct_preds += torch.sum(preds == labels.data)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())

                pbar.set_postfix(loss=loss.item())

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = correct_preds.double() / len(dataloader.dataset)
            epoch_macro_f1 = f1_score(all_labels, all_preds, average='macro')

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} Macro F1: {epoch_macro_f1:.4f}")

            history[f"{phase}_loss"].append(epoch_loss)
            history[f"{phase}_accuracy"].append(epoch_acc.item())
            history[f"{phase}_macro_f1"].append(epoch_macro_f1)

        print()

    return model, history



In [None]:
trained_model, train_history = train_model(model, criterion, optimizer, num_epochs=25)

Epoch 1/25
----------


train progress: 100%|██████████| 201/201 [1:29:11<00:00, 26.62s/batch, loss=0.298]


train Loss: 0.6776 Acc: 0.7849 Macro F1: 0.2987


val progress: 100%|██████████| 51/51 [22:22<00:00, 26.32s/batch, loss=0.415]


val Loss: 0.5271 Acc: 0.8405 Macro F1: 0.3670

Epoch 2/25
----------


train progress: 100%|██████████| 201/201 [11:30<00:00,  3.44s/batch, loss=0.125]


train Loss: 0.4410 Acc: 0.8643 Macro F1: 0.3820


val progress: 100%|██████████| 51/51 [02:45<00:00,  3.24s/batch, loss=0.35]


val Loss: 0.4294 Acc: 0.8611 Macro F1: 0.4058

Epoch 3/25
----------


train progress: 100%|██████████| 201/201 [11:44<00:00,  3.50s/batch, loss=0.622]


train Loss: 0.3838 Acc: 0.8799 Macro F1: 0.4205


val progress: 100%|██████████| 51/51 [02:54<00:00,  3.41s/batch, loss=0.209]


val Loss: 0.4147 Acc: 0.8617 Macro F1: 0.4284

Epoch 4/25
----------


train progress: 100%|██████████| 201/201 [11:46<00:00,  3.51s/batch, loss=0.286]


train Loss: 0.3433 Acc: 0.8900 Macro F1: 0.4498


val progress: 100%|██████████| 51/51 [02:48<00:00,  3.30s/batch, loss=0.0831]


val Loss: 0.3923 Acc: 0.8735 Macro F1: 0.4628

Epoch 5/25
----------


train progress: 100%|██████████| 201/201 [11:40<00:00,  3.49s/batch, loss=0.271]


train Loss: 0.3045 Acc: 0.9017 Macro F1: 0.4755


val progress: 100%|██████████| 51/51 [02:47<00:00,  3.28s/batch, loss=0.352]


val Loss: 0.3594 Acc: 0.8822 Macro F1: 0.4687

Epoch 6/25
----------


train progress: 100%|██████████| 201/201 [11:39<00:00,  3.48s/batch, loss=0.406]


train Loss: 0.2911 Acc: 0.9092 Macro F1: 0.4965


val progress: 100%|██████████| 51/51 [02:55<00:00,  3.45s/batch, loss=0.032]


val Loss: 0.3563 Acc: 0.8885 Macro F1: 0.4939

Epoch 7/25
----------


train progress: 100%|██████████| 201/201 [11:48<00:00,  3.53s/batch, loss=0.336]


train Loss: 0.2608 Acc: 0.9159 Macro F1: 0.5085


val progress:  78%|███████▊  | 40/51 [02:14<00:35,  3.24s/batch, loss=0.337]



---

# <center> RENAME MODEL AND SAVE </center>


---



In [None]:
torch.save(trained_model.state_dict(), '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Models/model_001.pth')



---

# <center> MODEL INFERENCE / TESTING </center>


---



In [None]:
yolo_csv = pd.read_csv(yolo_csv_path)

yolo_csv.head()

Unnamed: 0,img_fName,img_w,img_h
0,ce8f0ede-4366-45b9-bdd4-959e17c7618c.jpeg,1599,965
1,f207780c-0d0a-4d41-bd95-92ca53ff81a3.jpeg,822,861
2,99c6b7f9-dbea-4635-a18d-cde16b125515.jpeg,720,1280
3,662b4405-3600-41b9-9812-451b6d9df322.jpeg,3024,4032
4,1de14b6b-6781-4a24-ae5e-f760e865c758.jpeg,1242,2208


In [None]:
transform = transforms.Compose([
    transforms.Resize((384,384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


for index, row in yolo_csv.iterrows():
    img_path = os.path.join(test_crop, row['img_fName'])
    image = Image.open(img_path)
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Predict
    with torch.no_grad():
        outputs = trained_model(image_tensor)
        _, predicted = torch.max(outputs, 1)
        class_label = all_labels[predicted.item()]

    # Save to CSV
    yolo_csv.at[index, 'class_label'] = class_label


In [None]:
yolo_csv

Unnamed: 0,img_fName,img_w,img_h,class_label
0,ce8f0ede-4366-45b9-bdd4-959e17c7618c.jpeg,1599,965,culex
1,f207780c-0d0a-4d41-bd95-92ca53ff81a3.jpeg,822,861,albopictus
2,99c6b7f9-dbea-4635-a18d-cde16b125515.jpeg,720,1280,albopictus
3,662b4405-3600-41b9-9812-451b6d9df322.jpeg,3024,4032,culex
4,1de14b6b-6781-4a24-ae5e-f760e865c758.jpeg,1242,2208,albopictus
...,...,...,...,...
2671,5071a8e8-b210-4929-bf48-7bafb4b8ba06.jpeg,3840,5120,culex
2672,fd610106-3aff-4172-9e12-5037d107af30.jpeg,4000,3000,albopictus
2673,c0677977-ef6f-4397-b809-a7f7c29e54f6.jpeg,4032,2268,culex
2674,df645ac5-dc5c-4549-95f2-7fbe1c4c4c90.jpeg,1078,1280,culex




---

# <center> RENAME SUBMISSION CSV AND SAVE </center>


---



In [None]:
# Save the updated TEST CSV
yolo_csv.to_csv('/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Models/test_csv_001.csv', index=False)