<a href="https://colab.research.google.com/github/MuhammedShiway/AICrowd-Mosquito/blob/main/%5B05%5D_Classification_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/gdrive; every dataset path defined in this
# notebook is rooted under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive




---

# <center> PATHS </center>


---



In [2]:
# All dataset artefacts hang off one Drive folder, so the shared prefix is
# spelled out once and the individual locations are derived from it.
DATASET_ROOT = '/content/gdrive/MyDrive/Colab Notebooks/AI CROWD Mosquito/Dataset'

# NOTE(review): the trailing space in 'Original ' is carried over verbatim from
# the hand-written paths -- presumably it matches the folder name on Drive; verify.
ORIGINAL_DIR = f'{DATASET_ROOT}/Original '
YOLO_DIR = f'{DATASET_ROOT}/YOLO'
CROPPED_DIR = f'{DATASET_ROOT}/Cropped'

# IMAGE PATHS
train_path = f'{ORIGINAL_DIR}/train_images'
test_path = f'{ORIGINAL_DIR}/test_images_phase1'

# CSV PATHS
train_csv_path = f'{ORIGINAL_DIR}/train.csv'
test_csv_path = f'{ORIGINAL_DIR}/test_phase1_v2.csv'

# YOLO PATHS
yolo_train = f'{YOLO_DIR}/Train'
yolo_test = f'{YOLO_DIR}/Test'

# MODEL PATHS
model_path = f'{YOLO_DIR}/Detection Models/RUN 7.pt'

# CROPPED PATHS
train_csv_crop = f'{CROPPED_DIR}/Train_CSV'
train_yolo_crop = f'{CROPPED_DIR}/Train_YOLO'
test_crop = f'{CROPPED_DIR}/Test'

# Annotation file used by the rest of the notebook.
csv_file = train_csv_path



---

# <center> IMPORT MODULES AND LIBRARIES </center>


---



In [3]:
import os
import random
import shutil
import csv
import pandas as pd
import torch
from PIL import Image
%matplotlib inline
from pprint import pprint
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, random_split




---

# <center> FOLDER CONTENT DELETER </center>


---



In [None]:
def delete_folder_contents(folder_path):
    """Empty ``folder_path`` without removing the folder itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. A failure on one entry is printed and the remaining entries
    are still processed.

    Args:
        folder_path: Directory whose direct children should be deleted.
    """
    for entry_name in os.listdir(folder_path):
        target = os.path.join(folder_path, entry_name)
        try:
            # Links are unlinked rather than followed, so a link to a
            # directory never triggers a recursive delete of its target.
            if os.path.islink(target) or os.path.isfile(target):
                os.unlink(target)
                continue
            if os.path.isdir(target):
                shutil.rmtree(target)
        except Exception as err:
            print('Failed to delete %s. Reason: %s' % (target, err))

# Usage
# delete_folder_contents('/path/to/your/folder')




---

# <center> LOAD TRAINING CSV </center>

---



In [4]:
# Load the CSV data into a pandas DataFrame
# Read the annotation CSV (one row per image, as the preview below shows).
df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,img_fName,img_w,img_h,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr,class_label
0,92715872-3287-4bff-aa61-7047973e5c02.jpeg,2448,3264,1301,1546,1641,2096,albopictus
1,b0f7cc74-2272-4756-a387-38bcaf6965c8.jpeg,3024,4032,900,1897,1950,2990,albopictus
2,82df4b68-0f45-4afe-9215-48488bf3720e.jpeg,768,1024,220,58,659,808,albopictus
3,331ad30a-7564-4478-b863-7bc760adf5a1.jpeg,3456,4608,1169,2364,1586,2826,albopictus
4,1a46dbfb-104e-466b-88d7-98958d7b1fe9.jpeg,1024,1365,129,231,697,1007,culex


In [50]:
# Distinct species names, in order of first appearance in the CSV.
pd.unique(df['class_label'])

array(['albopictus', 'culex', 'anopheles', 'culiseta',
       'japonicus/koreicus', 'aegypti'], dtype=object)

In [67]:
# Rows per species, most frequent first.
df.loc[:, 'class_label'].value_counts()

albopictus            3567
culex                 3544
culiseta               492
japonicus/koreicus     321
anopheles               63
aegypti                 38
Name: class_label, dtype: int64



---

# <center> TRANSFORMS </center>


---



In [75]:
# Parallel lists: general_labels[i] is the species of the image img_names[i].
img_names = df['img_fName'].to_list()
general_labels = df['class_label'].to_list()

In [None]:
# Show only a short preview; echoing the full list would flood the cell output
# with one entry per training image.
general_labels[:10]

In [38]:
# Preprocessing applied to every image before it reaches the network:
#   1. resize to a fixed 384x384 so images can be batched,
#   2. convert the PIL image to a float tensor in [0, 1],
#   3. normalize each channel with the ImageNet statistics, because the
#      classifier built later in this notebook starts from ImageNet-pretrained
#      ResNet weights, which were trained on inputs normalized this way.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((384,384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

In [71]:
# Fixed class order: a label's position in this list is its integer class id.
all_labels = [
    'albopictus',
    'culex',
    'anopheles',
    'culiseta',
    'japonicus/koreicus',
    'aegypti',
]
label_to_int = dict(zip(all_labels, range(len(all_labels))))

In [72]:
# Display the species-name -> class-index mapping for reference.
label_to_int

{'albopictus': 0,
 'culex': 1,
 'anopheles': 2,
 'culiseta': 3,
 'japonicus/koreicus': 4,
 'aegypti': 5}

In [53]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing image files with integer species labels.

    Args:
        img_names: Sequence of image file names, relative to ``root_dir``.
        labels: Sequence of class-name strings aligned with ``img_names``;
            each must be a key of the notebook-level ``label_to_int`` mapping.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, img_names, labels, root_dir, transform=None):
        self.img_names = img_names
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image name)."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Returns:
            The (optionally transformed) RGB image and the integer class index
            looked up in ``label_to_int``.

        Raises:
            KeyError: If the sample's label string is not in ``label_to_int``.
        """
        img_path = os.path.join(self.root_dir, self.img_names[idx])
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise yield tensors that a 3-channel Normalize / ResNet stem
        # rejects. convert() materialises the pixels, so the file handle can
        # be released as soon as the with-block exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Integer class index corresponding to the species name.
        label = label_to_int[self.labels[idx]]

        if self.transform:
            image = self.transform(image)

        return image, label


In [77]:
# Build the full dataset from the cropped training images, then hold out 20 %
# of it for validation.
dataset = CustomDataset(img_names, general_labels, root_dir=train_csv_crop, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)

# Seed the split so the same images land in train/val on every run; without a
# fixed generator the validation metrics are not comparable between runs.
SPLIT_SEED = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


In [78]:
# Batches of 32 for both splits; only the training split is reshuffled each
# epoch, the validation split is read in a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [80]:
import torchvision.models as models

# Start from ImageNet-pretrained ResNet-18. `weights=` replaces the deprecated
# `pretrained=True` flag (torchvision >= 0.13); IMAGENET1K_V1 is the checkpoint
# that `pretrained=True` used to load.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the 1000-way ImageNet head with one output per mosquito class. The
# size comes from label_to_int (the mapping that produces the target indices)
# rather than from the labels that happen to be present in the data, so the
# head always covers every index the dataset can emit.
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(label_to_int))

In [81]:
# Use the first CUDA GPU when the runtime exposes one, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Move the network's parameters to the selected device.
model.to(device)

# Show the architecture and the device for reference.
pprint(model)
pprint(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [82]:
import torch.optim as optim

# Multi-class classification loss over the raw network outputs (logits).
criterion = torch.nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the network (nothing is frozen).
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

In [84]:
# Sanity check: pull a single batch and report the types and shapes the
# training loop will receive, then stop iterating.
for inputs, labels in train_loader:
    print(*map(type, (inputs, labels)))
    print(inputs.shape, labels.shape)
    break

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([32, 3, 384, 384]) torch.Size([32])


In [85]:
def train_model(model, criterion, optimizer, num_epochs=25, dataloaders=None, device=None):
    """Train ``model`` and report loss/accuracy for each phase of every epoch.

    Args:
        model: Network to optimise; it is updated in place.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over the training data.
        dataloaders: Optional mapping with ``'train'`` and/or ``'val'`` keys,
            each an iterable of ``(inputs, labels)`` batches. When omitted, the
            notebook-level ``train_loader`` and ``val_loader`` are used.
        device: Device the batches are moved to. When omitted, the device that
            already holds the model's parameters is used.

    Returns:
        The same ``model`` object, after training.
    """
    if dataloaders is None:
        # Backward-compatible default: the loaders built in earlier cells.
        dataloaders = {'train': train_loader, 'val': val_loader}
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 10)

        # Each epoch has a training and validation phase
        for phase in ('train', 'val'):
            if phase not in dataloaders:
                continue
            is_train = phase == 'train'
            # train(): dropout/batch-norm in training mode; eval(): frozen stats.
            model.train(is_train)

            running_loss = 0.0
            correct_preds = 0
            samples_seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                batch_size = inputs.size(0)

                # Gradients are tracked only while training, which keeps the
                # validation pass cheaper in memory and compute.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # The loss is a batch mean by default, so weight it by the batch
                # size to get a correct per-sample average over the epoch.
                running_loss += loss.item() * batch_size
                # .item() keeps the counter a plain int (no tensor accumulation).
                correct_preds += (preds == labels).sum().item()
                samples_seen += batch_size

            if samples_seen == 0:
                # Nothing to average over; avoid dividing by zero.
                print(f"{phase}: no samples, skipping metrics")
                continue

            # Normalise by the samples actually processed, which stays correct
            # even when the loader drops or subsamples part of its dataset.
            epoch_loss = running_loss / samples_seen
            epoch_acc = correct_preds / samples_seen

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
        print()

    return model


In [None]:
# Run the training/validation loop for 25 epochs and keep the returned model.
trained_model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=25,
)

Epoch 1/25
----------
