In [1]:
import numpy as np
import pandas as pd
import os
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from torchvision.models import MobileNet_V2_Weights

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [3]:
train_df = pd.read_csv('train.csv')
train_ids = train_df['id'].tolist()
train_labels = train_df['species']

train_labels.head()

0              Acer_Opalus
1    Pterocarya_Stenoptera
2     Quercus_Hartwissiana
3          Tilia_Tomentosa
4       Quercus_Variabilis
Name: species, dtype: object

In [4]:
image_directory = 'images'
image_list = []
image_test_list = []

for filename in os.listdir(image_directory):
    if filename.endswith(('.jpg', '.jpeg', '.png')):
        label = int(os.path.splitext(filename)[0])  
        if label in train_ids:
            image_list.append(f"images/{label}.jpg") 
        else:
            image_test_list.append(f"images/{label}.jpg")



In [5]:
# Define transforms for data augmentation and preprocessing
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.Resize((64, 64)),  # Resize to 64x64 during transformation
    transforms.RandomRotation(10),  # Random rotations
    transforms.RandomHorizontalFlip(),  # Horizontal flipping for augmentation
    transforms.ToTensor(),
])

In [6]:
def add_gaussian_noise(image, noise_factor=0.05):
    """Adds Gaussian noise to the image."""
    noise = torch.randn(image.size()) * noise_factor
    noisy_image = image + noise
    return torch.clamp(noisy_image, 0., 1.)  # Clamping to keep pixel values in valid range

In [7]:
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        with Image.open(image_path) as img:
            if self.transform:
                img = self.transform(img)

        img = add_gaussian_noise(img)  # Add Gaussian noise to the image
        label = torch.tensor(label).long()

        return img, label

In [8]:
# Create dataset and dataloader
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
dataset = CustomDataset(image_list, train_labels, transform=transform)
dataloader = DataLoader(dataset, batch_size=64)

In [9]:
# Load pre-trained MobileNet model
model = models.mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1)
num_classes = 99  # Change this to your number of classes
# Modify the first convolutional layer to accept 1-channel input instead of 3
model.features[0][0] = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)

# Replace the classifier head
model.classifier[1] = nn.Linear(model.last_channel, num_classes)

# Move the entire model to the device after modifications
model = model.to(device)

# Set the model to training mode
model.train()

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [10]:
# Define criterion and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0015)

In [11]:
num_epochs = 65

for epoch in range(num_epochs):
    model.train()
    total_correct = 0
    total_samples = 0
    total_loss = 0
    
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        total_loss += loss.item()
        accuracy = total_correct / total_samples * 100

    avg_loss = total_loss / len(dataloader)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

# Save the model after training completes
torch.save(model.state_dict(), 'model_final.pth')
print("Model saved successfully!")


Epoch [1/65], Loss: 5.0300, Accuracy: 0.61%
Epoch [2/65], Loss: 4.7397, Accuracy: 1.01%
Epoch [3/65], Loss: 4.6986, Accuracy: 2.73%
Epoch [4/65], Loss: 4.5851, Accuracy: 1.72%
Epoch [5/65], Loss: 4.6032, Accuracy: 1.92%
Epoch [6/65], Loss: 4.5789, Accuracy: 3.03%
Epoch [7/65], Loss: 4.4296, Accuracy: 3.23%
Epoch [8/65], Loss: 4.4339, Accuracy: 3.03%
Epoch [9/65], Loss: 4.2976, Accuracy: 3.64%
Epoch [10/65], Loss: 4.2815, Accuracy: 3.74%
Epoch [11/65], Loss: 4.1727, Accuracy: 4.55%
Epoch [12/65], Loss: 4.0621, Accuracy: 5.35%
Epoch [13/65], Loss: 4.0785, Accuracy: 4.75%
Epoch [14/65], Loss: 4.1147, Accuracy: 4.85%
Epoch [15/65], Loss: 4.0963, Accuracy: 4.75%
Epoch [16/65], Loss: 3.9601, Accuracy: 5.56%
Epoch [17/65], Loss: 3.9530, Accuracy: 6.46%
Epoch [18/65], Loss: 3.8546, Accuracy: 5.25%
Epoch [19/65], Loss: 3.6947, Accuracy: 8.38%
Epoch [20/65], Loss: 3.6627, Accuracy: 6.57%
Epoch [21/65], Loss: 3.5091, Accuracy: 8.18%
Epoch [22/65], Loss: 3.3088, Accuracy: 12.53%
Epoch [23/65], Los

In [12]:
class TestDataset(Dataset):
    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]

        with Image.open(image_path) as img:
            if self.transform:
                img = self.transform(img)

        return img


In [13]:
test_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.Resize((64, 64)),  # Resize to 64x64 during transformation
    transforms.ToTensor(),
])

In [14]:
test_dataset = TestDataset(image_test_list, transform=test_transform)
test_dataloader = DataLoader(test_dataset, batch_size=64)

In [15]:
model.eval()
predictions = []
with torch.no_grad():
    for images in test_dataloader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

In [16]:
predicted_labels = label_encoder.inverse_transform(predictions)
predicted_labels[:10]

array(['Liquidambar_Styraciflua', 'Quercus_Greggii',
       'Quercus_Infectoria_sub', 'Zelkova_Serrata', 'Populus_Nigra',
       'Eucalyptus_Urnigera', 'Alnus_Sieboldiana', 'Quercus_Cerris',
       'Quercus_Alnifolia', 'Quercus_Semecarpifolia'], dtype=object)