In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [31]:
import os
import torch
from torchvision import transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
from torch.utils.data import DataLoader, Dataset

# Define the directory paths
train_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/train"
# val_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/val"
test_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/test"

# ViT Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

# Define a custom dataset class
class WildfireDataset(Dataset):
    def __init__(self, folder_path, feature_extractor):
        self.folder_path = folder_path
        self.image_files = []  # List to store image file paths
        self.labels = []  # List to store corresponding labels
        label_mapping = {
#             "Smoke_confounding_elements": 0,
#             "Forested_areas_without_confounding_elements": 1,
#             "Fire_confounding_elements": 2,
            "Smoke_from_fires": 0,
            "Both_smoke_and_fire": 1,
        }


        # Populate image_files and labels based on the clarified folder structure
        for class_label in ["nofire", "fire"]:
            current_path = f"{folder_path}/{class_label}"
            if class_label == "nofire":
                continue
            else:
                for subclass_label in ["Smoke_from_fires", "Both_smoke_and_fire"]:
                    current_subclass_path = f"{current_path}/{subclass_label}"
                    image_files = os.listdir(current_subclass_path)
                    self.image_files.extend([f"{current_subclass_path}/{img}" for img in image_files])
                    self.labels.extend([label_mapping[subclass_label]] * len(image_files))

        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        
        # Ensure that the image has the correct shape (num_channels, height, width)
        image = self.feature_extractor(images=image, return_tensors="pt")["pixel_values"].squeeze(0)

        return {
            "pixel_values": image,
            "labels": torch.tensor(label),
        }

    
    def __len__(self):
        return len(self.image_files)

# Create dataset instances
train_dataset = WildfireDataset(train_path, feature_extractor)
# val_dataset = WildfireDataset(val_path, feature_extractor)
test_dataset = WildfireDataset(test_path, feature_extractor)

# Create DataLoader instances
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)



In [3]:
# Check the length of the subset dataset
print("Number of samples in the subset dataset:", len(train_dataset))

Number of samples in the subset dataset: 730


In [5]:
import torch
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn as nn

# Define the loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Load the fine-tuned model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Define training parameters
num_epochs = 5

# for param in model.base_model.parameters():
#     param.requires_grad = False  # Freeze initial layers


# Store losses and accuracies
train_losses = []
accuracies = []

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_labels = []
    epoch_predictions = []

    for batch in train_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        logits = outputs.logits
        loss = criterion(logits, labels)  # Compute the appropriate loss function here
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


        # Store predictions and labels for accuracy calculation
        epoch_labels.extend(labels.cpu().numpy())
        epoch_predictions.extend(torch.argmax(outputs.logits, axis=1).cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(epoch_labels, epoch_predictions)
    accuracies.append(accuracy)

    # Print average training loss and accuracy for the epoch
    avg_train_loss = sum(train_losses[-len(train_dataloader):]) / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Avg Train Loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.4f}")

# Store accuracies for further analysis
print("Accuracies:", accuracies)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Let's use 2 GPUs!
Epoch 1/5, Avg Train Loss: 0.6433, Accuracy: 0.6575




Epoch 2/5, Avg Train Loss: 0.5159, Accuracy: 0.8192




Epoch 3/5, Avg Train Loss: 0.4054, Accuracy: 0.8795




Epoch 4/5, Avg Train Loss: 0.3105, Accuracy: 0.9247




Epoch 5/5, Avg Train Loss: 0.2257, Accuracy: 0.9534
Accuracies: [0.6575342465753424, 0.8191780821917808, 0.8794520547945206, 0.9246575342465754, 0.9534246575342465]


In [8]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import numpy as np

# Initialize val_accuracies list
val_accuracies = []

# Validation loop
model.eval()
val_losses = []
val_preds = []
val_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Check for non-empty batches
        if inputs.size(0) == 0:
            continue

        outputs = model(inputs, labels=labels)
        
        # Check for NaN values in the loss
        if np.any(np.isnan(outputs.loss.cpu().numpy())):
            print("NaN loss encountered. Skipping batch.")
            continue

        # Compute the loss using the appropriate criterion
        loss = criterion(outputs.logits, labels)
        val_losses.append(loss.item())

        predictions = torch.argmax(outputs.logits, dim=1)
        val_preds.extend(predictions.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

# Calculate validation accuracy
val_accuracy = accuracy_score(val_labels, val_preds)
val_loss = sum(val_losses) / len(val_losses)

print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Validation Loss: {val_loss:.4f}")

# Store validation accuracy for further analysis
val_accuracies.append(val_accuracy)

Validation Accuracy: 87.42%
Validation Loss: 0.3312


In [10]:
# Save the fine-tuned model
if isinstance(model, torch.nn.DataParallel):
    model.module.save_pretrained("fire_subclass_model_VIT")
else:
    model.save_pretrained("fire_subclass_model_VIT")

In [11]:
# Create a zip archive of a folder
!zip -r fire_subclass_model_VIT.zip fire_subclass_model_VIT

  adding: fire_subclass_model_VIT/ (stored 0%)
  adding: fire_subclass_model_VIT/model.safetensors (deflated 7%)
  adding: fire_subclass_model_VIT/config.json (deflated 46%)


**Training for 5 more epochs**

In [4]:
import torch
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn as nn

# Define the loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Load the fine-tuned model
model = ViTForImageClassification.from_pretrained('/kaggle/input/capstone-models/fire_subclass_model_VIT/fire_subclass_model_VIT',num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Define training parameters
num_epochs = 5

# for param in model.base_model.parameters():
#     param.requires_grad = False  # Freeze initial layers
start_epoch = 6

# Store losses and accuracies
train_losses = []
accuracies = []

# Training loop
for epoch in range(start_epoch, start_epoch + num_epochs):
    model.train()
    epoch_labels = []
    epoch_predictions = []

    for batch in train_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        logits = outputs.logits
        loss = criterion(logits, labels)  # Compute the appropriate loss function here
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


        # Store predictions and labels for accuracy calculation
        epoch_labels.extend(labels.cpu().numpy())
        epoch_predictions.extend(torch.argmax(outputs.logits, axis=1).cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(epoch_labels, epoch_predictions)
    accuracies.append(accuracy)

    # Print average training loss and accuracy for the epoch
    avg_train_loss = sum(train_losses[-len(train_dataloader):]) / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Avg Train Loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.4f}")

# Store accuracies for further analysis
print("Accuracies:", accuracies)

Let's use 2 GPUs!




Epoch 7/5, Avg Train Loss: 0.1633, Accuracy: 0.9712




Epoch 8/5, Avg Train Loss: 0.1111, Accuracy: 0.9863




Epoch 9/5, Avg Train Loss: 0.0801, Accuracy: 0.9890




Epoch 10/5, Avg Train Loss: 0.0602, Accuracy: 0.9932




Epoch 11/5, Avg Train Loss: 0.0474, Accuracy: 0.9959
Accuracies: [0.9712328767123287, 0.9863013698630136, 0.989041095890411, 0.9931506849315068, 0.9958904109589041]


In [5]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import numpy as np

# Validation loop
model.eval()
val_losses = []
val_preds = []
val_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Check for non-empty batches
        if inputs.size(0) == 0:
            continue

        outputs = model(inputs, labels=labels)
        
        # Check for NaN values in the loss
        if np.any(np.isnan(outputs.loss.cpu().numpy())):
            print("NaN loss encountered. Skipping batch.")
            continue

        # Compute the loss using the appropriate criterion
        loss = criterion(outputs.logits, labels)
        val_losses.append(loss.item())

        predictions = torch.argmax(outputs.logits, dim=1)
        val_preds.extend(predictions.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

# Calculate validation accuracy
val_accuracy = accuracy_score(val_labels, val_preds)
val_loss = sum(val_losses) / len(val_losses)

print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Validation Loss: {val_loss:.4f}")



Validation Accuracy: 86.79%
Validation Loss: 0.3500


In [6]:
# Save the fine-tuned model
if isinstance(model, torch.nn.DataParallel):
    model.module.save_pretrained("fire_subclass_model_VIT_10epochs")
else:
    model.save_pretrained("fire_subclass_model_VIT_10epochs")

In [7]:
# Create a zip archive of a folder
!zip -r fire_subclass_model_VIT_10epochs.zip fire_subclass_model_VIT_10epochs

  adding: fire_subclass_model_VIT_10epochs/ (stored 0%)
  adding: fire_subclass_model_VIT_10epochs/config.json (deflated 47%)
  adding: fire_subclass_model_VIT_10epochs/model.safetensors (deflated 7%)


**Running for more epochs , even 1 resulst in accuracy going down**

In [None]:
import torch
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn as nn

# Define the loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Load the fine-tuned model
model = ViTForImageClassification.from_pretrained('/kaggle/input/capstone-models/fire_subclass_model_VIT/fire_subclass_model_VIT',num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Define training parameters
num_epochs = 3

# for param in model.base_model.parameters():
#     param.requires_grad = False  # Freeze initial layers
start_epoch = 6

# Store losses and accuracies
train_losses = []
accuracies = []

# Training loop
for epoch in range(start_epoch, start_epoch + num_epochs):
    model.train()
    epoch_labels = []
    epoch_predictions = []

    for batch in train_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        logits = outputs.logits
        loss = criterion(logits, labels)  # Compute the appropriate loss function here
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


        # Store predictions and labels for accuracy calculation
        epoch_labels.extend(labels.cpu().numpy())
        epoch_predictions.extend(torch.argmax(outputs.logits, axis=1).cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(epoch_labels, epoch_predictions)
    accuracies.append(accuracy)

    # Print average training loss and accuracy for the epoch
    avg_train_loss = sum(train_losses[-len(train_dataloader):]) / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Avg Train Loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.4f}")

# Store accuracies for further analysis
print("Accuracies:", accuracies)

Let's use 2 GPUs!




Epoch 7/3, Avg Train Loss: 0.1713, Accuracy: 0.9671




In [None]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import numpy as np

# Validation loop
model.eval()
val_losses = []
val_preds = []
val_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Check for non-empty batches
        if inputs.size(0) == 0:
            continue

        outputs = model(inputs, labels=labels)
        
        # Check for NaN values in the loss
        if np.any(np.isnan(outputs.loss.cpu().numpy())):
            print("NaN loss encountered. Skipping batch.")
            continue

        # Compute the loss using the appropriate criterion
        loss = criterion(outputs.logits, labels)
        val_losses.append(loss.item())

        predictions = torch.argmax(outputs.logits, dim=1)
        val_preds.extend(predictions.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

# Calculate validation accuracy
val_accuracy = accuracy_score(val_labels, val_preds)
val_loss = sum(val_losses) / len(val_losses)

print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Validation Loss: {val_loss:.4f}")

**Oversampling**

In [3]:
import os
import torch
from torchvision import transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from collections import defaultdict

# Define the directory paths
train_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/train"
# val_path = "/kaggle/input/validation-edited/val"
test_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/test"

# ViT Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

# Define a custom dataset class
class WildfireDataset(Dataset):
    def __init__(self, folder_path, feature_extractor):
        self.image_files = []  # List to store image file paths
        self.labels = []  # List to store corresponding labels
        label_mapping = {
            "Smoke_from_fires": 0,
            "Both_smoke_and_fire": 1,
        }

        # Count the number of samples for each label
        label_counts = defaultdict(int)
        
        # Populate image_files and labels based on the clarified folder structure
        for class_label in ["nofire", "fire"]:
                current_path = f"{folder_path}/{class_label}"
                if class_label == "nofire":
                    continue
                else:
                    for subclass_label in ["Smoke_from_fires", "Both_smoke_and_fire"]:
                        current_subclass_path = f"{current_path}/{subclass_label}"
                        image_files = os.listdir(current_subclass_path)
                        self.image_files.extend([f"{current_subclass_path}/{img}" for img in image_files])
                        label = label_mapping[subclass_label]
                        self.labels.extend([label] * len(image_files))
                        label_counts[label] += len(image_files)
            

        # Calculate the maximum count of samples among all labels
        max_count = max(label_counts.values())

        # Oversample label 1 to match the count of label 0
        for label, count in label_counts.items():
            if label == 1:
#                 oversample_factor = max_count // count
                oversample_factor = 1
                self.image_files.extend([img_path for img_path, lbl in zip(self.image_files, self.labels) if lbl == label] * oversample_factor)
                self.labels.extend([label] * (count * oversample_factor))

        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        
        # Ensure that the image has the correct shape (num_channels, height, width)
        image = self.feature_extractor(images=image, return_tensors="pt")["pixel_values"].squeeze(0)

        return {
            "pixel_values": image,
            "labels": torch.tensor(label),
        }

    def __len__(self):
        return len(self.image_files)

# Create dataset instances
train_dataset = WildfireDataset(train_path, feature_extractor)
# val_dataset = WildfireDataset(val_path, feature_extractor)
test_dataset = WildfireDataset(test_path, feature_extractor)

# Create DataLoader instances
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [5]:
# Print label distribution after oversampling
print("\nLabel distribution in the oversampled dataset:")
oversampled_label_counts = torch.bincount(torch.tensor(train_dataset.labels))
for label, count in enumerate(oversampled_label_counts):
    print(f"Label {label}: {count} samples")


Label distribution in the oversampled dataset:
Label 0: 461 samples
Label 1: 538 samples


In [6]:
import torch
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn as nn

# Define the loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Load the fine-tuned model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Define training parameters
num_epochs = 5

# for param in model.base_model.parameters():
#     param.requires_grad = False  # Freeze initial layers


# Store losses and accuracies
train_losses = []
accuracies = []

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_labels = []
    epoch_predictions = []

    for batch in train_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        logits = outputs.logits
        loss = criterion(logits, labels)  # Compute the appropriate loss function here
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


        # Store predictions and labels for accuracy calculation
        epoch_labels.extend(labels.cpu().numpy())
        epoch_predictions.extend(torch.argmax(outputs.logits, axis=1).cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(epoch_labels, epoch_predictions)
    accuracies.append(accuracy)

    # Print average training loss and accuracy for the epoch
    avg_train_loss = sum(train_losses[-len(train_dataloader):]) / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Avg Train Loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.4f}")

# Store accuracies for further analysis
print("Accuracies:", accuracies)

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Let's use 2 GPUs!




Epoch 1/5, Avg Train Loss: 0.6135, Accuracy: 0.7247




Epoch 2/5, Avg Train Loss: 0.4512, Accuracy: 0.8559




Epoch 3/5, Avg Train Loss: 0.3129, Accuracy: 0.9269




Epoch 4/5, Avg Train Loss: 0.1906, Accuracy: 0.9700




Epoch 5/5, Avg Train Loss: 0.1177, Accuracy: 0.9820
Accuracies: [0.7247247247247247, 0.8558558558558559, 0.9269269269269269, 0.96996996996997, 0.9819819819819819]


In [7]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import numpy as np

# Validation loop
model.eval()
val_losses = []
val_preds = []
val_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Check for non-empty batches
        if inputs.size(0) == 0:
            continue

        outputs = model(inputs, labels=labels)
        
        # Check for NaN values in the loss
        if np.any(np.isnan(outputs.loss.cpu().numpy())):
            print("NaN loss encountered. Skipping batch.")
            continue

        # Compute the loss using the appropriate criterion
        loss = criterion(outputs.logits, labels)
        val_losses.append(loss.item())

        predictions = torch.argmax(outputs.logits, dim=1)
        val_preds.extend(predictions.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

# Calculate validation accuracy
val_accuracy = accuracy_score(val_labels, val_preds)
val_loss = sum(val_losses) / len(val_losses)

print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Validation Loss: {val_loss:.4f}")



Validation Accuracy: 84.86%
Validation Loss: 0.3464


In [8]:
# Save the fine-tuned model
if isinstance(model, torch.nn.DataParallel):
    model.module.save_pretrained("fire_subclass_model_VIT_oversampled")
else:
    model.save_pretrained("fire_subclass_model_VIT_oversampled")

In [9]:
# Create a zip archive of a folder
!zip -r fire_subclass_model_VIT_oversampled.zip fire_subclass_model_VIT_oversampled

  adding: fire_subclass_model_VIT_oversampled/ (stored 0%)
  adding: fire_subclass_model_VIT_oversampled/model.safetensors (deflated 7%)
  adding: fire_subclass_model_VIT_oversampled/config.json (deflated 46%)


**Data Augmentation**

In [14]:
pip install imgaug

Note: you may need to restart the kernel to use updated packages.


In [17]:
import imgaug as ia
from imgaug import augmenters as iaa


In [18]:
import os
import torch
from torchvision import transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from collections import defaultdict
import random

# Define the directory paths
train_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/train"
val_path = "/kaggle/input/validation-edited/val"
test_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/test"

# ViT Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

from torchvision.transforms import RandomHorizontalFlip, RandomRotation, RandomResizedCrop, ToTensor, Compose

class WildfireDataset(Dataset):
    def __init__(self, folder_path, feature_extractor):
        self.image_files = []  # List to store image file paths
        self.labels = []  # List to store corresponding labels
        self.label_mapping = {
            "Smoke_from_fires": 0,
            "Both_smoke_and_fire": 1,
        }
        self.data_transform = Compose([
            RandomHorizontalFlip(),
            RandomRotation(10),
            RandomResizedCrop(224, scale=(0.8, 1.0)),
            ToTensor(),
        ])

        # Populate image_files and labels based on the clarified folder structure
        for class_label in ["nofire", "fire"]:
                current_path = f"{folder_path}/{class_label}"
                if class_label == "nofire":
                    continue
                else:
                    for subclass_label in ["Smoke_from_fires", "Both_smoke_and_fire"]:
                        current_subclass_path = f"{current_path}/{subclass_label}"
                        image_files = os.listdir(current_subclass_path)
                        self.image_files.extend([f"{current_subclass_path}/{img}" for img in image_files])
                        self.labels.extend([self.label_mapping[subclass_label]] * len(image_files))
            
        
        # Determine the number of samples in class label 1
        self.num_label_1_samples = self.labels.count(1)
        # Determine the number of samples to be added for class label 1 after augmentation
        self.num_augmented_label_1_samples = self.num_label_1_samples

        self.feature_extractor = feature_extractor
        # Define augmentation pipeline specifically for label 1 images
        self.label_1_aug = iaa.Sequential([
            iaa.Affine(rotate=(-30, 30)),  # Random rotation
            iaa.Fliplr(0.5),  # Horizontal flip with 50% probability
            iaa.GaussianBlur(sigma=(0, 0.5)),  # Random Gaussian blur
        ])


    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        
        if label == 1:
            # Retrieve the image again to create a duplicate with augmentation
            image_duplicate = Image.open(img_path).convert("RGB")
            image_duplicate = self.feature_extractor(images=image_duplicate, return_tensors="pt")["pixel_values"].squeeze(0)

            # Apply augmentation to the duplicate
            image_duplicate = self.label_1_aug(images=[image_duplicate])[0]

            # Return both the original and augmented images, effectively doubling
            return {
                "pixel_values": torch.stack([image, image_duplicate]),
                "labels": torch.tensor([label, label]),
            }
        else:
            image = self.feature_extractor(images=image, return_tensors="pt")["pixel_values"].squeeze(0)
            return {
                "pixel_values": image,
                "labels": torch.tensor(label),
            }
        # If the index is within the original number of samples for class label 1,
        # return the original image for class label 1
#         if idx < self.num_label_1_samples:
#             img_path = self.image_files[idx]
#             label = self.labels[idx]
#             image = Image.open(img_path).convert("RGB")
#             image = self.feature_extractor(images=image, return_tensors="pt")["pixel_values"].squeeze(0)
#             return {
#                 "pixel_values": image,
#                 "labels": torch.tensor(label),
#             }
        
#         # If the index is beyond the original number of samples for class label 1,
#         # apply data augmentation to an image from class label 1
#         augmented_idx = idx - self.num_label_1_samples
#         original_label_1_indices = [i for i, label in enumerate(self.labels) if label == 1]
#         original_image_idx = original_label_1_indices[augmented_idx % self.num_label_1_samples]
#         img_path = self.image_files[original_image_idx]
#         label = self.labels[original_image_idx]
#         image = Image.open(img_path).convert("RGB")
#         augmented_image = self.data_transform(image)
#         augmented_image = self.feature_extractor(images=augmented_image.unsqueeze(0), return_tensors="pt")["pixel_values"].squeeze(0)
#         return {
#             "pixel_values": augmented_image,
#             "labels": torch.tensor(label),
#         }


    def __len__(self):
        return len(self.image_files) + self.num_augmented_label_1_samples


# Define data augmentation transforms
data_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
])

# Create dataset instances
train_dataset = WildfireDataset(train_path, feature_extractor)
# val_dataset = WildfireDataset(val_path, feature_extractor)
test_dataset = WildfireDataset(test_path, feature_extractor)

# Create DataLoader instances
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)




In [20]:
# Print label distribution after oversampling
print("\nLabel distribution in the oversampled dataset:")
oversampled_label_counts = torch.bincount(torch.tensor(train_dataset.labels))
for label, count in enumerate(oversampled_label_counts):
    print(f"Label {label}: {count} samples")


Label distribution in the oversampled dataset:
Label 0: 461 samples
Label 1: 269 samples


**Undersampling**

In [21]:
from collections import defaultdict
import random

# Define a custom dataset class
class WildfireDataset(Dataset):
    def __init__(self, folder_path, feature_extractor, undersample_label=0):
        self.folder_path = folder_path
        self.image_files = []  # List to store image file paths
        self.labels = []  # List to store corresponding labels
        label_mapping = {
            "Smoke_from_fires": 0,
            "Both_smoke_and_fire": 1,
        }
        self.undersample_label = undersample_label

        # Dictionary to store image files for each label
        self.label_images = defaultdict(list)

        # Populate image_files and labels based on the clarified folder structure
        for class_label in ["nofire", "fire"]:
            current_path = f"{folder_path}/{class_label}"
            if class_label == "nofire":
                continue
            else:
                for subclass_label in ["Smoke_from_fires", "Both_smoke_and_fire"]:
                    current_subclass_path = f"{current_path}/{subclass_label}"
                    image_files = os.listdir(current_subclass_path)
                    label = label_mapping[subclass_label]
                    self.label_images[label].extend([f"{current_subclass_path}/{img}" for img in image_files])

        # Determine the number of samples for the undersampled label
        num_samples_undersampled = min(len(self.label_images[undersample_label]), len(self.label_images[1 - undersample_label]))

        # Randomly select a subset of samples for the undersampled label
        undersampled_indices = random.sample(range(len(self.label_images[undersample_label])), num_samples_undersampled)
        undersampled_images = [self.label_images[undersample_label][idx] for idx in undersampled_indices]

        # Combine the undersampled images with the other label's images
        self.image_files = undersampled_images + self.label_images[1 - undersample_label]
        self.labels = [undersample_label] * num_samples_undersampled + [1 - undersample_label] * len(self.label_images[1 - undersample_label])

        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        
        # Ensure that the image has the correct shape (num_channels, height, width)
        image = self.feature_extractor(images=image, return_tensors="pt")["pixel_values"].squeeze(0)

        return {
            "pixel_values": image,
            "labels": torch.tensor(label),
        }

    def __len__(self):
        return len(self.image_files)

# Usage example:
train_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/train"
test_path = "/kaggle/input/the-wildfire-dataset/the_wildfire_dataset/the_wildfire_dataset/test"

# Create dataset instance with undersampling for label 0
train_dataset = WildfireDataset(train_path, feature_extractor, undersample_label=0)

# Create DataLoader instance
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [22]:
# Print label distribution after oversampling
print("\nLabel distribution in the oversampled dataset:")
oversampled_label_counts = torch.bincount(torch.tensor(train_dataset.labels))
for label, count in enumerate(oversampled_label_counts):
    print(f"Label {label}: {count} samples")


Label distribution in the oversampled dataset:
Label 0: 269 samples
Label 1: 269 samples


In [23]:
import torch
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn as nn

# Define the loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Load the fine-tuned model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Define training parameters
num_epochs = 5

# for param in model.base_model.parameters():
#     param.requires_grad = False  # Freeze initial layers


# Store losses and accuracies
train_losses = []
accuracies = []

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_labels = []
    epoch_predictions = []

    for batch in train_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(inputs, labels=labels)
        logits = outputs.logits
        loss = criterion(logits, labels)  # Compute the appropriate loss function here
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())


        # Store predictions and labels for accuracy calculation
        epoch_labels.extend(labels.cpu().numpy())
        epoch_predictions.extend(torch.argmax(outputs.logits, axis=1).cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(epoch_labels, epoch_predictions)
    accuracies.append(accuracy)

    # Print average training loss and accuracy for the epoch
    avg_train_loss = sum(train_losses[-len(train_dataloader):]) / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Avg Train Loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.4f}")

# Store accuracies for further analysis
print("Accuracies:", accuracies)

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Let's use 2 GPUs!




Epoch 1/5, Avg Train Loss: 0.6595, Accuracy: 0.6394




Epoch 2/5, Avg Train Loss: 0.5742, Accuracy: 0.8309




Epoch 3/5, Avg Train Loss: 0.4863, Accuracy: 0.8792




Epoch 4/5, Avg Train Loss: 0.4018, Accuracy: 0.8941




Epoch 5/5, Avg Train Loss: 0.3245, Accuracy: 0.9182
Accuracies: [0.6394052044609665, 0.8308550185873605, 0.879182156133829, 0.8940520446096655, 0.9182156133828996]


In [32]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import numpy as np

# Validation loop
model.eval()
val_losses = []
val_preds = []
val_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        # Check for non-empty batches
        if inputs.size(0) == 0:
            continue

        outputs = model(inputs, labels=labels)
        
        # Check for NaN values in the loss
        if np.any(np.isnan(outputs.loss.cpu().numpy())):
            print("NaN loss encountered. Skipping batch.")
            continue

        # Compute the loss using the appropriate criterion
        loss = criterion(outputs.logits, labels)
        val_losses.append(loss.item())

        predictions = torch.argmax(outputs.logits, dim=1)
        val_preds.extend(predictions.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

# Calculate validation accuracy
val_accuracy = accuracy_score(val_labels, val_preds)
val_loss = sum(val_losses) / len(val_losses)

print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Validation Loss: {val_loss:.4f}")

Validation Accuracy: 83.02%
Validation Loss: 0.4345


In [33]:
# Save the fine-tuned model
if isinstance(model, torch.nn.DataParallel):
    model.module.save_pretrained("fire_subclass_model_VIT_undersampled")
else:
    model.save_pretrained("fire_subclass_model_VIT_undersampled")

In [34]:
# Create a zip archive of a folder
!zip -r fire_subclass_model_VIT_undersampled.zip fire_subclass_model_VIT_undersampled

  adding: fire_subclass_model_VIT_undersampled/ (stored 0%)
  adding: fire_subclass_model_VIT_undersampled/config.json (deflated 46%)
  adding: fire_subclass_model_VIT_undersampled/model.safetensors (deflated 7%)
