In [1]:
#!pip install scikit-learn

In [2]:
#!pip install shapely

In [3]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.preprocessing import LabelEncoder
import os
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision.models import resnet18
from torch.optim.lr_scheduler import StepLR

In [4]:
class DisasterTypeDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        """
        Args:
            image_dir (string): Directory with all post-disaster images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.image_dir = image_dir
        self.transform = transform
        self.image_filenames = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]  # Filter out directories
        self.label_encoder = LabelEncoder()
        disaster_types = [self.extract_disaster_type(f) for f in self.image_filenames]
        self.labels = self.label_encoder.fit_transform(disaster_types)

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.image_filenames[idx])
        image = Image.open(img_name)
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)

    def extract_disaster_type(self, filename):
        parts = filename.split('_')
        first_part = parts[0]
        disaster_types = ["hurricane", "fire", "wind", "flooding", "tsunami", "earthquake"] # There is no wind in data, but web page said there is
        for disaster_type in disaster_types:
            if disaster_type in first_part:
                return disaster_type
        return "unknown"

    def get_disaster_types(self):
        return self.label_encoder.classes_



In [5]:
# Add more transformations methods (e.g. random rotations, flips, and color adjustments)
def get_augmented_transform():
    return transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

def distillation_loss(student_outputs, teacher_outputs, labels, T=2.0, alpha=0.7, weight_loss_factor=0.5):
    soft_loss = nn.KLDivLoss()(F.log_softmax(student_outputs/T, dim=1),
                               F.softmax(teacher_outputs/T, dim=1)) * (alpha * T * T)
    hard_loss = F.cross_entropy(student_outputs, labels) * (1. - alpha)
    weight_tensor = torch.full((6,), weight_loss_factor, device=student_outputs.device)  # Assuming 6 classes
    weight_loss = F.cross_entropy(student_outputs, labels, weight=weight_tensor)

    return soft_loss + hard_loss + weight_loss



In [6]:
class CNN_model(nn.Module):
    def __init__(self, num_classes):
        super(CNN_model, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.batchnorm2 = nn.BatchNorm2d(128)
        self.batchnorm3 = nn.BatchNorm2d(256)
        self.batchnorm4 = nn.BatchNorm2d(512)
        self.fc1 = nn.Linear(512 * 16 * 16, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.batchnorm1(self.conv1(x.float()))))
        x = self.pool(F.relu(self.batchnorm2(self.conv2(x))))
        x = self.pool(F.relu(self.batchnorm3(self.conv3(x))))
        x = self.pool(F.relu(self.batchnorm4(self.conv4(x))))
        x = x.view(-1, 512 * 16 * 16)
        x = self.dropout1(x)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [7]:
# Define the training function
def train_model(model, teacher_model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    teacher_model = teacher_model.to(device)
    teacher_model.eval()
    scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            student_outputs = model(images)
            teacher_outputs = teacher_model(images)
            loss = distillation_loss(student_outputs, teacher_outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(student_outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_acc = 100 * correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_acc:.2f}%')

        scheduler.step()

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_acc = 100 * correct / total
        print(f'Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {val_acc:.2f}%')

    print("Finished Training")

In [None]:
# Main function
def main():
    image_dir = 'post_disaster'
    transform = get_augmented_transform()
    dataset = DisasterTypeDataset(image_dir, transform=transform)
    num_classes = len(set(dataset.labels))

    train_set, val_set, test_set = random_split(dataset, [int(0.7 * len(dataset)),
                                                           int(0.15 * len(dataset)),
                                                           len(dataset) - int(0.7 * len(dataset)) - int(0.15 * len(dataset))])

    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=32, shuffle=False)

    teacher_model = resnet18(pretrained=True)
    teacher_model.fc = nn.Linear(teacher_model.fc.in_features, num_classes)

    student_model = CNN_model(num_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer_teacher = optim.Adam(teacher_model.parameters(), lr=0.001)
    optimizer_student = optim.Adam(student_model.parameters(), lr=0.001)

    train_model(teacher_model, student_model, train_loader, val_loader, criterion, optimizer_teacher, 10)

if __name__ == '__main__':
    main()



Epoch 1/10, Loss: 0.8831, Accuracy: 81.83%
Validation Loss: 0.5641, Accuracy: 81.53%
Epoch 2/10, Loss: 0.6640, Accuracy: 89.20%
Validation Loss: 0.3061, Accuracy: 91.20%
Epoch 3/10, Loss: 0.5757, Accuracy: 91.58%
Validation Loss: 0.3140, Accuracy: 90.76%
Epoch 4/10, Loss: 0.4629, Accuracy: 95.45%
Validation Loss: 0.1754, Accuracy: 96.07%
Epoch 5/10, Loss: 0.4335, Accuracy: 96.31%
Validation Loss: 0.1680, Accuracy: 97.38%
Epoch 6/10, Loss: 0.4139, Accuracy: 96.93%
Validation Loss: 0.1448, Accuracy: 98.25%
Epoch 7/10, Loss: 0.3971, Accuracy: 97.55%
Validation Loss: 0.1442, Accuracy: 97.96%
