In [1]:
#!pip install scikit-learn

In [2]:
#!pip install shapely

In [None]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.preprocessing import LabelEncoder
import os
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision.models import resnet18

In [None]:
class DisasterTypeDataset(Dataset):
    """Image dataset whose class label is parsed from each image's file name.

    Every regular, non-hidden file in ``image_dir`` is one sample. The label is
    the disaster keyword found in the part of the file name before the first
    underscore (see ``extract_disaster_type``), encoded to an integer with a
    ``LabelEncoder`` fitted on all file names in the directory.
    """

    def __init__(self, image_dir, transform=None):
        """
        Args:
            image_dir (string): Directory with all post-disaster images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.image_dir = image_dir
        self.transform = transform
        self.image_filenames = self._list_image_files(image_dir)
        self.label_encoder = LabelEncoder()
        disaster_types = [self.extract_disaster_type(f) for f in self.image_filenames]
        self.labels = self.label_encoder.fit_transform(disaster_types)

    @staticmethod
    def _list_image_files(image_dir):
        """Return the sorted names of the regular, non-hidden files in ``image_dir``.

        ``os.listdir`` returns entries in arbitrary order and also lists
        sub-directories and hidden entries (e.g. ``.ipynb_checkpoints``,
        ``.DS_Store``). Sorting makes sample indices stable across runs and
        machines; filtering keeps entries that cannot be opened as images out
        of the dataset.
        """
        return sorted(
            name
            for name in os.listdir(image_dir)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(image_dir, name))
        )

    def __len__(self):
        """Return the number of samples (files) in the dataset."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to 3-channel RGB before ``transform`` is applied;
        the label is returned as a scalar ``torch.long`` tensor.
        """
        img_name = os.path.join(self.image_dir, self.image_filenames[idx])
        # ``convert`` forces the pixel data to be read, so the file handle can be
        # closed right away; it also guarantees three channels for grayscale,
        # palette or RGBA inputs.
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(int(label), dtype=torch.long)

    def extract_disaster_type(self, filename):
        """Return the disaster keyword contained in the file-name prefix.

        Only the text before the first ``_`` is inspected. Keywords are matched
        as substrings in the listed order (so a prefix containing ``wildfire``
        yields ``"fire"``); ``"unknown"`` is returned when none matches.
        """
        parts = filename.split('_')
        first_part = parts[0]
        disaster_types = ["hurricane", "fire", "wind", "flooding", "tsunami", "earthquake"] # There is no wind in data, but web page said there is
        for disaster_type in disaster_types:
            if disaster_type in first_part:
                return disaster_type
        return "unknown"

    def get_disaster_types(self):
        """Return the fitted encoder's class names, indexed by integer label."""
        return self.label_encoder.classes_


def get_transform():
    """Build the preprocessing pipeline applied to every PIL image.

    Steps, in order: resize to 256x256, convert to a tensor, then normalise
    each of the three channels with a fixed mean and standard deviation.
    """
    # Per-channel statistics; these are the values commonly used with
    # torchvision's ImageNet-pretrained models.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)


In [None]:
class CNN_model(nn.Module):
    """Small three-stage convolutional classifier.

    Each stage is a 3x3 convolution (padding 1), ReLU and 2x2 max-pooling, so
    the spatial size is halved three times (divided by 8 overall). Two fully
    connected layers then map the flattened features to ``num_classes`` logits.

    Args:
        num_classes (int): Number of output logits.
        input_size (int or (int, int), optional): Height/width of the input
            images. Defaults to 256, which gives the original
            ``64 * 32 * 32`` flattened feature size.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.
    """

    def __init__(self, num_classes, input_size=256):
        super(CNN_model, self).__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        # Three 2x2 poolings with floor rounding are equivalent to ``// 8``.
        pooled_h, pooled_w = height // 8, width // 8
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: need at least 8 pixels per side"
            )
        self.flat_features = 64 * pooled_h * pooled_w

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(self.flat_features, 128)  #the input features
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        if x.dim() == 3:
            # A single unbatched image: add the batch axis explicitly.
            x = x.unsqueeze(0)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch axis. Unlike ``view(-1, N)`` this
        # can never fold spatial positions into the batch dimension; an input of
        # the wrong size raises a shape error in ``fc1`` instead.
        x = x.flatten(start_dim=1)  #the flattening
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
# Helper: one pass over a data loader, shared by training and validation
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Both values are NaN when the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0
    num_batches = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    if num_batches == 0 or total == 0:
        # Nothing to average over (e.g. an empty validation split).
        return float("nan"), float("nan")
    return loss_sum / num_batches, 100 * correct / total


# Define the training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    Per-epoch metrics are printed and also returned.

    Args:
        model: Module to train (moved to the selected device in place).
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        val_loader: Iterable of ``(images, labels)`` batches used for validation;
            may be empty, in which case validation metrics are NaN.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs (int): Number of passes over ``train_loader``.

    Returns:
        list[dict]: One dict per epoch with keys ``train_loss``, ``train_acc``,
        ``val_loss`` and ``val_acc`` (losses are means over batches, accuracies
        are percentages).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    history = []

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%')

        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
        print(f'Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.2f}%')

        history.append({
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc,
        })

    print("Finished Training")
    return history



In [7]:
# Main function
def main(seed=42):
    """Train a ResNet-18 "teacher" and a small CNN "student" on the disaster images.

    Loads the images from ``post_disaster``, splits them 70/15/15 into
    train/validation/test subsets, then trains both models on the same
    train/validation split with cross-entropy loss and Adam.

    NOTE: the two models are trained independently here; no distillation loss
    links the student to the teacher in this function.

    Args:
        seed (int): Seed for the global torch RNG (weight initialisation and
            shuffling) and for the dataset split, so repeated runs use the same
            split.

    Raises:
        ValueError: If no images are found in the image directory.
    """
    # Seed the global RNG so layer initialisation and shuffling are repeatable.
    torch.manual_seed(seed)

    image_dir = 'post_disaster'
    transform = get_transform()
    dataset = DisasterTypeDataset(image_dir, transform=transform)
    if len(dataset) == 0:
        raise ValueError(f"No images found in '{image_dir}'")
    num_classes = len(dataset.get_disaster_types())

    # Split dataset into train, validation, and test sets
    train_size = int(0.7 * len(dataset))
    val_size = int(0.15 * len(dataset))
    test_size = len(dataset) - train_size - val_size  # remainder, so sizes always sum up
    # A dedicated generator keeps the split independent of other RNG use.
    split_generator = torch.Generator().manual_seed(seed)
    train_set, val_set, _held_out_test_set = random_split(
        dataset, [train_size, val_size, test_size], generator=split_generator
    )
    # The test subset is held out and not evaluated in this function.

    # Create data loaders
    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=32, shuffle=False)

    # ResNet-18 model
    # NOTE(review): ``pretrained=True`` is deprecated in newer torchvision releases
    # in favour of ``weights=...``; kept because the installed version is unknown.
    teacher_model = resnet18(pretrained=True)
    num_ftrs = teacher_model.fc.in_features
    # Replace the final layer so the network outputs one logit per disaster type.
    teacher_model.fc = nn.Linear(num_ftrs, num_classes)

    # Define loss function and optimizer for the teacher model
    criterion_teacher = nn.CrossEntropyLoss()
    optimizer_teacher = optim.Adam(teacher_model.parameters(), lr=0.001)

    # Train the teacher model
    train_model(teacher_model, train_loader, val_loader, criterion_teacher, optimizer_teacher)

    # Instantiate the student model
    student_model = CNN_model(num_classes)

    # Define loss function and optimizer for the student model
    criterion_student = nn.CrossEntropyLoss()
    optimizer_student = optim.Adam(student_model.parameters(), lr=0.001)

    # Train the student model
    train_model(student_model, train_loader, val_loader, criterion_student, optimizer_student)

# Entry point: run the full experiment when this code executes as the top-level
# module. In a notebook kernel ``__name__`` is '__main__', so running this cell
# starts training immediately.
if __name__ == '__main__':
    main()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [01:20<00:00, 585kB/s] 


Epoch 1/10, Loss: 0.4879, Accuracy: 84.74%
Validation Loss: 0.4507, Accuracy: 87.11%
Epoch 2/10, Loss: 0.2656, Accuracy: 91.93%
Validation Loss: 1.4581, Accuracy: 74.94%
Epoch 3/10, Loss: 0.2183, Accuracy: 93.26%
Validation Loss: 0.1917, Accuracy: 94.03%
Epoch 4/10, Loss: 0.1343, Accuracy: 95.87%
Validation Loss: 1.2552, Accuracy: 73.99%
Epoch 5/10, Loss: 0.1439, Accuracy: 95.76%
Validation Loss: 0.1549, Accuracy: 94.75%
Epoch 6/10, Loss: 0.0829, Accuracy: 97.55%
Validation Loss: 0.2717, Accuracy: 90.21%
Epoch 7/10, Loss: 0.0898, Accuracy: 96.89%
Validation Loss: 0.3653, Accuracy: 88.78%
Epoch 8/10, Loss: 0.0680, Accuracy: 97.60%
Validation Loss: 0.1435, Accuracy: 94.75%
Epoch 9/10, Loss: 0.1735, Accuracy: 93.67%
Validation Loss: 0.3100, Accuracy: 90.93%
Epoch 10/10, Loss: 0.0844, Accuracy: 96.89%
Validation Loss: 0.1385, Accuracy: 95.47%
Finished Training
Epoch 1/10, Loss: 1.2685, Accuracy: 49.36%
Validation Loss: 0.9727, Accuracy: 67.78%
Epoch 2/10, Loss: 0.6986, Accuracy: 74.22%
Val