In [60]:
import torch
from torch import nn

# Note: this notebook requires torch >= 1.10.0
torch.__version__

'1.13.1'

In [61]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [62]:
device


'cuda'

In [117]:
import torch
import torch.nn as nn
import torchvision.models as models

class ResNet18(nn.Module):
    def __init__(self, num_classes):
        super(ResNet18, self).__init__()
        self.resnet = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Sequential(*list(models.resnet18(weights=None).children())[4:-1])
        )
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.resnet(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [139]:
from torch.nn.modules.loss import CrossEntropyLoss
import torch.optim as optim
from torch.utils.data import DataLoader

# Define hyperparameters
num_epochs = 100
batch_size = 32
learning_rate = 0.001

# Initialize model and optimizer
model = ResNet18(num_classes=6).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = CrossEntropyLoss()

In [110]:
import os
from PIL import Image

# Set the path to the directory
path = r'C:\Users\Krumomir\Downloads\RSCD dataset-1million\train'

# Initialize an empty list to store the subdirectories
subdirs = []

# Loop through all the directories and files in the directory
for dirpath, dirnames, filenames in os.walk(path):
    # Loop through all the subdirectories
    for dirname in dirnames:
        # Append the subdirectory to the list
        subdirs.append(os.path.join(dirpath, dirname))

# Initialize an empty list to store the images
image_list = []
counter = 0

labels_names = ['dry', 'fresh', 'ice', 'melted', 'water', 'wet']
dictionary = {}

for counter in range(len(subdirs)):
  # Loop through all the files in the directory
  for filename in os.listdir(subdirs[counter]):
    # Check if the file is an image
    if filename.endswith('.jpg') or filename.endswith('.png'):
        for name in labels_names:
            if name in filename:
                if name not in dictionary:
                    dictionary[name] = []
                dictionary[name].append(subdirs[counter] + "/" + filename)



In [119]:
for name in labels_names:
    print(f'{name} - {len(dictionary[name])}')

dry - 355509
fresh - 73560
ice - 54092
melted - 61093
water - 147257
wet - 267486


In [138]:
dict2 = {}

for name in labels_names:
    if name not in dict2:
        dict2[name] = []
    dict2[name] = dictionary[name][:10000]

In [121]:
for name in labels_names:
    print(f'{name} - {len(dict2[name])}')

dry - 1000
fresh - 1000
ice - 1000
melted - 1000
water - 1000
wet - 1000


In [122]:
dict2["dry"][0]

'C:\\Users\\Krumomir\\Downloads\\RSCD dataset-1million\\train\\dry_asphalt_severe/202201261342122-dry-asphalt-severe.jpg'

In [123]:
import os
from PIL import Image

image_list = []

arr = []

for name in dict2:
    for filename in dict2[name]:
        arr.append(filename)

# Loop through all the files in the directory
for filename in arr:
    img = Image.open(filename)
    # Append the image to the list
    image_list.append(img)

In [124]:
len(arr)

6000

In [125]:
labels = []

for filename in arr:
    for name in labels_names:
        if name in filename:
            labels.append(name)
            break


# for filename in arr:
#   labels.append([name for name in labels_names if name in filename])
#   if 1 < len([name for name in labels_names if name in filename]):
#     print(filename)



In [126]:
labels_names
len(image_list)


6000

In [127]:
from sklearn import preprocessing
import torch

le = preprocessing.LabelEncoder()
targets = le.fit_transform(labels_names)
# targets: array([0, 1, 2, 3, 4])

targets = torch.as_tensor(targets)

In [128]:
labels2 = labels
labels = le.transform(labels)

In [129]:
labels

array([0, 0, 0, ..., 5, 5, 5])

In [130]:
import torch
from torchvision import transforms

# Define the transformations to apply to the images
transform = transforms.Compose([
    transforms.ToTensor()
])

# Define the dataset
class ImageListDataset(torch.utils.data.Dataset):
    def __init__(self, image_list, labels, transform=None):
        self.image_list = image_list
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        image = self.image_list[index]
        if self.transform:
            image = self.transform(image)
        label = self.labels[index]
        return image, label

dataset = ImageListDataset(image_list, labels, transform=transform)

# Define the data loader

from torch.utils.data import random_split

[training_data, testing_data] = random_split(dataset, [0.7, 0.3], generator=torch.Generator().manual_seed(69))

torch.manual_seed(69)

dataloader_train = torch.utils.data.DataLoader(training_data, batch_size=32, shuffle=True)
dataloader_test = torch.utils.data.DataLoader(testing_data, batch_size=32, shuffle=True)

In [131]:
def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer):
    # Put model in train mode
    model.train()
    
    # Setup train loss and train accuracy values
    train_loss, train_acc = 0, 0
    
    # Loop through data loader data batches
    for batch, (X, y) in enumerate(dataloader):
        
        y = y.type(torch.LongTensor)   # casting to long
    
        X, y = X.to(device), y.to(device)
        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate  and accumulate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item() 

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Calculate and accumulate accuracy metric across all batches
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

        # Adjust metrics to get average loss and accuracy per batch 
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

In [132]:
def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module):
    # Put model in eval mode
    model.eval() 
    
    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0
    
    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            
            y = y.type(torch.LongTensor)   # casting to long
            
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()
            
            # Calculate and accumulate accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
            
    # Adjust metrics to get average loss and accuracy per batch 
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

In [133]:
from tqdm.auto import tqdm

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5):
  
    results = {"train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer)
        
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn)

        # 4. Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

      # 5. Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

  # 6. Return the filled results at the end of the epochs
    return results

In [134]:
labels

array([0, 0, 0, ..., 5, 5, 5])

In [140]:
torch.manual_seed(69)

model_results = train(model, dataloader_train, dataloader_test, optimizer, loss_fn, num_epochs)

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0310 | train_acc: 0.5888 | test_loss: 2.3762 | test_acc: 0.3081
Epoch: 2 | train_loss: 0.7594 | train_acc: 0.6863 | test_loss: 2.2172 | test_acc: 0.3975
Epoch: 3 | train_loss: 0.6775 | train_acc: 0.7259 | test_loss: 1.8296 | test_acc: 0.4123
Epoch: 4 | train_loss: 0.6884 | train_acc: 0.7308 | test_loss: 3.3267 | test_acc: 0.4720
Epoch: 5 | train_loss: 0.6343 | train_acc: 0.7483 | test_loss: 3.4494 | test_acc: 0.3076
Epoch: 6 | train_loss: 0.6074 | train_acc: 0.7628 | test_loss: 1.5002 | test_acc: 0.4852
Epoch: 7 | train_loss: 0.5749 | train_acc: 0.7758 | test_loss: 0.7860 | test_acc: 0.7001
Epoch: 8 | train_loss: 0.5578 | train_acc: 0.7763 | test_loss: 3.4042 | test_acc: 0.4134
Epoch: 9 | train_loss: 0.5582 | train_acc: 0.7794 | test_loss: 0.8337 | test_acc: 0.6952
Epoch: 10 | train_loss: 0.5170 | train_acc: 0.8018 | test_loss: 0.8407 | test_acc: 0.6946
Epoch: 11 | train_loss: 0.5075 | train_acc: 0.8082 | test_loss: 0.8607 | test_acc: 0.6897
Epoch: 12 | train_l

Epoch: 93 | train_loss: 0.0255 | train_acc: 0.9917 | test_loss: 0.4734 | test_acc: 0.8832
Epoch: 94 | train_loss: 0.0444 | train_acc: 0.9860 | test_loss: 0.7889 | test_acc: 0.8268
Epoch: 95 | train_loss: 0.0229 | train_acc: 0.9920 | test_loss: 0.4040 | test_acc: 0.9095
Epoch: 96 | train_loss: 0.0086 | train_acc: 0.9974 | test_loss: 0.3997 | test_acc: 0.9112
Epoch: 97 | train_loss: 0.0160 | train_acc: 0.9941 | test_loss: 0.6246 | test_acc: 0.8547
Epoch: 98 | train_loss: 0.0151 | train_acc: 0.9955 | test_loss: 0.4547 | test_acc: 0.8986
Epoch: 99 | train_loss: 0.0542 | train_acc: 0.9839 | test_loss: 2.2062 | test_acc: 0.6683
Epoch: 100 | train_loss: 0.0334 | train_acc: 0.9875 | test_loss: 0.8984 | test_acc: 0.8284
