In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from PIL import Image
import os
import torch.nn.functional as F

In [2]:
class CustomDataset(Dataset):
    def __init__(self, root, train=True, val=False, transform=None):
        self.root = root
        self.transform = transform
        self.images = []
        self.labels = []

        # Load images and labels from your dataset
        if train:
            folder = os.path.join(root, 'train')
        elif val:
            folder = os.path.join(root, 'val')
        else:
            folder = os.path.join(root, 'test')

        for class_folder in os.listdir(folder):
            class_folder_path = os.path.join(folder, class_folder)
            for filename in os.listdir(class_folder_path):
                img_path = os.path.join(class_folder_path, filename)
                label = 0 if class_folder == 'no-snow' else 1
                self.images.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]

        img = Image.open(img_path)
        if self.transform:
            img = self.transform(img)

        return img, label

In [3]:
# Define the data loaders
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(90),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = CustomDataset(root='./data', train=True, transform=transform)
val_dataset = CustomDataset(root='./data', train=False, val=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=False)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SpatialAttention(nn.Module):
    def __init__(self, in_channels):
        super(SpatialAttention, self).__init__()
        # Define pooling layers
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        
        # Define 1x1 convolutional layer
        self.conv = nn.Conv2d(in_channels * 2, 1, kernel_size=1)
        
        # Sigmoid activation
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Compute average and max pooled representations
        avg_out = self.avg_pool(x)
        max_out = self.max_pool(x)
        
        # Concatenate average and max pooled representations
        pooled_features = torch.cat((avg_out, max_out), dim=1)
        
        # Apply 1x1 convolutional layer
        conv_out = self.conv(pooled_features)
        
        # Apply sigmoid activation
        attn_scores = self.sigmoid(conv_out)

        # Apply attention weights to the input features
        attn_output = x * attn_scores

        return attn_output, attn_scores



# Define the architecture of the model
class CustomModel(nn.Module):
    def __init__(self):
        super(CustomModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.spatial_attention = SpatialAttention(in_channels=64)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))

        # Apply spatial attention
        x, _ = self.spatial_attention(x)

        x = x.view(-1, 64 * 8 * 8)  # Reshape to (batch, features)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the model
model = CustomModel().cuda()

# Define the loss function and the optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [5]:
# Assuming you have a validation dataset and a validation loader named val_loader
best_val_loss = float('inf')

num_epochs = 25
device = 'cuda'

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        # Move data to the appropriate device if using GPU
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = loss_function(output, target)
        loss.backward()
        # Optionally clip gradients
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    # Validation
    model.eval()  # Set the model to evaluation mode
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += loss_function(output, target).item()  # Accumulate validation loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    val_loss /= len(val_loader.dataset)
    accuracy = correct / len(val_loader.dataset) * 100
    print('Validation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        val_loss, correct, len(val_loader.dataset), accuracy))
    
    torch.save(model.state_dict(), f'epoch_{epoch}.pt')

    # Save the model if validation loss has decreased
    if val_loss < best_val_loss:
        print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model...'.format(
            best_val_loss, val_loss))
        torch.save(model.state_dict(), 'best_model.pt')
        best_val_loss = val_loss


Validation set: Average loss: 0.1923, Accuracy: 9/18 (50.00%)
Validation loss decreased (inf --> 0.192301). Saving model...
Validation set: Average loss: 0.1623, Accuracy: 15/18 (83.33%)
Validation loss decreased (0.192301 --> 0.162294). Saving model...
Validation set: Average loss: 0.0982, Accuracy: 14/18 (77.78%)
Validation loss decreased (0.162294 --> 0.098179). Saving model...
Validation set: Average loss: 0.0703, Accuracy: 16/18 (88.89%)
Validation loss decreased (0.098179 --> 0.070303). Saving model...
Validation set: Average loss: 0.1539, Accuracy: 15/18 (83.33%)
Validation set: Average loss: 0.1524, Accuracy: 13/18 (72.22%)
Validation set: Average loss: 0.0789, Accuracy: 16/18 (88.89%)
Validation set: Average loss: 0.1476, Accuracy: 14/18 (77.78%)
Validation set: Average loss: 0.0599, Accuracy: 17/18 (94.44%)
Validation loss decreased (0.070303 --> 0.059922). Saving model...
Validation set: Average loss: 0.0597, Accuracy: 17/18 (94.44%)
Validation loss decreased (0.059922 --> 0

In [24]:
# Instantiate your model
model = CustomModel()

# Load the saved model parameters
model.load_state_dict(torch.load('best_model.pt'))

# Test the best model
correct = 0
total = 0
with torch.no_grad():
    for data, target in val_loader:
        output = model(data)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print('Accuracy of the model on the 18 val images: {} %'.format(100 * correct / total))

Accuracy of the model on the 18 val images: 72.22222222222223 %


In [7]:
def f2_score(true_labels, predictions):
    true_positives = sum((tl == 1) and (p == 1) for tl, p in zip(true_labels, predictions))
    false_positives = sum((tl == 0) and (p == 1) for tl, p in zip(true_labels, predictions))
    false_negatives = sum((tl == 1) and (p == 0) for tl, p in zip(true_labels, predictions))

    beta = 2  # Weight assigned to recall
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    
    if precision == 0 or recall == 0:
        return 0
    
    f2 = (1 + beta**2) * (precision * recall) / ((beta**2 * precision) + recall)
    return f2

In [8]:
test_dataset = CustomDataset(root='./data', train=False, val=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False)

In [29]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Predict the classes for the test dataset
predicted_classes = []
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        _, predicted = torch.max(output.data, 1)
        predicted_classes.extend(predicted.tolist())

# Create a list of labels from the test dataset
y_true = []
with torch.no_grad():
    for _, target in test_loader:
        y_true.extend(target.tolist())

# Calculate different metrics
accuracy = accuracy_score(y_true, predicted_classes)
precision = precision_score(y_true, predicted_classes, average='weighted')
recall = recall_score(y_true, predicted_classes, average='weighted')
f2 = f2_score(y_true, predicted_classes)

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F2-score:', f2)

Accuracy: 0.8636363636363636
Precision: 0.8666666666666668
Recall: 0.8636363636363636
F2-score: 0.8928571428571429


In [30]:
print(predicted_classes)
print(y_true)

[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
