In [1]:
## Import necessary packages.
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, SubsetRandomSampler
from torchvision.datasets import DatasetFolder
from sklearn.model_selection import KFold
from matplotlib import pyplot as plt
from scipy.signal import savgol_filter

#If you haven't download the tqdm package, just uncomment the following line.
#!pip install tqdm
# This is for the progress bar.
from tqdm.auto import tqdm

In [2]:
# It is important to do data augmentation in training.
# However, not every augmentation is useful.
# Please think about what kind of augmentation is helpful for food recognition.
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    
    # Randomly rotate the image by up to 10 degrees
    transforms.RandomRotation(degrees=90),
    # Randomly zoom in or out on the image
    transforms.RandomResizedCrop(size=128, scale=(0.7, 1.3)),
    # Randomly crop a portion of the image
    transforms.RandomCrop(size=128, padding=10),
    # Randomly adjust the brightness and contrast of the image
    transforms.ColorJitter(brightness=0.5, contrast=0.5),
    # Convert the image to a tensor
    
    transforms.ToTensor(),
])


# We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Batch size for training, validation, and testing.
# A greater batch size usually gives a more stable gradient.
# But the GPU memory is limited, so please adjust it carefully.
batch_size = 400

# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = DatasetFolder("/kaggle/input/sdsc4016-fundls-of-ml-2-hw2/food-11/food-11/training/labeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm)
valid_set = DatasetFolder("/kaggle/input/sdsc4016-fundls-of-ml-2-hw2/food-11/food-11/validation", loader=lambda x: Image.open(x), extensions="jpg", transform=test_tfm)
unlabeled_set = DatasetFolder("/kaggle/input/sdsc4016-fundls-of-ml-2-hw2/food-11/food-11/training/unlabeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm)
test_set = DatasetFolder("/kaggle/input/sdsc4016-fundls-of-ml-2-hw2/food-11/food-11/testing", loader=lambda x: Image.open(x), extensions="jpg", transform=test_tfm)

# Construct data loaders.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)


In [3]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # The arguments for commonly used modules:
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)

        # input image size: [3, 128, 128]
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),
            
            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(512 * 2 * 2, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 11)
        )

    def forward(self, x):
        # input (x): [batch_size, 3, 128, 128]
        # output: [batch_size, 11]

        # Extract features by convolutional layers.
        x = self.cnn_layers(x)

        # The extracted feature map must be flatten before going to fully-connected layers.
        x = x.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        x = self.fc_layers(x)
        return x

In [4]:
def get_pseudo_labels(dataset, model, threshold=0.65):
    # This functions generates pseudo-labels of a dataset using given model.
    # It returns an instance of DatasetFolder containing images whose prediction confidences exceed a given threshold.
    # You are NOT allowed to use any models trained on external data for pseudo-labeling.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Make sure the model is in eval mode.
    model.eval()
    # Define softmax function.
    softmax = nn.Softmax(dim=-1)
    
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    # Iterate over the dataset by batches.
    for batch in tqdm(dataloader):
        img, _ = batch

        # Forward the data
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(img.to(device))

        # Obtain the probability distributions by applying softmax on logits.
        probs = softmax(logits)

        # you may filter the data and construct a new dataset here.

    # # Turn off the eval mode.
    model.train()
    return dataset

In [None]:
def Training( model, device, criterion, optimizer, n_epochs, do_semi, train_loader,test_loader,unlabeled_set):
    history = {'train_loss': [], 'val_loss': [],'train_acc':[],'val_acc':[]}
    for epoch in range(n_epochs):
        # ---------- TODO ----------
        # In each epoch, relabel the unlabeled dataset for semi-supervised learning.
        # Then you can combine the labeled dataset and pseudo-labeled dataset for the training.
        if do_semi:
            # Obtain pseudo-labels for unlabeled data using trained model.
            pseudo_set = get_pseudo_labels(unlabeled_set, model)

            # Construct a new dataset and a data loader for training.
            # This is used in semi-supervised learning only.
            concat_dataset = ConcatDataset([train_set, pseudo_set])
            train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()

        # These are used to record information in training.
        train_loss = []
        train_accs = []

        # Iterate the training set by batches.
        for batch in tqdm(train_loader):

            # A batch consists of image data and corresponding labels.
            imgs, labels = batch

            # Forward the data. (Make sure data and model are on the same device.)
            logits = model(imgs.to(device))

            # Calculate the cross-entropy loss.
            # We don't need to apply softmax before computing cross-entropy as it is done automatically.
            loss = criterion(logits, labels.to(device))

            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()

            # Compute the gradients for parameters.
            loss.backward()

            # Clip the gradient norms for stable training.
            grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

            # Update the parameters with computed gradients.
            optimizer.step()

            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy.
            train_loss.append(loss.item())
            train_accs.append(acc)
        # The average loss and accuracy of the training set is the average of the recorded values.
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)

        # Print the information.
        print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
        # ---------- Validation ----------
        # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
        model.eval()

        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []

        # Iterate the validation set by batches.
        for batch in tqdm(valid_loader):

            # A batch consists of image data and corresponding labels.
            imgs, labels = batch

            # We don't need gradient in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(imgs.to(device))

            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))

            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy.
            valid_loss.append(loss.item())
            valid_accs.append(acc)
        # The average loss and accuracy for entire validation set is the average of the recorded values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)

        # Print the information.
        print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
        history['train_loss'].append(train_loss)
        history['val_loss'].append(valid_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(valid_acc) 

    # apply a Savitzky-Golay filter to smooth the curve
    train_loss_smooth = savgol_filter(history['train_loss'], window_length=5, polyorder=1)
    val_loss_smooth = savgol_filter(history['val_loss'], window_length=5, polyorder=1)
    plt.plot([*range(1, n_epochs+1)], train_loss_smooth)
    plt.plot([*range(1, n_epochs+1)], val_loss_smooth)
    plt.xlabel("Epoches")
    plt.ylabel("Loss")
    plt.savefig("./train-val loss.jpg")


    return model,history,train_loss,valid_loss,train_acc,valid_acc
    
    
    
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
# Initialize a model, and put it on the device specified.
model = Classifier().to(device)
#model = torchvision.models.resnet50(pretrained=False).to(device)
model.device = device

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

# The number of training epochs.
n_epochs = 100
# Whether to do semi-supervised learning.
do_semi = False
batch_size = 256
k=5
splits=KFold(n_splits=k,shuffle=True) 
concat_dataset = ConcatDataset([train_set, train_set])

for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(len(concat_dataset)))):
    print('Fold {}'.format(fold + 1))
    
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(concat_dataset, batch_size=batch_size,  num_workers=0, sampler=train_sampler, pin_memory=True)
    valid_loader = DataLoader(concat_dataset, batch_size=batch_size, num_workers=0,sampler=valid_sampler, pin_memory=True)
    model,history, train_loss,valid_loss,train_acc,valid_acc = Training(model, device, criterion, optimizer, n_epochs, do_semi, train_loader,test_loader,unlabeled_set)
    break



cpu
Fold 1


  0%|          | 0/18 [00:00<?, ?it/s]

[ Train | 001/100 ] loss = 2.41584, acc = 0.10563


  0%|          | 0/5 [00:00<?, ?it/s]

[ Valid | 001/100 ] loss = 2.39945, acc = 0.09556


  0%|          | 0/18 [00:00<?, ?it/s]

[ Train | 002/100 ] loss = 2.31950, acc = 0.15757


  0%|          | 0/5 [00:00<?, ?it/s]

[ Valid | 002/100 ] loss = 2.35739, acc = 0.14006


  0%|          | 0/18 [00:00<?, ?it/s]

[ Train | 003/100 ] loss = 2.25587, acc = 0.19587


  0%|          | 0/5 [00:00<?, ?it/s]

[ Valid | 003/100 ] loss = 2.31892, acc = 0.15848


  0%|          | 0/18 [00:00<?, ?it/s]

[ Train | 004/100 ] loss = 2.21266, acc = 0.21257


  0%|          | 0/5 [00:00<?, ?it/s]

[ Valid | 004/100 ] loss = 2.18217, acc = 0.23297


  0%|          | 0/18 [00:00<?, ?it/s]

[ Train | 005/100 ] loss = 2.16044, acc = 0.23299


  0%|          | 0/5 [00:00<?, ?it/s]

[ Valid | 005/100 ] loss = 2.16961, acc = 0.23364


  0%|          | 0/18 [00:00<?, ?it/s]

[ Train | 006/100 ] loss = 2.09588, acc = 0.25142


  0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Make sure the model is in eval mode.
# Some modules like Dropout or BatchNorm affect if the model is in training mode.
model.eval()

# Initialize a list to store the predictions.
predictions = []

# Iterate the testing set by batches.
for batch in tqdm(test_loader):
    # A batch consists of image data and corresponding labels.
    # But here the variable "labels" is useless since we do not have the ground-truth.
    # If printing out the labels, you will find that it is always 0.
    # This is because the wrapper (DatasetFolder) returns images and labels for each batch,
    # so we have to create fake labels to make it work normally.
    imgs, labels = batch

    # We don't need gradient in testing, and we don't even have labels to compute loss.
    # Using torch.no_grad() accelerates the forward process.
    with torch.no_grad():
        logits = model(imgs.to(device))

    # Take the class with greatest logit as prediction and record it.
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

In [None]:
# Save predictions into the file.
with open("predict.csv", "w") as f:

    # The first row must be "Id, Category"
    f.write("Id,Category\n")

    # For the rest of the rows, each image id corresponds to a predicted class.
    for i, pred in  enumerate(predictions):
         f.write(f"{i},{pred}\n")