In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Subset, Dataset, ConcatDataset
from torchvision.datasets import DatasetFolder
import matplotlib.pyplot as plt
import numpy as np
import time 
from sklearn.manifold import TSNE

  from .autonotebook import tqdm as notebook_tqdm


Neuro network structure:  
Layer1: 3 \* 3 \* 64 filter with stride of 1, padding of 1 and max pooling of 2 \* 2.  
Layer2: 3 \* 3 \* 128 filter with stride of 1, padding of 1 and max pooling of 2 \* 2.  
Layer3: 3 \* 3 \* 256 filter with stride of 1, padding of 1 and max pooling of 2 \* 2.  
Fully connected layer: 256 \* 16 \* 16 -> 256, 1 -> 4 (items of class)

In [2]:
class NeuroNetwork(nn.Module):
    def __init__(self):
        super(NeuroNetwork, self).__init__()
        first_layer = 64
        second_layer = 128
        third_layer = 256
        forth_layer = 512
        fc_layer = 1024
        output = 4
        self.cnn_layers = nn.Sequential(
            # First layer
            nn.Conv2d(in_channels=3,  # RGB 3 layer3
                      out_channels=first_layer,  # Output layer -- the number of filters
                      kernel_size=3,  # Size of filter --3*3
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(first_layer),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            # Second layer
            nn.Conv2d(first_layer, second_layer, 3, 1, 1),
            nn.BatchNorm2d(second_layer),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Third layer
            nn.Conv2d(second_layer, third_layer, 3, 1, 1),
            nn.BatchNorm2d(third_layer),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Forth layer
            nn.Conv2d(third_layer, forth_layer, 3, 1, 1),
            nn.BatchNorm2d(forth_layer),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(forth_layer * 8 * 8, fc_layer),
            nn.BatchNorm1d(fc_layer),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(fc_layer, output)
        )

    def forward(self, x):
        x = self.cnn_layers(x)

        x = x.flatten(1)

        x = self.fc_layers(x)
        return x

Pre-process the raw data:

1. Do data augmentation
2. by resize to 128*128

In [3]:
train_tfm = transforms.Compose([
    transforms.RandomResizedCrop(size=256, scale=(0.7, 1.0)),
    transforms.RandomRotation(degrees=25),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Load Data
train_set = DatasetFolder("dataset/arcDataset",
                          loader=lambda x: Image.open(x).convert("RGB"),
                          extensions="jpg",
                          transform=train_tfm)
test_set = DatasetFolder("dataset/arcValidset",
                         loader=lambda x: Image.open(x).convert("RGB"),
                         extensions="jpg",
                         transform=test_tfm)

In [None]:
add_pseudo_data = False
if add_pseudo_data:
    pseudo_set = DatasetFolder("data/dataset/arcPseudoset", loader=lambda x: Image.open(x).convert("RGB"), extensions="jpg",
                               transform=train_tfm)

class PseudoDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, id):
        return self.x[id][0], self.y[id]

def get_pseudo_labels(dataset, model, threshold=0.1):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    softmax = nn.Softmax(dim=-1)

    idx = []
    labels = []

    for i, batch in enumerate(data_loader):
        img, _ = batch
        with torch.no_grad():
            logits = model(img.to(device))
        probs = softmax(logits)

        for j, x in enumerate(probs):
            if torch.max(x) > threshold:
                idx.append(i * batch_size + j)
                labels.append(int(torch.argmax(x)))

    model.train()
    print("\nNew data: {:5d}\n".format(len(idx)))
    dataset = PseudoDataset(Subset(dataset, idx), labels)
    return dataset

In [6]:
# Batch size of 128
batch_size = 128
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size)

device = "cuda" if torch.cuda.is_available() else "cpu"

# CrossEntropy loss are applied
cross_entropy = nn.CrossEntropyLoss()
cnn = NeuroNetwork().to(device)
cnn.device = device
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001, weight_decay=1e-5)

train_loss_record = []
valid_loss_record = []
train_acc_record = []
valid_acc_record = []

In [9]:
def save_model():
    checkpoint = {
        "net": cnn.state_dict(),
        'optimizer':optimizer.state_dict(),
        "epoch": epoch
    }
    import os
    fomat_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    torch.save(checkpoint, 'models/checkpoint/autosave_'+fomat_time)

In [10]:
def show_model(X, Y):
    X = X.detach().numpy()
    # print("X.shape",X.shape)
    Y = Y.detach().numpy()
    # print("Y.shape",Y.shape)
    X_min = X.min(0)
    X_max = X.max(0)
    X_norm = (X-X_min) / (X_max - X_min)

    # print(X.min(0))
    tsne = TSNE(n_components = 2, 
                perplexity = 30.,
                learning_rate =10.,
                n_iter=500, 
                random_state=135, 
                init='pca'
                # verbose=1
            )
    x_fit = tsne.fit_transform(X_norm.reshape(X_norm.shape[0],-1),Y)
    # print("x_fit",x_fit.shape)
    y_fit = Y 
    # print("y_fit.shape",y_fit.shape)

 
    classes = list(np.unique(y_fit))
    # print(len(classes))
    # print(x_fit[y_fit == classes[0]].shape)
    markers = 'os' * len(classes)
    colors = plt.cm.rainbow(np.linspace(0, 1, len(classes)))
    # print(colors.shape)
    fig, ax = plt.subplots()
    for i, c in enumerate(classes):
        temp = x_fit[y_fit == c]
        # ax.scatter(*x_fit[y_fit == c].T, marker=markers[i], c=[colors[i]], label=str(c), alpha=0.6)
        ax.scatter(x = temp[:,0],y = temp[:,1], marker=markers[i], c=[colors[i]], label=str(c), alpha=0.6)
    ax.legend()
    ax.axis("off")
    fig.set_facecolor('k')
    plt.show()


# for data,label in train_loader:
#     show_model(data,label)
#     break
# for data,label in test_loader:
#     show_model(data,label)

In [12]:
# Train for 20 times rounds
n_epochs = 150
best_acc = 0.0
for epoch in range(n_epochs):
    print("Epoch: ", epoch)
    start_time = time.time()

    cnn.train()

    train_loss = []
    train_acc = []

    for index,batch in enumerate(train_loader):
        data, labels = batch

        predict = cnn(data.to(device))

        loss = cross_entropy(predict, labels.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = torch.tensor(predict.argmax(dim=-1) == labels.to(device)).float().mean()
        train_loss.append(loss.item())
        train_acc.append(acc)
    # print("squeezing data")
    # data_temp = torch.cat(data_temp,dim = 0)
    # pred_temp = torch.cat(pred_temp,dim = 0)
    # print("visualizing")
    # show_model(data_temp,pred_temp)

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_acc) / len(train_acc)
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    cnn.eval()
    test_loss = []
    test_acc = []
    for batch in  test_loader:
        data, labels = batch

        with torch.no_grad():
            predict = cnn(data.to(device))

        loss = cross_entropy(predict, labels.to(device))

        acc = torch.tensor(predict.argmax(dim=-1) == labels.to(device)).float().mean()
        test_loss.append(loss.item())
        test_acc.append(acc)



    valid_loss = sum(test_loss) / len(test_loss)
    valid_acc = sum(test_acc) / len(test_acc)

    print(f"[ Test | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    if valid_acc > best_acc:
        best_acc = valid_acc

    train_loss_record.append(train_loss)
    valid_loss_record.append(valid_loss)
    train_acc_record.append(train_acc)
    valid_acc_record.append(valid_acc)

    # cache the model
    if epoch % 5 == 0:
        save_model()

    end_time = time.time()

    print(f"[Time cost | {epoch + 1:03d}/{n_epochs:03d}]:{end_time - start_time: .4f}s")



Epoch:  0


IndexError: Target 23 is out of bounds.

In [None]:
x = np.arange(len(train_acc_record))
plt.plot(x, train_acc_record, color="blue", label="Train")
plt.plot(x, valid_acc_record, color="red", label="Valid")
plt.legend(loc="upper right")
plt.show()

x = np.arange(len(train_loss_record))
plt.plot(x, train_loss_record, color="blue", label="Train")
plt.plot(x, valid_loss_record, color="red", label="Valid")
plt.legend(loc="upper right")
plt.show()