### Below is the code for 'Rotation-equivariant convolutional neural network ensembles in image processing', published in UbiComp/ISWC '19 Adjunct 2019.

Abstract of Essay:
For the present engineering of neural networks, rotation invariant is hard to be obtained. Rotation symmetry is an important characteristic in our physical world. In image recognition, using rotated images would largely decrease the performance of neural networks. This situation seriously hindered the application of neural networks in the real-world, such as human tracking, self-driving cars, and intelligent surveillance. In this paper, we would like to present a rotation-equivariant design of convolutional neural network ensembles to counteract the problem of rotated image processing task. This convolutional neural network ensembles combine multiple convolutional neural networks trained by different ranges of rotation angles respectively. In our proposed theory, the model lowers the training difficulty by learning with smaller separations of random rotation angles instead of a huge one. Experiments are reported in this paper. The convolutional neural network ensembles could reach 96.35% on rotated MNIST datasets, 84.9% on rotated Fashion-MNIST datasets, and 91.35% on rotated KMNIST datasets. These results are comparable to current state-of-the-art performance.


## Step 1: Import useful packages

In [0]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
import torchvision.transforms.functional as TF
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision.datasets import MNIST
from tqdm.autonotebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import inspect
import time
import numpy as np



## Step 2: Load and normalizing the MNIST training datasets


In [0]:
means = deviations = [0.5]
train_transform=[]
start_angle=-270
end_angle=0
rotate_angle=45
ensemble_num=(360//rotate_angle)+1
for i in range(ensemble_num):
  train_transform.append(transforms.Compose([transforms.RandomRotation([start_angle,end_angle]),
                                         transforms.ToTensor(),
                                         transforms.Normalize(means, deviations)]))
  start_angle=-270+rotate_angle*(i+1)
  end_angle=rotate_angle*(i+1)

# add trainset 
trainset=[]
for i in range(ensemble_num):
  trainset.append(torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=train_transform[i]))
# add trainloader
trainloader=[]
for i in range(ensemble_num):
    trainloader.append(torch.utils.data.DataLoader(trainset[i], batch_size=128,
                                          shuffle=True, num_workers=2))
  

## Step 3: Define a Convolutional Neural Network

In [0]:
class MnistResNet(nn.Module):  
    def __init__(self):
        super(MnistResNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, 10)
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

## Step 4: Define Loss functions and optimizers

In [0]:
#define loss function
criterion = nn.CrossEntropyLoss()

# use GPU
device = torch.device("cuda:0")

# define models
model=[]
for i in range(ensemble_num):
  model.append(MnistResNet().to(device))
  
# define optimizers
optimizer=[]
for i in range(ensemble_num):
  optimizer.append(optim.SGD(model[i].parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4))

## Step 5：Train each ensemble network

In [17]:
epoch_range = 6
# training loop + eval loop
for ensemble_id in range(ensemble_num):
    running_loss = 0.0
    print("Loss of ensemble model",ensemble_id)
    for epoch in range(epoch_range):
      for i, data in enumerate(trainloader[ensemble_id], 0):
          # get the inputs
          inputs, labels = data
  #         print(labels.numpy().shape)

          inputs, labels = inputs.to(device), labels.to(device)

          # zero the parameter gradients
          optimizer[ensemble_id].zero_grad()

          # forward + backward + optimize
          outputs = model[ensemble_id](inputs)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer[ensemble_id].step()

          # print statistics
          running_loss += loss.item()
          if i % 20 == 19:    # print every 2000 mini-batches
              print('[%d, %5d] loss: %.6f' %
                    (epoch + 1, i + 1, running_loss / 2000))
              running_loss = 0.0

Loss of ensemble model 0
[1,    20] loss: 0.022678
[1,    40] loss: 0.020412
[1,    60] loss: 0.015858
[1,    80] loss: 0.013281
[1,   100] loss: 0.011597
[1,   120] loss: 0.010234
[1,   140] loss: 0.008632
[1,   160] loss: 0.008028
[1,   180] loss: 0.008113
[1,   200] loss: 0.007240
[1,   220] loss: 0.007296
[1,   240] loss: 0.006849
[1,   260] loss: 0.006331
[1,   280] loss: 0.006468
[1,   300] loss: 0.006063
[1,   320] loss: 0.005801
[1,   340] loss: 0.005738
[1,   360] loss: 0.005753
[1,   380] loss: 0.005508
[1,   400] loss: 0.005297
[1,   420] loss: 0.005137
[1,   440] loss: 0.004715
[1,   460] loss: 0.004737
[2,    20] loss: 0.006874
[2,    40] loss: 0.004929
[2,    60] loss: 0.004289
[2,    80] loss: 0.004360
[2,   100] loss: 0.004587
[2,   120] loss: 0.004260
[2,   140] loss: 0.004512
[2,   160] loss: 0.004383
[2,   180] loss: 0.003911
[2,   200] loss: 0.004142
[2,   220] loss: 0.003780
[2,   240] loss: 0.003992
[2,   260] loss: 0.004044
[2,   280] loss: 0.003863
[2,   300] lo

## Step 6: Form the encoded sets

In [18]:
correct = 0
train_data = []
train_label = []
for i in range(ensemble_num):
  with torch.no_grad():
    for data in trainloader[i]:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs=[]
        for i in range(ensemble_num):
          outputs.append(model[i](images))
          outputs[i]=outputs[i].cpu().numpy()

        labels = labels.cpu().numpy()
        for i in range(len(outputs[0])):
            d = np.concatenate((outputs[0][i], outputs[1][i]), axis=None)
            d = np.concatenate((d, outputs[2][i]), axis=None)
            d = np.concatenate((d, outputs[3][i]), axis=None)
            d = np.concatenate((d, outputs[4][i]), axis=None)
            d = np.concatenate((d, outputs[5][i]), axis=None)
            d = np.concatenate((d, outputs[6][i]), axis=None)
         
            train_data.append(d)
            train_label.append(labels[i])
            
print(len(train_label))


540000


## Step 7: Test correction rate under several Classifers

In [19]:
#Correction rate under KNeighborsClassifier
X_train, X_test, y_train, y_test = train_test_split(train_data, train_label, test_size=0.05, random_state=0)
neigh = KNeighborsClassifier(n_neighbors=10)
neigh.fit(X_train, y_train) 
pred = neigh.predict(X_test)
total = 0
correct = 1
for i in range(len(y_test)):
    total += 1
    if y_test[i] == pred[i]:
        correct += 1
print("The correction rate under KNN is:")
print(correct * 1.0 / total * 1.0)

The correction rate under KNN is:
0.9556296296296296


In [20]:
#Correction rate under DecisionTreeClassifier
X_train, X_test, y_train, y_test = train_test_split(train_data, train_label, test_size=0.05, random_state=0)

tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train) 

pred = tree.predict(X_test)

total = 0
correct = 1
for i in range(len(y_test)):
    total += 1
    if y_test[i] == pred[i]:
        correct += 1
print("The correction rate under Decision Tree is:")
print(correct * 1.0 / total * 1.0)

The correction rate under Decision Tree is:
0.9021111111111111


In [21]:
#Correction rate under RandomForestClassifier
X_train, X_test, y_train, y_test = train_test_split(train_data, train_label, test_size=0.05, random_state=0)

forest = RandomForestClassifier(n_estimators=100, max_depth=6, random_state=0)
forest.fit(X_train, y_train) 

pred = forest.predict(X_test)
total = 0
correct = 1
for i in range(len(y_test)):
    total += 1
    if y_test[i] == pred[i]:
        correct += 1
print("The correction rate under Random Forest is:")
print(correct * 1.0 / total * 1.0)

The correction rate under Random Forest is:
0.8998888888888888


In [22]:
#Correction rate under MLPClassifier
X_train, X_test, y_train, y_test = train_test_split(train_data, train_label, test_size=0.05, random_state=0)

mlp = MLPClassifier(alpha=1, max_iter=1000)
mlp.fit(X_train, y_train) 

pred = mlp.predict(X_test)
total = 0
correct = 1
for i in range(len(y_test)):
    total += 1
    if y_test[i] == pred[i]:
        correct += 1
print("The correction rate under MLPClassifier is:")
print(correct * 1.0 / total * 1.0)

The correction rate under MLPClassifier is:
0.9476666666666667
