In [None]:
# import os, shutil
# shutil.copyfile('__notebook_source__.ipynb', 'make_mnist_rot_dataset')
# os.listdir()

# utils.py

In [None]:
from __future__ import division, print_function
from scipy.linalg import expm, norm
import collections
import itertools
import numpy as np
from torch.autograd import Variable
import torch

def ntuple(n):
    """Build a parser that normalises a value to an ``n``-element sequence.

    Args:
        n: Number of repetitions used when the parsed value is not iterable.

    Returns:
        A function ``parse(x)`` that returns ``x`` unchanged when it is an
        iterable and otherwise returns the tuple ``(x,) * n``.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so the function does not depend on
    # the submodule having been loaded elsewhere.
    from collections.abc import Iterable

    def parse(x):
        # Iterables (strings included) pass through as-is; their length is
        # not checked against ``n``.
        if isinstance(x, Iterable):
            return x
        return tuple(itertools.repeat(x, n))
    return parse

def getGrid(siz):
    """Build a flattened coordinate grid centred on zero.

    For every extent ``N`` in ``siz`` the axis holds ``N`` evenly spaced
    values from ``-N/2`` to ``N/2`` (both ends included).

    Returns:
        Array of shape ``(len(siz), prod(siz))``; row ``d`` contains the
        coordinate along dimension ``d`` of every grid point, enumerated in
        'ij' (matrix) order.
    """
    axes = []
    for extent in siz:
        half = extent / 2
        axes.append(np.linspace(-half, half, extent))

    coords = np.meshgrid(*axes, indexing='ij')

    # Flatten each coordinate array into a single row and stack the rows.
    return np.concatenate([c.ravel()[np.newaxis, :] for c in coords])

def rotate_grid_2D(grid, theta):
    """Rotate the first two coordinate rows of ``grid`` by ``theta`` degrees.

    The rotation is written back into ``grid`` itself, and that same array
    object is returned.
    """
    angle = np.deg2rad(theta)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    first, second = grid[0, :], grid[1, :]

    # Compute both rotated rows before writing either one back, because
    # ``first``/``second`` are views into ``grid``.
    rotated_first = first * cos_a - second * sin_a
    rotated_second = first * sin_a + second * cos_a

    grid[0, :] = rotated_first
    grid[1, :] = rotated_second
    return grid

def rotate_grid_3D(theta, axis, grid):
    """Rotate a 3-D coordinate grid about ``axis`` by ``theta`` degrees.

    Note that the argument order (theta, axis, grid) differs from
    ``rotate_grid_2D(grid, theta)``.

    Args:
        theta: Rotation angle in degrees (converted to radians internally).
        axis: Length-3 vector giving the rotation axis; it is normalised here.
        grid: Coordinate array; assumed to be of shape (3, N) -- TODO confirm
            against callers.

    Returns:
        The array produced by the einsum below, with output indices ``ik``.
        Presumably this equals the matrix product of the 3x3 rotation matrix
        with ``grid`` -- verify.
    """
    theta = np.deg2rad(theta)
    axis = np.array(axis)
    # Matrix exponential of the cross products between the identity rows and
    # the unit axis scaled by the angle; the result is a 3x3 matrix.
    rot_mat = expm(np.cross(np.eye(3), axis / norm(axis) * theta))
    # Append a trailing singleton axis: (3, 3) -> (3, 3, 1).
    rot_mat  =np.expand_dims(rot_mat,2)
    # Add an axis to grid and swap the last two axes; for a (3, N) input this
    # gives (3, 1, N).
    grid = np.transpose( np.expand_dims(grid,2), [0,2,1])

    # Sums over j; the singleton axes on both operands are broadcast by einsum.
    return np.einsum('ijk,jik->ik',rot_mat,grid)


def get_filter_rotation_transforms(kernel_dims, angles):
    """Compute the interpolation variables that rotate a filter.

    Args:
        kernel_dims: Filter size per spatial dimension (2 or 3 entries).
        angles: Rotation in degrees. A single angle for 2-D filters; for 3-D
            filters ``angles[0]`` is applied about [1, 0, 0] and then
            ``angles[1]`` about [0, 0, 1].

    Returns:
        Whatever ``compute_interpolation_grids`` returns for the rotated,
        re-centred grid and the circular mask built here.
    """
    n_dims = len(kernel_dims)

    # Sampling grid centred on the filter centre.
    coords = getGrid(kernel_dims)

    # Apply the rotation that matches the dimensionality; any other
    # dimensionality leaves the grid unrotated.
    if n_dims == 2:
        coords = rotate_grid_2D(coords, angles)
    elif n_dims == 3:
        coords = rotate_grid_3D(angles[0], [1, 0, 0], coords)
        coords = rotate_grid_3D(angles[1], [0, 0, 1], coords)

    # Radius of the largest centred circle/sphere that fits in the filter,
    # kept as a length-1 array so it broadcasts against the grid.
    radius = np.expand_dims(np.min((np.array(kernel_dims) - 1) / 2.), -1)

    # mask is 1 for samples inside the radius (with a small tolerance), else 0.
    dist_to_center = np.sqrt(np.sum(coords ** 2, axis=0))
    outside = dist_to_center >= radius + .0001
    mask = 1 - outside

    # Shift the coordinates by the radius so they can be used as array indices.
    coords += radius

    return compute_interpolation_grids(coords, kernel_dims, mask)

def compute_interpolation_grids(grid, kernel_dims, mask):
    """Turn a (rotated) sampling grid into n-linear interpolation variables.

    Args:
        grid: Float array with one row per spatial dimension and one column
            per filter element, holding fractional sample coordinates. The
            caller's array is left unmodified.
        kernel_dims: Filter size per spatial dimension (list or tuple).
        mask: Flat array with 1 for samples to keep and 0 for samples to drop.

    Returns:
        ``(inds_0, inds_1, weights, mask)`` where ``inds_0``/``inds_1`` are
        per-dimension lists of LongTensors with the lower/upper neighbour
        index, ``weights`` is a per-dimension list of FloatTensors with the
        fractional offset from ``inds_0``, and ``mask`` is a FloatTensor.
        Every tensor has shape ``[1, 1] + kernel_dims``.
    """
    # Accept tuples as well as lists; the reshapes below concatenate lists.
    kernel_dims = list(kernel_dims)
    out_shape = [1, 1] + kernel_dims

    #######################################################
    # The following part is part of nd-linear interpolation

    # Add a small eps so that the integer truncation below is more stable.
    # A new array is created so the caller's grid is not modified.
    grid = grid + 0.000000001

    # One coordinate vector per dimension
    coords = [grid[i, :] for i in range(grid.shape[0])]

    # Lower and upper neighbour index. A concrete dtype is used because
    # converting the abstract ``np.integer`` to a dtype is deprecated.
    inds_0 = [coord.astype(np.int64) for coord in coords]
    inds_1 = [ind + 1 for ind in inds_0]

    # Fractional part = weight of the upper neighbour
    weights = [coord - ind for coord, ind in zip(coords, inds_0)]

    # Where the upper neighbour in ANY dimension equals the kernel size, the
    # upper index of EVERY dimension is reset to 0 for that sample.
    ind_1_out_of_bounds = np.logical_or.reduce([ind == siz for ind, siz in zip(inds_1, kernel_dims)])
    for ind in inds_1:
        ind[ind_1_out_of_bounds] = 0

    # Samples that are out of bounds or outside the mask. The mask term is an
    # explicit list element; previously it was added to the list as a bare
    # ndarray and only worked through an accidental broadcast-add.
    outside_mask = (1 - mask).astype('bool')
    inds_out_of_bounds = np.logical_or.reduce(
        [ind < 0 for ind in itertools.chain(inds_0, inds_1)]
        + [ind >= siz for ind, siz in zip(inds_0, kernel_dims)]
        + [ind >= siz for ind, siz in zip(inds_1, kernel_dims)]
        + [outside_mask]
    )

    # Point those samples at index 0 in every dimension
    for ind_lo, ind_hi in zip(inds_0, inds_1):
        ind_lo[inds_out_of_bounds] = 0
        ind_hi[inds_out_of_bounds] = 0

    # Reshape to [1, 1, *kernel_dims]
    inds_0 = [np.reshape(ind, out_shape) for ind in inds_0]
    inds_1 = [np.reshape(ind, out_shape) for ind in inds_1]
    weights = [np.reshape(weight, out_shape) for weight in weights]

    # Make pytorch-tensors of the interpolation variables
    inds_0 = [Variable(torch.LongTensor(ind)) for ind in inds_0]
    inds_1 = [Variable(torch.LongTensor(ind)) for ind in inds_1]
    weights = [Variable(torch.FloatTensor(weight)) for weight in weights]

    # Make mask pytorch tensor
    mask = mask.reshape(kernel_dims)
    mask = mask.astype('float32')
    mask = np.expand_dims(mask, 0)
    mask = np.expand_dims(mask, 0)
    mask = torch.FloatTensor(mask)

    # Uncomment for nearest interpolation (for debugging)
    #inds_1 = [ind*0 for ind in inds_1]
    #weights  = [weight*0 for weight in weights]

    return inds_0, inds_1, weights, mask

def apply_transform(filter, interp_vars, filters_size, old_bilinear_interpolation=True):
    """Apply the transform described by the interpolation variables to a filter.

    Args:
        filter: Weight tensor. A 4-D tensor is treated as a 2-D filter bank
            (two leading axes followed by two spatial axes); anything else is
            treated as a 3-D filter bank with three spatial axes.
        interp_vars: ``(inds_0, inds_1, weights)``, each a list with one
            tensor per spatial dimension (lower index, upper index, weight of
            the upper index).
        filters_size: Spatial size of the filter, one entry per dimension.
        old_bilinear_interpolation: 2-D only. ``True`` samples with advanced
            indexing; ``False`` uses nested ``torch.gather`` calls.

    Returns:
        Tensor of shape ``filter.size()[:2] + filters_size`` containing the
        interpolated filter.
    """
    dim = 2 if len(filter.size()) == 4 else 3
    n_out, n_in = filter.size()[0], filter.size()[1]

    if dim == 2:
        if old_bilinear_interpolation:
            [x0_0, x1_0], [x0_1, x1_1], [w0, w1] = interp_vars
            rotated_filter = (filter[:, :, x0_0, x1_0] * (1 - w0) * (1 - w1) +
                              filter[:, :, x0_1, x1_0] * w0 * (1 - w1) +
                              filter[:, :, x0_0, x1_1] * (1 - w0) * w1 +
                              filter[:, :, x0_1, x1_1] * w0 * w1)
        else:
            # Expand dimensions to fit filter
            interp_vars = [[inner_el.expand_as(filter) for inner_el in outer_el] for outer_el in interp_vars]

            [x0_0, x1_0], [x0_1, x1_1], [w0, w1] = interp_vars

            # NOTE(review): the second gather reads from the output of the
            # first, so the row index is taken at the gathered column rather
            # than at the output position; this may differ from the indexing
            # branch above for general index grids -- confirm.
            a = torch.gather(torch.gather(filter, 2, x0_0), 3, x1_0) * (1 - w0) * (1 - w1)
            b = torch.gather(torch.gather(filter, 2, x0_1), 3, x1_0) * w0 * (1 - w1)
            c = torch.gather(torch.gather(filter, 2, x0_0), 3, x1_1) * (1 - w0) * w1
            d = torch.gather(torch.gather(filter, 2, x0_1), 3, x1_1) * w0 * w1
            rotated_filter = a + b + c + d

        rotated_filter = rotated_filter.view(n_out, n_in, filters_size[0], filters_size[1])

    else:
        [x0_0, x1_0, x2_0], [x0_1, x1_1, x2_1], [w0, w1, w2] = interp_vars

        # Index the three SPATIAL axes and keep the two leading axes intact,
        # exactly as the 2-D branch does. Indexing ``filter[x0, x1, x2]``
        # would address the leading axes instead and break the view below.
        rotated_filter = (filter[:, :, x0_0, x1_0, x2_0] * (1 - w0) * (1 - w1) * (1 - w2) +
                          filter[:, :, x0_1, x1_0, x2_0] * w0       * (1 - w1) * (1 - w2) +
                          filter[:, :, x0_0, x1_1, x2_0] * (1 - w0) * w1       * (1 - w2) +
                          filter[:, :, x0_1, x1_1, x2_0] * w0       * w1       * (1 - w2) +
                          filter[:, :, x0_0, x1_0, x2_1] * (1 - w0) * (1 - w1) * w2 +
                          filter[:, :, x0_1, x1_0, x2_1] * w0       * (1 - w1) * w2 +
                          filter[:, :, x0_0, x1_1, x2_1] * (1 - w0) * w1       * w2 +
                          filter[:, :, x0_1, x1_1, x2_1] * w0       * w1       * w2)

        rotated_filter = rotated_filter.view(n_out, n_in, filters_size[0], filters_size[1], filters_size[2])

    return rotated_filter

# Status marker printed once every helper of the utils.py section is defined.
# The message previously misspelled the module name as "unitls.py".
print("utils.py is ok!!!")

# if __name__ == '__main__':
#     """ Test rotation of filter """
#     import torch.nn as nn
#     from torch.nn import functional as F
#     from torch.nn.parameter import Parameter
#     import math
#     from utils import *

#     ks = [9,9] #Kernel size
#     angle = 45
#     interp_vars = get_filter_rotation_transforms(ks, angle)

#     w = Variable(torch.ones([1,1]+ks))
#     #w[:,:,4,:] = 5
#     w[:, :, :, 4] = 5
#     #w[:,:,0,0] = -1


#     print(w)
#     for angle in [0,90,45,180,65,10]:
#         print(angle,'degrees')
#         print(apply_transform(w, get_filter_rotation_transforms(ks, angle)[:-1], ks,old_bilinear_interpolation=True) * Variable(get_filter_rotation_transforms(ks, angle)[-1]))
#         print('Difference', torch.sum(apply_transform(w, get_filter_rotation_transforms(ks, angle)[:-1], ks,old_bilinear_interpolation=False) * Variable( get_filter_rotation_transforms(ks, angle)[-1]) - apply_transform(w, get_filter_rotation_transforms(ks, angle)[:-1], ks,old_bilinear_interpolation=True) * Variable(get_filter_rotation_transforms(ks, angle)[-1])))




# mnist.py

In [None]:
import numpy as np
# import scipy.misc
import sys
import os
# sys.path.append('../')
print(os.getcwd())
# from utils import getGrid, rotate_grid_2D
from imageio import imread


def loadMnist(mode):
    """Load the images listed in ``./<mode>-labels.csv``.

    Args:
        mode: Prefix of the label file, e.g. 'train' or 'test'.

    Returns:
        List of ``[image, label]`` pairs, one per line of the label file. The
        image is read with ``imread`` from the path in the first
        comma-separated field; the label is the second field as an int.
    """
    print('Loading MNIST', mode, 'images')
    # Mode = 'train'/'test
    mnist_folder = './'
    label_file = mnist_folder + mode + '-labels.csv'

    with open(label_file) as handle:
        rows = handle.readlines()

    def _parse(row):
        # Each row is "<relative image path>,<label>".
        fields = row.split(',')
        label = int(fields[1])
        return [imread(mnist_folder + fields[0]), label]

    return [_parse(row) for row in rows]


def linear_interpolation_2D(input_array, indices, outside_val=0, boundary_correction=True):
    """Bilinearly sample a 2-D array at fractional coordinates.

    Args:
        input_array: 2-D array to sample from.
        indices: Array whose row 0 holds the coordinates along axis 0 and
            whose row 1 holds the coordinates along axis 1.
        outside_val: Value written for samples flagged as out of range when
            ``boundary_correction`` is on. Defaults to 0.
        boundary_correction: When True, a sample is flagged as out of range
            if any of its four neighbour indices is negative or >= the array
            size. Note that this also flags samples lying exactly on the last
            row/column, because their upper neighbour is past the end.

    Returns:
        1-D array with one interpolated value per column of ``indices``.
    """
    # http://stackoverflow.com/questions/6427276/3d-interpolation-of-numpy-arrays-without-scipy
    ind_0 = indices[0, :]
    ind_1 = indices[1, :]

    N0, N1 = input_array.shape

    # Lower/upper neighbour indices. A concrete dtype is used because
    # converting the abstract ``np.integer`` to a dtype is deprecated.
    # NOTE(review): astype truncates toward zero, so coordinates in (-1, 0)
    # map to index 0 instead of being flagged as out of range.
    x0_0 = ind_0.astype(np.int64)
    x1_0 = ind_1.astype(np.int64)
    x0_1 = x0_0 + 1
    x1_1 = x1_0 + 1

    # Check if inds are beyond array boundary:
    if boundary_correction:
        inds_out_of_range = (x0_0 < 0) | (x0_1 < 0) | (x1_0 < 0) | (x1_1 < 0) | \
                            (x0_0 >= N0) | (x0_1 >= N0) | (x1_0 >= N1) | (x1_1 >= N1)

        # Point flagged samples at a valid element so the lookups below
        # cannot fail; their result is overwritten afterwards.
        for corner in (x0_0, x1_0, x0_1, x1_1):
            corner[inds_out_of_range] = 0

    w0 = ind_0 - x0_0
    w1 = ind_1 - x1_0

    output = (input_array[x0_0, x1_0] * (1 - w0) * (1 - w1) +
              input_array[x0_1, x1_0] * w0 * (1 - w1) +
              input_array[x0_0, x1_1] * (1 - w0) * w1 +
              input_array[x0_1, x1_1] * w0 * w1)

    if boundary_correction:
        # Honour the caller-supplied fill value (previously hard-coded to 0).
        output[inds_out_of_range] = outside_val

    return output


def loadMnistRot():
    """Load the saved rotated dataset from the ``mnist_rot/`` folder.

    Returns:
        ``(train, val, test)``. Each is a list of ``(image, label)`` pairs
        obtained by splitting the stored data array along axis 2 and the
        stored label array along axis 0, so every element keeps a
        length-1 axis.
    """
    def load_and_make_list(mode):
        prefix = 'mnist_rot/' + mode
        images = np.load(prefix + '_data.npy')
        targets = np.load(prefix + '_label.npy')

        # One slice per sample: images along the last axis, labels along axis 0.
        image_slices = np.split(images, images.shape[2], 2)
        target_slices = np.split(targets, targets.shape[0], 0)

        return list(zip(image_slices, target_slices))

    return tuple(load_and_make_list(mode) for mode in ('train', 'val', 'test'))


def random_rotation(data):
    """Rotate a 28x28 image by a random angle and scale it by its maximum.

    The angle is drawn uniformly from [0, 360) degrees with
    ``np.random.rand``. The result is returned as a float32 array of shape
    (28, 28).
    """
    angle = np.random.rand() * 360  # Random rotation

    # Rotate the centred sampling grid, then shift it by 13.5 so the
    # coordinates can be used as pixel positions.
    sample_grid = rotate_grid_2D(getGrid([28, 28]), angle)
    sample_grid += 13.5

    rotated = np.reshape(linear_interpolation_2D(data, sample_grid), [28, 28])
    peak = float(np.max(rotated))
    return (rotated / peak).astype('float32')

# Status marker printed once every helper of the mnist.py section is defined.
print('mnist.py is ok!!!')

# download_mnist.py

In [None]:
# Notebook configuration.
# datasetType selects which dataset is downloaded and rotated; the code
# further down accepts "FashionMNIST" or "MNIST".
datasetType = "FashionMNIST" #  FashionMNIST or MNIST
# Placeholder only: saveDir is reassigned from datasetType before it is used
# by the zip cell at the end of the notebook.
saveDir = ""
# datasetType = 'w'

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torchvision
from torch.utils import data
from torchvision import transforms
from imageio import imsave

"""
From:
https://gist.github.com/ischlag/41d15424e7989b936c1609b53edd1390
"""

import gzip
import os
import sys
import time

from six.moves import urllib
from six.moves import xrange    # pylint: disable=redefined-builtin
# from scipy.misc import imsave
# import tensorflow as tf
import numpy as np
import csv

# Constants, presumably carried over from the gist referenced above.
# NOTE(review): none of them is read by the code visible in this notebook;
# SOURCE_URL and WORK_DIRECTORY only appear in the commented-out
# maybe_download helper below.
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
WORK_DIRECTORY = 'raw_data'
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10

# def maybe_download(filename):
#     """Download the data from Yann's website, unless it's already here."""
#     if not tf.gfile.Exists(WORK_DIRECTORY):
#         tf.gfile.MakeDirs(WORK_DIRECTORY)
#     filepath = os.path.join(WORK_DIRECTORY, filename)
#     if not tf.gfile.Exists(filepath):
#         filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)
#         with tf.gfile.GFile(filepath) as f:
#             size = f.size()
#         print('Successfully downloaded', filename, size, 'bytes.')
#     return filepath
def download_mnist(data_location, datasetType="MNIST") -> tuple:
    """Download MNIST or FashionMNIST with torchvision and return both splits.

    Args:
        data_location: Root directory for the download; created if missing.
        datasetType: Either "MNIST" or "FashionMNIST".

    Returns:
        ``(mnist_train, mnist_test)``: the torchvision dataset objects for
        the training split and the test split.

    Raises:
        ValueError: If ``datasetType`` is not one of the supported names. The
            check runs first, so nothing is created on disk in that case.
    """
    supported = ("MNIST", "FashionMNIST")
    if datasetType not in supported:
        raise ValueError(
            f"datasetType is error!!! expected one of {supported}, got {datasetType!r}")

    os.makedirs(data_location, exist_ok=True)

    # torchvision.datasets exposes a class named exactly like each supported
    # dataset type, so one code path serves both.
    dataset_cls = getattr(torchvision.datasets, datasetType)
    mnist_train = dataset_cls(root=data_location, train=True, download=True)
    mnist_test = dataset_cls(root=data_location, train=False, download=True)

    # os.path.join avoids the doubled separator when data_location already
    # ends with a slash; the trailing "" keeps the final "/".
    path = os.path.join(data_location, datasetType, "raw", "")
    print(f"{datasetType} datasets locate in '{path}'")
    return mnist_train, mnist_test


if __name__ == "__main__":
    # Download the selected dataset, then export every image as a JPEG file
    # plus one CSV per split that maps image path -> label. loadMnist() reads
    # these files back.
    data_location = "./data/"
    mnistTrain, mnistTest = download_mnist(data_location, datasetType=datasetType)

    # Append a trailing channel axis to the image tensors: (..., 1).
    train_data = np.array(mnistTrain.data.unsqueeze(-1))
    test_data = np.array(mnistTest.data.unsqueeze(-1))
    train_labels = np.array(mnistTrain.targets)
    test_labels = np.array(mnistTest.targets)
    print(test_labels)
    # The original project checked here that it was running from the expected
    # directory; that check is not needed in this Jupyter notebook version.
    #     if 'mnist' not in os.getcwd():
    #         print('Path Error!')
    #         raise ValueError

    def _export_split(split, images, labels):
        """Write ./<split>-images/<i>.jpg for every image and ./<split>-labels.csv."""
        image_dir = split + "-images"
        os.makedirs("./" + image_dir, exist_ok=True)
        with open("./" + split + "-labels.csv", 'w', newline='') as csvFile:
            writer = csv.writer(csvFile, delimiter=',', quotechar='"')
            for i in range(len(images)):
                relative_path = image_dir + "/" + str(i) + ".jpg"
                # Drop the channel axis again; the image is saved as 2-D.
                imsave("./" + relative_path, images[i][:, :, 0])
                writer.writerow([relative_path, labels[i]])
        print(split + "-labels.csv OK !!!")

    # process train data, then repeat for test data
    _export_split("train", train_data, train_labels)
    _export_split("test", test_data, test_labels)

# make_mnist-rot.py 

In [None]:
import numpy as np
import os
# from mnist import random_rotation, loadMnist


def makeMnistRot(dataset_type=None):
    """
    Build the randomly rotated dataset from the exported train/test images.

    The samples returned by loadMnist('train') + loadMnist('test') are
    consumed in order: the first 10000 become the train split, the next 2000
    the val split and the next 50000 the test split. Every image gets an
    independent random rotation; the NumPy global seed is fixed to 0 first.

    Two outputs are written:
      * ``<saveDir>/*.amat`` text files (one flattened image followed by its
        label per row; train and val are stacked into a single file), where
        saveDir depends on the dataset type;
      * ``mnist_rot/<split>_data.npy`` of shape (28, 28, count) and
        ``mnist_rot/<split>_label.npy`` for fast reading by loadMnistRot().

    Args:
        dataset_type: "FashionMNIST" or anything else (treated as MNIST) to
            choose the .amat output folder. Defaults to the notebook-level
            ``datasetType`` setting.
    """
    if dataset_type is None:
        # Fall back to the notebook configuration cell.
        dataset_type = datasetType

    np.random.seed(0)

    # Get all samples
    all_samples = loadMnist('train') + loadMnist('test')

    split_sizes = [('train', 10000), ('val', 2000), ('test', 50000)]
    data = {}     # split -> array of shape (28, 28, count)
    labels = {}   # split -> array of shape (count,)
    flat = {}     # split -> array of shape (count, 28*28 + 1), label last

    cursor = 0  # position in all_samples; the splits use consecutive ranges
    for split, count in split_sizes:
        split_data = np.zeros([28, 28, count])
        split_label = np.zeros([count])
        for j in range(count):
            sample = all_samples[cursor]
            split_data[:, :, j] = random_rotation(sample[0])
            split_label[j] = sample[1]
            cursor += 1
        data[split] = split_data
        labels[split] = split_label

        # Sample-major copy: one flattened image per row, label appended.
        sample_major = np.transpose(split_data, (2, 0, 1)).reshape(count, -1)
        flat[split] = np.hstack((sample_major, split_label.reshape(count, 1)))

    if dataset_type == "FashionMNIST":
        saveDir = "fashionmnist_rotation_new"
    else:
        saveDir = "mnist_rotation_new"
    os.makedirs(saveDir, exist_ok=True)

    new_train_val = np.vstack((flat['train'], flat['val']))
    new_test = flat['test']
    print(f"trainslation finished: train_val_shape={new_train_val.shape}, test_shape={new_test.shape}")
    new_train_file_name = saveDir + '/mnist_all_rotation_normalized_float_train_valid.amat'
    new_test_file_name = saveDir + '/mnist_all_rotation_normalized_float_test.amat'
    np.savetxt(new_train_file_name, new_train_val, fmt="%.6f")
    print(f"save the {new_train_file_name} finished...\nlocate in {os.getcwd() + '/' + new_train_file_name}")
    np.savetxt(new_test_file_name, new_test, fmt="%.6f")
    print(f"save the {new_test_file_name} finished...\nlocate in {os.getcwd() + '/' + new_test_file_name}")

    # An already existing folder is fine; any other failure is now reported
    # here instead of being swallowed by a bare ``except``.
    os.makedirs('mnist_rot/', exist_ok=True)

    for split, _ in split_sizes:
        np.save('mnist_rot/' + split + '_data', data[split])
        np.save('mnist_rot/' + split + '_label', labels[split])

# Generate the rotated dataset when this cell is executed as the main program.
if __name__ == '__main__':
    makeMnistRot()
#     # test new_MNIST_rotation
#     train_file_name = 'mnist_rotation_new/mnist_all_rotation_normalized_float_train_valid.amat'
#     test_file_name = 'mnist_rotation_new/mnist_all_rotation_normalized_float_test.amat'
#     import numpy as np
#     with open(train_file_name) as data_file:
#         data = np.loadtxt(data_file)
#     data.shape
#     plt.imshow(data[0][:-1].reshape(28, -1))

In [None]:
import os
import zipfile
 
 
def zipDir(dirpath, outFullName):
    """
    Compress a directory tree into a zip archive.

    Files are stored under their path relative to ``dirpath``, so the archive
    does not contain the directory itself as a top-level entry. Empty
    directories are not stored.

    :param dirpath: path of the directory to compress
    :param outFullName: destination path of the archive, including the xxxx.zip file name
    :return: None
    """
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
        for path, dirnames, filenames in os.walk(dirpath):
            for filename in filenames:
                full_path = os.path.join(path, filename)
                # relpath strips only the leading root; str.replace would
                # also remove later occurrences of the same text in the path.
                archive.write(full_path, os.path.relpath(full_path, dirpath))

In [None]:
# Choose the output folder that matches the configured dataset type and
# archive it next to the folder as "<folder>.zip".
saveDir = "fashionmnist_rotation_new" if datasetType == "FashionMNIST" else "mnist_rotation_new"
zipDir(saveDir, saveDir + ".zip")
print(f"zip OK. loacate in {saveDir}.zip")