In [27]:
import torch 
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import scipy.io as sio
from PIL import Image
import torchvision.transforms as transforms
import os

%matplotlib inline

In [28]:
#Load MNIST data
mnist = fetch_openml('mnist_784',version=1,cache=True)


In [29]:
X = mnist.data / 255.0
# X = mnist.data
Y = mnist.target

In [30]:
# dataframe to numpy array
npx = X.values.astype(np.float32)
npy = Y.values

In [31]:
npx = npx.reshape(-1,28,28)

In [32]:
npx.shape

(70000, 28, 28)

In [48]:
x_train, x_test, y_train, y_test = train_test_split(npx, npy, test_size=0.1, shuffle = False, random_state=42)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)

(63000, 28, 28)
(63000,)
(7000, 28, 28)


In [49]:
a = np.array([1,2,3])
print(a.shape)
a = np.append(a,a[0])
print(a.shape)

(3,)
(4,)


In [50]:
random_rotate = transforms.RandomRotation(degrees = (-50, 50))
x_train3d = np.zeros((x_train.shape[0],28,28,3))
x_test3d = np.zeros((x_test.shape[0],28,28,3))

train_len = x_train.shape[0]
test_len = x_test.shape[0]

for i in range(train_len):
    x_train3d[i] = (cv2.cvtColor(x_train[i], cv2.COLOR_GRAY2RGB) * 255).astype(np.uint8)
    img = Image.fromarray(x_train3d[i], 'RGB')
    img = random_rotate(img)
    x_train3d = np.append(x_train3d,np.expand_dims(np.array(img).astype(np.uint8),axis=0),axis=0)
    y_train = np.append(y_train,y_train[i])
    if(i%10==0):
        print("Train",i)
        print(x_train3d.shape)
        print(y_train.shape)


for i in range(test_len):
    x_test3d[i] = (cv2.cvtColor(x_test[i], cv2.COLOR_GRAY2RGB) * 255).astype(np.uint8)
    img = Image.fromarray(x_test3d[i], 'RGB')
    img = random_rotate(img)
    x_test3d = np.append(x_test3d,np.expand_dims(np.array(img).astype(np.uint8),axis=0),axis=0)
    y_test = np.append(y_test,y_test[i])

Train 0
(63001, 28, 28, 3)
(63001,)
Train 10
(63011, 28, 28, 3)
(63011,)


KeyboardInterrupt: 

In [None]:
print(x_train3d.shape)
print(y_train.shape)
print(x_test3d.shape)
print(y_test.shape)

In [80]:
df_train = pd.DataFrame(x_train3d.reshape(x_train3d.shape[0],-1))
df_test = pd.DataFrame(x_test3d.reshape(x_test3d.shape[0],-1))

In [81]:
df_train['label'] = y_train
df_test['label'] = y_test

In [83]:
# change to csv
MNIST_DIR = './data/MNIST3d'
df_train.to_csv(os.path.join(MNIST_DIR, 'train.csv'), index=False)
df_test.to_csv(os.path.join(MNIST_DIR, 'test.csv'), index=False)


In [84]:
df = pd.read_csv(os.path.join(MNIST_DIR, 'train.csv'))

In [85]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2343,2344,2345,2346,2347,2348,2349,2350,2351,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9


In [97]:
df_image = df.loc[:, df.columns != 'label']
df_label = df['label']

In [98]:
df_image.head()
df_label.head()

0    5
1    0
2    4
3    1
4    9
Name: label, dtype: int64

In [99]:
images = df_image.values
labels = df_label.values

In [96]:
images.shape

(63000, 2352)

In [100]:
labels.shape

(63000,)

In [3]:
# load mat file
mat_file = sio.loadmat('./data/SVHN/train_32x32.mat')

In [9]:
print(type(mat_file))
# show the keys
print(mat_file.keys())

<class 'dict'>
dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])


In [15]:
# get data
X = mat_file['X']
y = mat_file['y']
print(type(X))
print(type(y))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [21]:
print(X.shape)
print(y.shape)
data = np.transpose(X, (3,0,1,2))

(32, 32, 3, 73257)
(73257, 1)


In [26]:
i = 10
img = Image.fromarray(data[i])
print(y[i])
img.show()


[3]
