This repository has been archived by the owner on Mar 7, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataparser.py
78 lines (68 loc) · 2.63 KB
/
dataparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import numpy as np
import filters
import numpy as np
import matplotlib.pyplot as plt
from utils.storage import *
class DataParser:
    """Build training and testing arrays from the MNIST training CSV.

    On construction the parser reads ``dataset/mnist_train.csv`` (located via
    ``getpath()``), uses the first ``total`` rows as training samples and the
    following ``testImages`` rows as testing samples, and finally shuffles
    the training set.

    Attributes set by the constructor:
        testImages: number of testing samples (100).
        classes: number of label classes (10).
        total: number of training images read (``100 * classes``).
        filters: the filter bank passed in, or ``None``.
        applyKernels: ``True`` when a filter bank was supplied.
        trainingInput / trainingOutput: training samples and one-hot labels.
            Without filters the input has shape ``(total, 28, 28)``; with
            filters every kernel response is stored as its own flattened row,
            giving shape ``(total * filters.kernelCount, 784)``.
        testingInput / testingOutput: testing samples, shape
            ``(testImages, 28, 28)``, and their one-hot labels.
        index: next free row in the training arrays.
    """

    def __init__(self, filters=None):
        """Allocate the data arrays and immediately load the dataset.

        Args:
            filters: optional filter bank. It must expose ``kernelCount`` and
                ``applyKernels(image)``; the result of ``applyKernels`` is
                indexed as ``[kernel, row, col]``.
        """
        self.testImages = 100
        self.classes = 10
        self.total = 100 * self.classes
        # Always define the attribute so it can be inspected even when no
        # filter bank is in use.
        self.filters = filters
        self.applyKernels = filters is not None
        if self.applyKernels:
            rows = self.total * filters.kernelCount
            self.trainingInput = np.zeros((rows, 28 * 28))
            self.trainingOutput = np.zeros((rows, self.classes))
        else:
            self.trainingInput = np.zeros((self.total, 28, 28))
            self.trainingOutput = np.zeros((self.total, self.classes))
        self.testingInput = np.zeros((self.testImages, 28, 28))
        self.testingOutput = np.zeros((self.testImages, self.classes))
        self.index = 0
        self.load()

    def normalize(self, inputImage):
        """Min-max scale ``inputImage`` to the range [0, 1].

        A constant image (max equals min) is returned unchanged, because the
        scaling would otherwise divide by zero.
        """
        lowest = np.min(inputImage)
        highest = np.max(inputImage)
        if highest == lowest:
            return inputImage
        return (inputImage - lowest) / (highest - lowest)

    def listLabel(self, listNumber, length):
        """Return a one-hot list of ``length`` entries.

        Args:
            listNumber: sequence whose first element is the class index
                (converted with ``int``).
            length: number of entries in the returned list.
        """
        output = [0] * length
        output[int(listNumber[0])] = 1
        return output

    def parsed(self, image):
        """Normalize ``image`` and, when filters are active, apply them.

        Returns the normalized image, or the result of
        ``self.filters.applyKernels`` on the normalized image when a filter
        bank is in use.
        """
        imageNormalized = self.normalize(image)
        if self.applyKernels:
            return self.filters.applyKernels(imageNormalized)
        return imageNormalized

    def addImage(self, image, listNumber):
        """Append one labelled image to the training arrays.

        With filters active, one flattened row is stored per kernel, all
        sharing the same label; otherwise the normalized image is stored
        as-is.

        Returns:
            The updated ``self.index`` (next free training row).
        """
        label = self.listLabel(listNumber, self.classes)
        if self.applyKernels:
            filterImages = self.parsed(image)
            for filterid in range(self.filters.kernelCount):
                # hstack over the rows of a 2-D response flattens it row-major.
                self.trainingInput[self.index, :] = np.hstack(
                    filterImages[filterid, :, :]
                )
                self.trainingOutput[self.index, :] = label
                self.index += 1
        else:
            self.trainingInput[self.index, :] = self.parsed(image)
            self.trainingOutput[self.index, :] = label
            self.index += 1
        return self.index

    def load(self):
        """Read the CSV and fill the training and testing arrays.

        Column 0 of every row is the label; the remaining columns are the
        pixel values of one image.
        """
        # Only the first total + testImages rows are ever used, so do not
        # parse the rest of the file.
        needed = self.total + self.testImages
        train_data = np.loadtxt(
            getpath() + "dataset/mnist_train.csv",
            delimiter=",",
            max_rows=needed,
        )
        # NOTE(review): MNIST preprocessing commonly scales by 255; confirm
        # that 750 is intentional. Kept unchanged to preserve stored values.
        fac = 750 * 0.99 + 0.01
        # np.asfarray was removed in NumPy 2.0; asarray(dtype=float) is the
        # documented replacement.
        images = np.asarray(train_data[:, 1:], dtype=float) / fac
        labels = np.asarray(train_data[:, :1], dtype=float)
        self._populate(images, labels)

    def _populate(self, images, labels):
        """Fill the arrays from per-row pixel vectors and labels, then shuffle.

        Rows ``[0, total)`` become training samples (through ``addImage``);
        rows ``[total, total + testImages)`` become testing samples. Testing
        images are stored as given: they are not passed through ``parsed``.
        """
        # training
        for i in range(self.total):
            self.addImage(images[i].reshape((28, 28)), labels[i].tolist())
        # testing
        for i in range(self.total, self.total + self.testImages):
            # Offset into the testing arrays. The previous ``total - i``
            # produced 0, -1, -2, ... and only worked through negative-index
            # wraparound, storing the samples in scrambled order.
            slot = i - self.total
            self.testingInput[slot, :] = images[i].reshape((28, 28))
            self.testingOutput[slot, :] = self.listLabel(
                labels[i].tolist(), self.classes
            )
        # Shuffle inputs and labels with the same permutation so that the
        # pairs stay aligned.
        indices = np.arange(self.trainingInput.shape[0])
        np.random.shuffle(indices)
        self.trainingInput = self.trainingInput[indices]
        self.trainingOutput = self.trainingOutput[indices]