-
Notifications
You must be signed in to change notification settings - Fork 13
/
FileReaderMNIST.cs
116 lines (104 loc) · 4.35 KB
/
FileReaderMNIST.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
using System.IO;
using System.IO.Compression;
using System.Collections.Generic;
using NumSharp;
namespace MNIST.IO
{
public class ImageData
{
public byte Label { get; set; }
public byte[,] Image { get; set; }
public double[,] AsDouble()
{
var res = new double[Image.GetLength(0), Image.GetLength(1)];
for (var x = 0; x<Image.GetLength(0); x++)
for (var y = 0; y<Image.GetLength(1); y++)
res[x,y] = Image[x,y] / 256.0;
return res;
}
}
/// http://yann.lecun.com/exdb/mnist/
public class FileReaderMNIST
{
public (NDArray, NDArray) LoadImagesAndLables(string labelFile, string imageFile, int total)
{
var labels = LoadLabel(labelFile, total);
var images = LoadImages(imageFile, total);
var ndImages = new NDArray(np.float64);
var ndLabels = new NDArray(np.int32);
var cc=0;
foreach (var img in images)
{
//ndImages.Data.Add(img);
//ndLabels.Data.Add(labels[cc]);
cc++;
}
return (ndImages, ndLabels);
}
public IEnumerable<NDArray> LoadImages(string imageFile, int total)
{
/*
TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
[offset] [type] [value] [description]
0000 32 bit integer 0x00000803(2051) magic number
0004 32 bit integer 60000 number of images
0008 32 bit integer 28 number of rows
0012 32 bit integer 28 number of columns
0016 unsigned byte ?? pixel
0017 unsigned byte ?? pixel
........
xxxx unsigned byte ?? pixel
Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black). */
using (var raw = File.OpenRead(imageFile))
{
using(var gz = new GZipStream(raw, CompressionMode.Decompress))
{
using (var reader = new BinaryReaderMSB(gz))
{
var header = reader.ReadInt32MSB();
if (header != 0x803) throw new InvalidDataException(header.ToString("x"));
var itemCount = reader.ReadInt32MSB();
var rowCount = reader.ReadInt32MSB();
var colCount = reader.ReadInt32MSB();
for(var i=0; i< total; i++)
{
var nd = np.zeros(rowCount, colCount);
for(var r=0; r<rowCount; r++)
for(var c=0; c<colCount; c++)
nd[r, c] = reader.ReadByte();
yield return nd;
}
}
}
}
}
public byte[] LoadLabel(string labelFile, int total)
{
/*
TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
[offset] [type] [value] [description]
0000 32 bit integer 0x00000801(2049) magic number (MSB first)
0004 32 bit integer 60000 number of items
0008 unsigned byte ?? label
0009 unsigned byte ?? label
........
xxxx unsigned byte ?? label
The labels values are 0 to 9.
*/
using (var raw = File.OpenRead(labelFile))
{
using (var gz = new GZipStream(raw, CompressionMode.Decompress))
{
using (var reader = new BinaryReaderMSB(gz))
{
// Check Header / Magic Number
var header = reader.ReadInt32MSB();
if (header != 0x801) throw new InvalidDataException(header.ToString("x"));
var itemCount = reader.ReadInt32MSB();
return reader.ReadBytes(total);
}
}
}
}
}
}