In [None]:
# Take the 50 randomline images and process them into a useable 12x12 pixel
#    format. The 12x12 images are then saved as labeled csvs.


In [1]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.misc
import imageio
import cv2
from PIL import Image
import pandas as pd
import sys
import os
import csv
import pickle
datapath = "/home/kenny/Dropbox/Coding/MLArtUIowa/Data/" #HomeDesktop

In [2]:
#Crop out the white space in the 50 png files from the website

left = 112
top = 34
right = 331
bottom = 253
unique_names = np.array(["yim" + str(i) + ".png" for i in range(50)])  #Website Data
for name in unique_names:
    img = Image.open(datapath + "unprocessed_50imgs/" + name)
    cropped_img = img.crop((left, top, right, bottom)) 
    cropped_img.save(datapath + "unprocessed_50imgs/cropped" + name)
    

In [3]:
# Resize images to 12x12 pixels


for name in unique_names:
    img = cv2.imread(datapath + "unprocessed_50imgs/cropped" + name, cv2.IMREAD_UNCHANGED)
    resized_img = cv2.resize(img, (12, 12))
    cv2.imwrite(datapath + "processed_50imgs/" + name, resized_img)


In [4]:
# Create csv where the each row is a different image. The first 144 columns
#  are pixel values and the last column is the image number (from 0 - 49)

format = '.png'
fileList = []
#print(myDir)
#for root, dirs, files in os.walk(datapath + "Data/processed_randomline_images/", topdown=False):
for root, dirs, files in os.walk(datapath + "processed_50imgs/", topdown=False):
    for name in sorted(files):
        if name.endswith(format):
            fullName = os.path.join(root, name)
            fileList.append(fullName)

for file in fileList:
    #print(file)
    img_file = Image.open(file)
    # img_file.show()

    # get original image parameters...
    width, height = img_file.size
    format = img_file.format
    mode = img_file.mode

    # Make image Greyscale
    img_grey = img_file.convert('L')
    #img_grey.save('result.png')
    #img_grey.show()
    
    # Save Greyscale values
    value = np.asarray(img_grey.getdata(), dtype=np.int).reshape((img_grey.size[1], img_grey.size[0]))
    value = value.flatten()
    # print(value)
    #with open(datapath + "Data/imgpixels_label.csv", 'a') as f:Code
    with open(datapath + "imgpixels_label.csv", 'a') as f:
        writer = csv.writer(f)
        writer.writerow(np.append(value,file[63:-4])) # Change these indices based on your file path

In [5]:
# Read website grade data and create a csv where the first column is the img number and the
#  second column is the user-assigned grade

with open(datapath + "website_image_grades.csv", "r") as f:
    reader = csv.reader(f, delimiter=',')
    headers = next(reader)
    data = list(reader)
    data = np.array(data)
    
image_names = data[:,1]
image_grades = data[:,2]

for i in range(len(image_names)):
    with open(datapath + "label_grade.csv", 'a') as f:
        writer = csv.writer(f)
        writer.writerow([int(image_names[i][3:-4]),int(image_grades[i])])

In [6]:
# Load data

imgpixels_label = np.loadtxt(datapath + "imgpixels_label.csv", 
                        delimiter=",")
label_grade = np.loadtxt(datapath + "label_grade.csv", 
                        delimiter=",")
# Create array of user-submitted data with structure (label[0], grade[1], pixel values[2:145])
label_grade_imgpixels = np.zeros((len(label_grade[:,1]), 144 + len(label_grade[0,:])))
for i in range(len(label_grade[:,1])):
    for j in range(50):
        if label_grade[i,0] == imgpixels_label[j,144]:
            label_grade_imgpixels[i] = np.append(label_grade[i].astype(int), imgpixels_label[j,:144]) # img label, grade, pixels...

In [7]:
image_size = 12 # width and length
no_of_different_grades = 10 #  i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

# Compress possible pixel values to the interval [0.01, 1] 
train_imgs = np.asfarray(label_grade_imgpixels[:,2:146], dtype='float32') * 0.99/255 + 0.01
label_grade_pixels = np.concatenate((np.asfarray(label_grade_imgpixels[:,0:2], dtype='float32'),train_imgs), axis=1)
train_imgs.shape

(3280, 144)

Add headings to label_grade_pixels array and then write to csv

In [8]:
len(label_grade_pixels)

3280

In [9]:
header = []
for i in range(len(label_grade_pixels[1])):
    if i==0:
        header.append("Label")
    if i==1:
        header.append("Grade")
    if i>1:
        header.append("Pixel" + str(i-1))
        

In [10]:
label_grade_pixels = pd.DataFrame(data=label_grade_pixels, columns=header)

In [11]:
label_grade_pixels.to_csv("../Data/label_grade_pixels.csv")

 Break up the total dataset into 70% training and 30% testing (some quick googling suggested this is a typical breakup for small datasets)
 
 Pandas DataFrame object class makes this simple with the df.sample and df.drop functions

In [19]:
num_train = 0.7*(label_grade_pixels.shape[0])
num_test = 0.3*(label_grade_pixels.shape[0])

In [42]:
train_df=label_grade_pixels.sample(frac=0.7)
test_df=label_grade_pixels.drop(train_df.index)

In [45]:
train_df.to_csv("../Data/train.csv")
test_df.to_csv("../Data/test.csv")

In [46]:
# Map all image grades to One-Hot encoding
a = label_grade_imgpixels[:,1]
OneHotGrades = np.zeros((a.size, int(a.max()+1)))
OneHotGrades[np.arange(a.size),a.astype(int)] = 1
OneHotGrades

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 1., ..., 0., 0., 0.]])

In [11]:
# Get rid of all zeroes o ones
OneHotGrades[OneHotGrades==0] = 0.01
OneHotGrades[OneHotGrades==1] = 0.99

In [56]:
OneHotGrades.shape

(6560, 10)

In [None]:
with open(datapath + 'Data/pickled_data.pkl', 'bw') as fh:
    data = (label_grade_imgpixels, OneHotGrades, train_imgs)
    pickle.dump(data, fh)

In [None]:
with open(datapath + "Data/ANN_epoch10000_200H1.pkl", "rb") as fh:
    data = pickle.load(fh)

In [20]:
with open(datapath + "Data/ANN_epoch10000_200H1.pkl", "rb") as fh:
    data = pickle.load(fh)

AttributeError: Can't get attribute 'NeuralNetwork' on <module '__main__'>