In [1]:
import numpy as np
import cv2
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
import random

In [2]:
def _binarize_symbol(src_img):
    """Convert one grayscale symbol image into a flattened 32x32 binary vector.

    Args:
        src_img: 2-D grayscale image as returned by
            ``cv2.imread(..., cv2.IMREAD_GRAYSCALE)``.

    Returns:
        Integer array of shape (1024, 1) containing only 0 and 1, or ``None``
        when the thresholded image has no contour to crop to.
    """
    orig_height, orig_width = src_img.shape
    # Scale to a fixed width while keeping the aspect ratio. The height is
    # clamped to 1 because int() could truncate it to 0 for very wide inputs,
    # which cv2.resize rejects.
    width = 45
    height = max(1, int(width * orig_height / orig_width))
    src_img = cv2.resize(src_img, dsize=(width, height), interpolation=cv2.INTER_AREA)

    # Grayscale erosion takes the local minimum, so darker pixels spread
    # (presumably to thicken dark strokes before thresholding).
    kernel = np.ones((2, 2), np.uint8)
    src_img = cv2.erode(src_img, kernel, iterations=1)

    PIXEL_SET = 255
    kernel_size = 21
    normalized_mean = 20
    # Inverted adaptive threshold: the result has a black background.
    bin_img = cv2.adaptiveThreshold(src_img, PIXEL_SET, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY_INV, kernel_size, normalized_mean)
    # Noise removal via morphological closing.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    final_thr = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE, kernel)

    # [-2] selects the contour list under both the 3-value (OpenCV 3.x) and
    # 2-value (OpenCV 4.x) return conventions of findContours.
    contours = cv2.findContours(final_thr, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing to crop to; an empty slice would make cv2.resize fail.
        return None

    # Crop to the union of all contour bounding boxes.
    boxes = [cv2.boundingRect(cnt) for cnt in contours]
    min_x = min(x for x, _, _, _ in boxes)
    min_y = min(y for _, y, _, _ in boxes)
    max_x = max(x + w for x, _, w, _ in boxes)
    max_y = max(y + h for _, y, _, h in boxes)
    final_thr = final_thr[min_y:max_y, min_x:max_x]

    # Resize to 32*32, flatten to a column vector and binarise to 0/1.
    im_resize = cv2.resize(final_thr, (32, 32))
    im_resize = np.reshape(im_resize, (1024, 1)) / 255.0
    return np.where(im_resize > 0.5, 1, 0)


def load_images_from_folder(folder_name, filter_count=5000):
    """Load and preprocess the images of one class folder.

    Reads files from ``'filtered_extracted_images/' + folder_name``. When the
    folder holds more than ``filter_count`` files, a fixed-seed random subset
    of ``filter_count`` files is used instead of all of them.

    Args:
        folder_name: Name of the class sub-folder.
        filter_count: Maximum number of files to process (default 5000).

    Returns:
        List of (1024, 1) integer arrays of 0/1 values, one per image that
        could be read and contained at least one contour. Files that
        ``cv2.imread`` cannot decode and blank images are skipped.

    Raises:
        OSError: If the folder cannot be listed (raised by ``os.listdir``).
    """
    folder = 'filtered_extracted_images/' + folder_name
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # sample below pick the same files on every machine.
    files = sorted(os.listdir(folder))
    if len(files) > filter_count:
        # A private Random(0) draws the same sequence as random.seed(0) would,
        # without resetting the global RNG state for the rest of the notebook.
        files = random.Random(0).sample(files, filter_count)

    train_data = []
    for filename in files:
        src_img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
        if src_img is None:
            # imread returns None for anything it cannot decode.
            continue
        sample = _binarize_symbol(src_img)
        if sample is not None:
            train_data.append(sample)

    return train_data
        

In [3]:
# Disabled earlier variant of load_images_from_folder, kept as a bare string
# literal: the cell only evaluates to a string and defines nothing.
# NOTE(review): inside it, `img=~img` runs before the `img is not None` check,
# so that check could never guard an unreadable file if this were re-enabled.
'''
def load_images_from_folder(folder_name):
    train_data=[]
    folder = 'filtered_extracted_images/' + folder_name
    files = os.listdir(folder)
    files_count = len(files)
    files_filtered = files
    filter_count = 10000
    if files_count > filter_count:
        random.seed(0)
        files_filtered = random.sample(files , filter_count)
        
    for filename in files_filtered:
        img = cv2.imread(os.path.join(folder,filename),cv2.IMREAD_GRAYSCALE)
        img=~img
        if img is not None:
            ret,thresh=cv2.threshold(img,127,255,cv2.THRESH_BINARY)
            ctrs,heirarchy=cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
            cnt=sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
            w=int(28)
            h=int(28)
            maxi=0
            for c in cnt:
                x,y,w,h=cv2.boundingRect(c)
                maxi=max(w*h,maxi)
                if maxi==w*h:
                    x_max=x
                    y_max=y
                    w_max=w
                    h_max=h
            im_crop= thresh[y_max:y_max+h_max+10, x_max:x_max+w_max+10]
            im_resize = cv2.resize(im_crop,(28,28))
            cv2.imshow("train", im_resize)
            cv2.waitKey(0)
            # and finally destroy/close all open windows
            cv2.destroyAllWindows()
            im_resize = np.reshape(im_resize,(784,1))

            train_data.append(im_resize)
    return train_data
'''                

'\ndef load_images_from_folder(folder_name):\n    train_data=[]\n    folder = \'filtered_extracted_images/\' + folder_name\n    files = os.listdir(folder)\n    files_count = len(files)\n    files_filtered = files\n    filter_count = 10000\n    if files_count > filter_count:\n        random.seed(0)\n        files_filtered = random.sample(files , filter_count)\n        \n    for filename in files_filtered:\n        img = cv2.imread(os.path.join(folder,filename),cv2.IMREAD_GRAYSCALE)\n        img=~img\n        if img is not None:\n            ret,thresh=cv2.threshold(img,127,255,cv2.THRESH_BINARY)\n            ctrs,heirarchy=cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)\n            cnt=sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])\n            w=int(28)\n            h=int(28)\n            maxi=0\n            for c in cnt:\n                x,y,w,h=cv2.boundingRect(c)\n                maxi=max(w*h,maxi)\n                if maxi==w*h:\n                    x_max=x

In [4]:
# Running count of samples written to the CSV, updated by each class cell.
data_len = 0

import os
import shutil

# All classes are appended to this single CSV, so a copy left in the working
# directory by an earlier run is removed first.
FILENAME = 'train_final.csv'
if any(entry == FILENAME for entry in os.listdir()):
    os.remove(FILENAME)

def save_file(data, filename=None):
    """Append ``data`` as rows to a CSV file, writing the header only once.

    Args:
        data: Sequence of equal-length rows (e.g. a list of 1-D arrays).
        filename: Target CSV path. Defaults to the module-level ``FILENAME``.

    Returns:
        None. Nothing is written when ``data`` contains no rows, so an empty
        batch cannot leave a blank header line in the file.
    """
    path = FILENAME if filename is None else filename
    df = pd.DataFrame(data)
    if df.empty:
        return
    # Checking the path itself (rather than scanning os.listdir()) also works
    # for targets outside the current directory; a zero-byte file still needs
    # its header.
    needs_header = not os.path.isfile(path) or os.path.getsize(path) == 0
    df.to_csv(path, mode='a', index=False, header=needs_header)


In [5]:
# Class '0' -> label 0.
data0 = load_images_from_folder('0')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data0):
    data0[i] = np.append(sample, 0)

save_file(data0)
data_len += len(data0)
del data0  # release this class before the next one is loaded
print(data_len)


5000


In [6]:
# Class '1' -> label 1.
data1 = load_images_from_folder('1')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data1):
    data1[i] = np.append(sample, 1)

save_file(data1)
data_len += len(data1)
del data1  # release this class before the next one is loaded
print(data_len)
    

10000


In [7]:
# Class '2' -> label 2.
data2 = load_images_from_folder('2')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data2):
    data2[i] = np.append(sample, 2)

save_file(data2)
data_len += len(data2)
del data2  # release this class before the next one is loaded
print(data_len)
    

15000


In [8]:
# Class '3' -> label 3.
data3 = load_images_from_folder('3')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data3):
    data3[i] = np.append(sample, 3)

save_file(data3)
data_len += len(data3)
del data3  # release this class before the next one is loaded
print(data_len)
    

20000


In [9]:
# Class '4' -> label 4.
data4 = load_images_from_folder('4')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data4):
    data4[i] = np.append(sample, 4)

save_file(data4)
data_len += len(data4)
del data4  # release this class before the next one is loaded
print(data_len)
    

25000


In [10]:
# Class '5' -> label 5.
data5 = load_images_from_folder('5')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data5):
    data5[i] = np.append(sample, 5)

save_file(data5)
data_len += len(data5)
del data5  # release this class before the next one is loaded
print(data_len)
    

28545


In [11]:
# Class '6' -> label 6.
data6 = load_images_from_folder('6')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data6):
    data6[i] = np.append(sample, 6)

save_file(data6)
data_len += len(data6)
del data6  # release this class before the next one is loaded
print(data_len)
    

31663


In [12]:
# Class '7' -> label 7.
data7 = load_images_from_folder('7')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data7):
    data7[i] = np.append(sample, 7)

save_file(data7)
data_len += len(data7)
del data7  # release this class before the next one is loaded
print(data_len)
    

34572


In [13]:
# Class '8' -> label 8.
data8 = load_images_from_folder('8')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data8):
    data8[i] = np.append(sample, 8)

save_file(data8)
data_len += len(data8)
del data8  # release this class before the next one is loaded
print(data_len)
    

37640


In [14]:
# Class '9' -> label 9.
data9 = load_images_from_folder('9')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data9):
    data9[i] = np.append(sample, 9)

save_file(data9)
data_len += len(data9)
del data9  # release this class before the next one is loaded
print(data_len)
    

41377


In [15]:
# Class 'a' -> label 10.
data10 = load_images_from_folder('a')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data10):
    data10[i] = np.append(sample, 10)

save_file(data10)
data_len += len(data10)
del data10  # release this class before the next one is loaded
print(data_len)
    

46377


In [16]:
# Class 'b' -> label 11.
data11 = load_images_from_folder('b')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data11):
    data11[i] = np.append(sample, 11)

save_file(data11)
data_len += len(data11)
del data11  # release this class before the next one is loaded
print(data_len)
    

51377


In [17]:
# Class 'c' -> label 12.
data12 = load_images_from_folder('c')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data12):
    data12[i] = np.append(sample, 12)

save_file(data12)
data_len += len(data12)
del data12  # release this class before the next one is loaded
print(data_len)
    

56377


In [18]:
# Class 'd' -> label 13.
data13 = load_images_from_folder('d')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data13):
    data13[i] = np.append(sample, 13)

save_file(data13)
data_len += len(data13)
del data13  # release this class before the next one is loaded
print(data_len)
    

61229


In [19]:
# Class 'e' -> label 14.
data14 = load_images_from_folder('e')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data14):
    data14[i] = np.append(sample, 14)

save_file(data14)
data_len += len(data14)
del data14  # release this class before the next one is loaded
print(data_len)
    

64232


In [20]:
# Class 'f' -> label 15.
data15 = load_images_from_folder('f')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data15):
    data15[i] = np.append(sample, 15)

save_file(data15)
data_len += len(data15)
del data15  # release this class before the next one is loaded
print(data_len)
    

67944


In [21]:
# Class 'g' -> label 16.
data16 = load_images_from_folder('g')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data16):
    data16[i] = np.append(sample, 16)

save_file(data16)
data_len += len(data16)
del data16  # release this class before the next one is loaded
print(data_len)
    

69636


In [22]:
# Class 'h' -> label 17.
data17 = load_images_from_folder('h')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data17):
    data17[i] = np.append(sample, 17)

save_file(data17)
data_len += len(data17)
del data17  # release this class before the next one is loaded
print(data_len)
    

71100


In [23]:
# Class 'i' -> label 18.
data18 = load_images_from_folder('i')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data18):
    data18[i] = np.append(sample, 18)

save_file(data18)
data_len += len(data18)
del data18  # release this class before the next one is loaded
print(data_len)
    

76100


In [24]:
# Class 'j' -> label 19.
data19 = load_images_from_folder('j')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data19):
    data19[i] = np.append(sample, 19)

save_file(data19)
data_len += len(data19)
del data19  # release this class before the next one is loaded
print(data_len)
    

77636


In [25]:
# Class 'k' -> label 20.
data20 = load_images_from_folder('k')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data20):
    data20[i] = np.append(sample, 20)

save_file(data20)
data_len += len(data20)
del data20  # release this class before the next one is loaded
print(data_len)


80710


In [26]:
# Class 'l' -> label 21.
data21 = load_images_from_folder('l')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data21):
    data21[i] = np.append(sample, 21)

save_file(data21)
data_len += len(data21)
del data21  # release this class before the next one is loaded
print(data_len)
    

81727


In [27]:
# Class 'm' -> label 22.
data22 = load_images_from_folder('m')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data22):
    data22[i] = np.append(sample, 22)

save_file(data22)
data_len += len(data22)
del data22  # release this class before the next one is loaded
print(data_len)
    

84203


In [28]:
# Class 'n' -> label 23.
data23 = load_images_from_folder('n')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data23):
    data23[i] = np.append(sample, 23)

save_file(data23)
data_len += len(data23)
del data23  # release this class before the next one is loaded
print(data_len)
    

89203


In [29]:
# Class 'o' -> label 24.
data24 = load_images_from_folder('o')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data24):
    data24[i] = np.append(sample, 24)

save_file(data24)
data_len += len(data24)
del data24  # release this class before the next one is loaded
print(data_len)
    

89652


In [30]:
# Class 'p' -> label 25.
data25 = load_images_from_folder('p')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data25):
    data25[i] = np.append(sample, 25)

save_file(data25)
data_len += len(data25)
del data25  # release this class before the next one is loaded
print(data_len)
    

92332


In [31]:
# Class 'q' -> label 26.
data26 = load_images_from_folder('q')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data26):
    data26[i] = np.append(sample, 26)

save_file(data26)
data_len += len(data26)
del data26  # release this class before the next one is loaded
print(data_len)


93562


In [32]:
# Class 'r' -> label 27.
data27 = load_images_from_folder('r')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data27):
    data27[i] = np.append(sample, 27)

save_file(data27)
data_len += len(data27)
del data27  # release this class before the next one is loaded
print(data_len)
    

96233


In [33]:
# Class 's' -> label 28.
data28 = load_images_from_folder('s')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data28):
    data28[i] = np.append(sample, 28)

save_file(data28)
data_len += len(data28)
del data28  # release this class before the next one is loaded
print(data_len)
    

97646


In [34]:
# Class 't' -> label 29.
data29 = load_images_from_folder('t')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data29):
    data29[i] = np.append(sample, 29)

save_file(data29)
data_len += len(data29)
del data29  # release this class before the next one is loaded
print(data_len)
    

100920


In [35]:
# Class 'u' -> label 30.
data30 = load_images_from_folder('u')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data30):
    data30[i] = np.append(sample, 30)

save_file(data30)
data_len += len(data30)
del data30  # release this class before the next one is loaded
print(data_len)
    

102189


In [36]:
# Class 'v' -> label 31.
data31 = load_images_from_folder('v')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data31):
    data31[i] = np.append(sample, 31)

save_file(data31)
data_len += len(data31)
del data31  # release this class before the next one is loaded
print(data_len)
    

103747


In [37]:
# Class 'w' -> label 32.
data32 = load_images_from_folder('w')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data32):
    data32[i] = np.append(sample, 32)

save_file(data32)
data_len += len(data32)
del data32  # release this class before the next one is loaded
print(data_len)
    

104303


In [38]:
# Class 'x' -> label 33.
data33 = load_images_from_folder('x')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data33):
    data33[i] = np.append(sample, 33)

save_file(data33)
data_len += len(data33)
del data33  # release this class before the next one is loaded
print(data_len)


109303


In [39]:
# Class 'y' -> label 34.
data34 = load_images_from_folder('y')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data34):
    data34[i] = np.append(sample, 34)

save_file(data34)
data_len += len(data34)
del data34  # release this class before the next one is loaded
print(data_len)
    

114303


In [40]:
# Class 'z' -> label 35.
data35 = load_images_from_folder('z')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data35):
    data35[i] = np.append(sample, 35)

save_file(data35)
data_len += len(data35)
del data35  # release this class before the next one is loaded
print(data_len)


119303


In [41]:
# Class '-' -> label 36.
data36 = load_images_from_folder('-')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data36):
    data36[i] = np.append(sample, 36)

save_file(data36)
data_len += len(data36)
del data36  # release this class before the next one is loaded
print(data_len)
    

124303


In [42]:
# Class '+' -> label 37.
data37 = load_images_from_folder('+')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data37):
    data37[i] = np.append(sample, 37)

save_file(data37)
data_len += len(data37)
del data37  # release this class before the next one is loaded
print(data_len)
    

129303


In [43]:
# Class '(' -> label 38.
data38 = load_images_from_folder('(')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data38):
    data38[i] = np.append(sample, 38)

save_file(data38)
data_len += len(data38)
del data38  # release this class before the next one is loaded
print(data_len)
    

134303


In [44]:
# Class ')' -> label 39.
data39 = load_images_from_folder(')')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data39):
    data39[i] = np.append(sample, 39)

save_file(data39)
data_len += len(data39)
del data39  # release this class before the next one is loaded
print(data_len)
    

139303


In [45]:
# Class '=' -> label 40.
data40 = load_images_from_folder('=')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data40):
    data40[i] = np.append(sample, 40)

save_file(data40)
data_len += len(data40)
del data40  # release this class before the next one is loaded
print(data_len)
    

144303


In [46]:
# Class '/' (stored in folder 'div') -> label 41.
data41 = load_images_from_folder('div')

# Integer label: a string label would make np.append coerce every pixel value
# to a unicode string. The values written to the CSV are unchanged.
for i, sample in enumerate(data41):
    data41[i] = np.append(sample, 41)

save_file(data41)
data_len += len(data41)
del data41  # release this class before the next one is loaded
print(data_len)
        

145171
