In [2]:
import numpy as np
import cv2 # OpenCV
import matplotlib
import matplotlib.pyplot as plt
import collections

In [3]:
# render figures inline in the notebook
%matplotlib inline
# use a larger default figure size
matplotlib.rcParams['figure.figsize'] = 16,12

In [4]:
# keras
from tensorflow import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation

from tensorflow.keras.optimizers import SGD
from sklearn.cluster import KMeans

2022-11-23 15:12:43.841234: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/stex/Desktop/SoftComputing/sc-2022-siit/env/lib/python3.10/site-packages/cv2/../../lib64:
2022-11-23 15:12:43.841261: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
def load_image(path):
    """Read the image at ``path`` and return it with channels in RGB order.

    The image is read with ``cv2.imread`` and converted with
    ``cv2.COLOR_BGR2RGB``.

    Args:
        path: Location of the image file.

    Returns:
        The decoded image converted from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read an image from ``path``.
    """
    image_bgr = cv2.imread(path)
    if image_bgr is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {path!r}")
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

def image_gray(image):
    """Convert an RGB image to grayscale via ``cv2.COLOR_RGB2GRAY``."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return grayscale

def image_bin(image_gs):
    """Binarize a grayscale image with a fixed global threshold.

    Uses ``cv2.threshold`` with ``cv2.THRESH_BINARY``, threshold 127 and
    maximum value 255.

    Args:
        image_gs: Grayscale image to threshold.

    Returns:
        The thresholded image returned by ``cv2.threshold``.
    """
    # cv2.threshold allocates its own output, so no buffer is pre-created here;
    # the first returned value (the threshold actually used) is not needed.
    _, binary = cv2.threshold(image_gs, 127, 255, cv2.THRESH_BINARY)
    return binary

def invert(image):
    """Return the negative of ``image`` by subtracting it from 255."""
    negative = 255 - image
    return negative

def display_image(image, color=False):
    """Show ``image`` with matplotlib.

    When ``color`` is falsy the image is drawn with the 'gray' colormap;
    otherwise it is drawn with matplotlib's default settings.
    """
    if not color:
        plt.imshow(image, 'gray')
        return
    plt.imshow(image)

def dilate(image):
    """Apply one iteration of dilation with a 3x3 all-ones structuring element."""
    structuring_element = np.ones((3, 3))  # 3x3 block
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated

def erode(image):
    """Apply one iteration of erosion with a 3x3 all-ones structuring element."""
    structuring_element = np.ones((3, 3))  # 3x3 block
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

In [6]:
def resize_region(region):
    """Resize ``region`` to 28x28 using nearest-neighbour interpolation."""
    target_size = (28, 28)
    resized = cv2.resize(region, target_size, interpolation=cv2.INTER_NEAREST)
    return resized

In [7]:
def scale_to_range(image):
    """Divide every element of ``image`` by 255 (maps 0..255 onto 0..1)."""
    max_intensity = 255
    return image / max_intensity

In [8]:
def matrix_to_vector(image):
    """Return the elements of ``image`` as a flat vector via ``image.flatten()``."""
    vector = image.flatten()
    return vector

In [9]:
def prepare_for_ann(regions):
    """Turn each region into a network input vector.

    Every region is passed through ``scale_to_range`` and then
    ``matrix_to_vector``; the results are returned as a list in input order.
    """
    return [matrix_to_vector(scale_to_range(region)) for region in regions]

In [10]:
def convert_output(alphabet):
    """Build the one-hot target vectors for ``alphabet``.

    Row ``i`` of the returned array has a 1 at position ``i`` and zeros
    elsewhere, so each alphabet entry maps to its own output position.
    """
    size = len(alphabet)
    one_hot_rows = [np.zeros(size) for _ in range(size)]
    for position, row in enumerate(one_hot_rows):
        row[position] = 1
    return np.array(one_hot_rows)

In [11]:
def create_ann(output_size, input_size=784, hidden_size=128):
    """Create a two-layer fully connected network with sigmoid activations.

    Args:
        output_size: Number of units in the output layer.
        input_size: Length of each input vector. The default of 784 matches
            a flattened 28x28 region.
        hidden_size: Number of units in the hidden layer (default 128).

    Returns:
        The ``Sequential`` model; it is not compiled here.
    """
    ann = Sequential()
    ann.add(Dense(hidden_size, input_dim=input_size, activation='sigmoid'))
    ann.add(Dense(output_size, activation='sigmoid'))
    return ann

In [12]:
def train_ann(ann, X_train, y_train, epochs):
    """Compile and fit ``ann`` on the given samples, then return it.

    The model is compiled with mean squared error loss and SGD
    (learning rate 0.01, momentum 0.9) and fitted with batch size 1,
    without shuffling and without progress output.

    Args:
        ann: Model to train.
        X_train: Network inputs.
        y_train: Desired outputs for those inputs.
        epochs: Number of training epochs.
    """
    features = np.array(X_train, dtype=np.float32)  # given inputs
    targets = np.array(y_train, dtype=np.float32)   # desired outputs for those inputs

    print("\nTraining started...")
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    ann.compile(loss='mean_squared_error', optimizer=optimizer)
    ann.fit(
        features,
        targets,
        epochs=epochs,
        batch_size=1,
        verbose=0,
        shuffle=False,
    )
    print("\nTraining completed...")
    return ann

In [13]:
def winner(output):
    """Return the index of the largest value in ``output``.

    On ties the first (lowest) index wins, as with the built-in ``max``.
    """
    def activation(indexed_value):
        _, value = indexed_value
        return value

    best_index, _ = max(enumerate(output), key=activation)
    return best_index

In [44]:
def display_result(outputs, alphabet):
    """Map each network output vector to its alphabet entry.

    For every vector in ``outputs`` the index chosen by ``winner`` is looked
    up in ``alphabet``; the entries are returned as a list in input order.
    """
    return [alphabet[winner(output)] for output in outputs]

In [14]:
def select_roi_with_distances(image_orig, image_bin):
    """Find character-sized regions in ``image_bin`` and mark them on ``image_orig``.

    Contours of the binary image are filtered by contour area and bounding-box
    size. Each accepted region is cropped from the binary image, resized with
    ``resize_region``, and outlined with a green rectangle drawn directly on
    ``image_orig``.

    Returns:
        A tuple ``(image_orig, sorted_regions)`` where ``sorted_regions`` holds
        the resized regions ordered by the x coordinate of their bounding box.
    """
    contours, _ = cv2.findContours(image_bin.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    candidates = []  # (resized region, bounding box) pairs
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)  # position and size of the bounding rectangle
        area = cv2.contourArea(contour)

        # Limits shared by every accepted region.
        if not (area > 100 and h < 100 and w < 100):
            continue
        # A region must match one of two size profiles.
        regular_profile = h > 15 and w > 17
        low_area_profile = w > 15 and h > 30 and area < 286
        if not (regular_profile or low_area_profile):
            continue

        # Crop [y:y+h+1, x:x+w+1] from the binary image, then mark the region
        # on the original image.
        crop = image_bin[y:y+h+1, x:x+w+1]
        candidates.append((resize_region(crop), (x, y, w, h)))
        cv2.rectangle(image_orig, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Order by the bounding box x coordinate.
    candidates.sort(key=lambda candidate: candidate[1][0])
    sorted_regions = [resized for resized, _ in candidates]
    return image_orig, sorted_regions

In [28]:
def process_image(image_path):
    """Load an image and extract its character regions.

    The image is loaded, converted to grayscale and binarized, then passed to
    ``select_roi_with_distances``.

    Args:
        image_path: Path of the image to process.

    Returns:
        The list of resized regions returned by ``select_roi_with_distances``.
    """
    image_color = load_image(image_path)
    img = image_bin(image_gray(image_color))
    # NOTE(review): the original call was cut off mid-identifier; the arguments
    # below are the minimal completion consistent with the callers — confirm.
    selected_regions, letters = select_roi_with_distances(image_color, img)
    return letters
    


In [42]:
# Characters the network learns; the position in this list is the index of the
# matching network output.
# NOTE(review): 'ss' presumably stands for the glyph ß and sits between 'p' and
# 'r' rather than in alphabetical order — confirm.
alphabet = ['A', 'E', 'L','M','S','U','V','W','a','b','c','d','e','f','g','h',
            'i','k','l','m','n','o','p','ss','r','s','t','u','w','z']

# Character regions extracted from each training image.
a1_letters = process_image('data2/a1.jpg')
e2_letters = process_image('data2/e2.jpg')
l1_letters = process_image('data2/l1.jpg')
m1_letters = process_image('data2/m1.jpg')
s1_letters = process_image('data2/s1.jpg')
u1_letters = process_image('data2/u1.jpg')
v1_letters = process_image('data2/v1.jpg')
w1_letters = process_image('data2/w1.jpg')

print(len(a1_letters))
print(len(e2_letters))

# One training sample per alphabet entry: (label, regions of the source image,
# index of the region within that image).
training_samples = [
    ('A', a1_letters, 0),
    ('E', e2_letters, 0),
    ('L', l1_letters, 0),
    ('M', m1_letters, 0),
    ('S', s1_letters, 0),
    ('U', u1_letters, 0),
    ('V', v1_letters, 0),
    ('W', w1_letters, 0),
    ('a', a1_letters, 9),
    ('b', a1_letters, 2),
    ('c', u1_letters, 3),
    ('d', v1_letters, 6),
    ('e', a1_letters, 3),
    ('f', w1_letters, 8),
    ('g', l1_letters, 8),
    ('h', u1_letters, 4),
    ('i', a1_letters, 4),
    ('k', m1_letters, 3),
    ('l', a1_letters, 8),
    ('m', u1_letters, 1),
    ('n', l1_letters, 4),
    ('o', e2_letters, 3),
    ('p', a1_letters, 7),
    ('ss', s1_letters, 10),
    ('r', a1_letters, 1),
    ('s', a1_letters, 6),
    ('t', a1_letters, 5),
    ('u', v1_letters, 7),
    ('w', l1_letters, 6),
    ('z', a1_letters, 11),
]

# Sample i is trained against one-hot target i, so the labels must line up
# with the alphabet exactly.
assert [label for label, _, _ in training_samples] == alphabet, \
    "training samples are not aligned with the alphabet"

train_letters = [regions[index] for _, regions, index in training_samples]

print(len(train_letters))
print(len(alphabet))

inputs = prepare_for_ann(train_letters)
outputs = convert_output(alphabet)

# Size the output layer from the alphabet instead of a hard-coded count.
ann = create_ann(output_size=len(alphabet))
ann = train_ann(ann, inputs, outputs, epochs=2000)












12
13
30
30

Training started...

Training completed...


In [45]:
# Sanity check on two of the training inputs (positions 2 and 3 of the
# training set): print the raw network outputs, then the decoded characters.
result = ann.predict(np.array(inputs[2:4], dtype=np.float32))
print(result)
print("\n")
decoded = display_result(result, alphabet)
print(decoded)

[[0.00671631 0.06139344 0.8953208  0.01290652 0.01436201 0.03803584
  0.01858917 0.00497192 0.01199666 0.03143576 0.02107164 0.00139683
  0.01738778 0.02262628 0.00905791 0.02658951 0.00311512 0.0446046
  0.02517244 0.00481373 0.00516149 0.0033474  0.01317918 0.03351578
  0.02276164 0.00216135 0.01476097 0.00903872 0.00583553 0.02333859]
 [0.00888076 0.00333554 0.00799441 0.924788   0.02435404 0.00935531
  0.02390814 0.00449783 0.00689307 0.01313558 0.01756209 0.00459796
  0.01006097 0.0102137  0.01809558 0.01037145 0.00395328 0.00820297
  0.00118765 0.03366873 0.02031758 0.00550851 0.01516607 0.00493315
  0.01787147 0.01685095 0.00958666 0.02053797 0.00708586 0.00588518]]


['L', 'M']


In [58]:
# Run the trained network on the character regions of a separate test image.
test_letters = process_image('data2/w2.jpg')

test_inputs = prepare_for_ann(test_letters)
result = ann.predict(np.array(test_inputs, dtype=np.float32))
recognized = display_result(result, alphabet)
print(recognized)

['W', 'a', 's', 'c', 'h', 'S', 't', 'r', 'a', 'ss', 'e']
