# Deep Learning Ball detection Algorithm
### This project is supposed to run on the ITAndroids soccer-playing robot

#### First, let's load some libraries

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import tensorflow as tf
from keras import layers
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.layers import Reshape, Flatten, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.merge import concatenate
from keras.models import Model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
import pydot
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras.optimizers import SGD, Adam, RMSprop

import keras.backend as K
K.set_image_data_format('channels_last')
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

%matplotlib inline

### Then, let's define the neural network model

In [None]:
def minha2_skip(input_size, cosize):
    """Build the small fully-convolutional detector with one skip branch.

    Args:
        input_size: first spatial dimension of the input tensor.
        cosize: second spatial dimension of the input tensor.

    Returns:
        A Keras ``Model`` named 'Fast' that maps an
        ``(input_size, cosize, 3)`` image to a feature map with 5 channels.
        Four 2x2 max-pooling stages sit between the input and the output;
        the fifth pooling uses stride 1 with 'same' padding.
    """

    def bloco_conv(tensor, filtros, nucleo, nome_conv, nome_norm):
        # Bias-free convolution -> batch normalisation -> LeakyReLU(0.1).
        tensor = Conv2D(filtros, nucleo, strides=(1, 1), padding='same',
                        name=nome_conv, use_bias=False)(tensor)
        tensor = BatchNormalization(name=nome_norm)(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    input_image = Input(shape=(input_size, cosize, 3))

    # Layer 1 (originally four filters, raised to eight).
    x = bloco_conv(input_image, 8, (3, 3), 'conv_1', 'norm_1')

    # Layers 2-5: the filter count doubles each time, each followed by pooling.
    for indice, filtros in enumerate((8, 16, 32, 64), start=2):
        x = bloco_conv(x, filtros, (3, 3),
                       'conv_' + str(indice), 'norm_' + str(indice))
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 6: stride-1 pooling with 'same' padding.
    x = bloco_conv(x, 128, (3, 3), 'conv_6', 'norm_6')
    x = MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same')(x)

    # The skip branch starts from the layer-6 output.
    skip_connection = x

    # Layers 7-8
    for indice in (7, 8):
        x = bloco_conv(x, 256, (3, 3),
                       'conv_' + str(indice), 'norm_' + str(indice))

    # 1x1 projection of the skip branch before it is merged back in.
    skip_connection = bloco_conv(skip_connection, 256, (1, 1),
                                 'conv_skip', 'norm_21')

    x = concatenate([skip_connection, x])

    # Output head: 5 raw values per cell, with bias and no activation.
    x = Conv2D(5, (1, 1), strides=(1, 1), padding='same',
               name='conv_9', use_bias=True)(x)

    return Model(inputs=input_image, outputs=x, name='Fast')

In [None]:
# Build the network; 240 and 320 are passed as input_size and cosize.
rede = minha2_skip(240,320)
# Print the layer-by-layer description of the model.
rede.summary()

#### Now, let's import the libraries needed to load the images used to train the network

In [None]:
import glob
import bisect
from os.path import basename
import xml.etree.ElementTree
import numpy as np
import cv2

# Current working directory; later cells build data and model paths from it.
pasta_atual = os.getcwd()

In [None]:
def Metodo2(pasta):  # images at 1/2 scale, matched to my network
    """Load the images and ball annotations of a folder as training arrays.

    Every ``*.xml`` file in ``pasta`` yields one 15x20x5 target grid and every
    ``*.png`` / ``*.jpg`` file yields one 240x320 image.  Both lists are
    ordered by file name without extension so that sample ``k`` of ``X`` and
    ``Y`` come from the same name.

    Annotation coordinates are halved before encoding.  The cell that holds
    the box centre receives ``(1, x_offset, y_offset, width, height)``:
    offsets are fractions of a 16-pixel cell, and width and height are both
    divided by 320.

    Args:
        pasta: directory containing the images and their XML annotations.

    Returns:
        ``(X, Y)``: ``X`` holds the images as floats in [0, 1] in RGB channel
        order (the same order the inference cell of this notebook feeds to
        the network); ``Y`` holds one target grid per annotation file.

    Raises:
        OSError: if an image file cannot be decoded.
    """
    import warnings

    linhas, colunas, celula = 15, 20, 16

    def raiz(caminho):
        # File name without directory or extension; dots inside the name are
        # kept.
        return os.path.splitext(basename(caminho))[0]

    def chave(caminho):
        # Same ordering key for both lists, so equal names land on equal
        # positions.
        return (raiz(caminho), caminho)

    anotacoes = sorted(glob.glob(pasta + '/*.xml'), key=chave)
    fotos = sorted(glob.glob(pasta + '/*.png') + glob.glob(pasta + '/*.jpg'),
                   key=chave)

    if [raiz(c) for c in anotacoes] != [raiz(c) for c in fotos]:
        warnings.warn(
            "Metodo2: images and annotations in %r do not pair up one-to-one "
            "by file name; X and Y may be misaligned" % (pasta,))

    para_imprimir = []
    for caminho in anotacoes:
        coisa = np.zeros((linhas, colunas, 5), dtype=float)
        ball = xml.etree.ElementTree.parse(caminho).getroot().find('object')
        if ball is not None:
            # NOTE(review): the fifth child of <object> is assumed to be the
            # bounding box with xmin, ymin, xmax, ymax in that order
            # (Pascal VOC layout) - confirm against the annotation tool.
            caixa = ball[4]
            xmin, ymin, xmax, ymax = (float(caixa[k].text) / 2
                                      for k in range(4))
            Xc = (xmin + xmax) / 2
            Yc = (ymin + ymax) / 2
            # Clamp so a centre on the right or bottom border still falls in
            # the last cell instead of indexing past the grid.
            posicx = min(max(int(Xc / celula), 0), colunas - 1)
            posicy = min(max(int(Yc / celula), 0), linhas - 1)
            coisa[posicy, posicx] = (
                1,
                (Xc - posicx * celula) / celula,
                (Yc - posicy * celula) / celula,
                (xmax - xmin) / 320,
                (ymax - ymin) / 320,
            )
        para_imprimir.append(coisa)

    Y = np.array(para_imprimir)

    entrada = []
    for arquivo in fotos:
        img = cv2.imread(arquivo)
        if img is None:
            raise OSError("could not read image: " + arquivo)
        # OpenCV decodes to BGR; convert so training sees the channel order
        # used at inference time and by matplotlib's imshow.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (320, 240), interpolation=cv2.INTER_AREA)
        entrada.append(img)

    X = np.array(entrada).astype(float) / 255

    return X, Y

#### Loading images to train the network

In [None]:
# Load the training set from the 'teste2' folder under the working directory.
pasta_treino = pasta_atual + '/teste2'
x, y = Metodo2(pasta_treino)

# Show the first image as a sanity check, then report both array shapes.
imshow(x[0])
for rotulo, matriz in (("X", x), ("Y", y)):
    print("Shape of " + rotulo, matriz.shape)

#### Let's define the cost function and train the Convolutional Neural Network

In [None]:
# Anchor size and number of boxes used to decode the predicted width/height.
# custo_yolo reads them when it is called, so they have to exist before the
# network is compiled and trained; the inference section of this notebook
# assigns the same values again.
ANCHORS = [5, 5]
BOX = 1


def custo_yolo(y_true, y_pred):
    """YOLO-style sum-of-squares loss for the single-box grid output.

    The last axis of both tensors is read as
    ``(confidence, x, y, width, height)``.  Predictions are raw network
    outputs: confidence, x and y go through a sigmoid, width and height
    through ``exp`` scaled by ``ANCHORS``.  Position and size terms are masked
    by the ground-truth confidence, so only cells that contain an object
    contribute to them.

    Args:
        y_true: target tensor.
        y_pred: raw prediction tensor with the same layout.

    Returns:
        Scalar tensor:
        ``5*xy + 10*wh + 0.5*no_object_confidence + 5*object_confidence``.
    """
    # Ground-truth confidence doubles as the "cell contains an object" mask.
    class_true = y_true[..., 0]
    confidence_true = class_true
    confidence_pred = tf.sigmoid(y_pred[..., 0])

    Xc_true = y_true[..., 1]
    Yc_true = y_true[..., 2]
    Xc_pred = tf.multiply(class_true, tf.sigmoid(y_pred[..., 1]))
    Yc_pred = tf.multiply(class_true, tf.sigmoid(y_pred[..., 2]))

    # Sizes are compared through their square roots.
    W_true = tf.sqrt(tf.abs(y_true[..., 3]))
    H_true = tf.sqrt(tf.abs(y_true[..., 4]))
    WH_pred = tf.sqrt(tf.abs(tf.exp(y_pred[..., 3:5])*np.reshape(ANCHORS, [1,1,BOX,2])))
    W_pred = tf.multiply(class_true, WH_pred[..., 0])
    H_pred = tf.multiply(class_true, WH_pred[..., 1])

    loss_xy = tf.square(Xc_pred - Xc_true) + tf.square(Yc_pred - Yc_true)
    loss_xy = 5*tf.reduce_sum(loss_xy)

    loss_wh = tf.square(W_true - W_pred) + tf.square(H_pred - H_true)
    loss_wh = 10*tf.reduce_sum(loss_wh)

    # Confidence error, weighted differently for empty and occupied cells.
    no_object = 1 - class_true
    diff_conf = tf.square(confidence_true - confidence_pred)
    loss_no_object = 0.5*tf.reduce_sum(tf.multiply(diff_conf, no_object))
    loss_object = 5*tf.reduce_sum(tf.multiply(class_true, diff_conf))

    return loss_xy + loss_wh + loss_no_object + loss_object

In [None]:
# Adam optimizer with every hyper-parameter spelled out.
optimizer = Adam(lr=5e-5, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0.0)

# Attach the custom loss to the network before training.
rede.compile(optimizer=optimizer, loss=custo_yolo)

In [None]:
# Train with the loss and optimizer set by the previous compile call.
rede.fit(x, y, epochs=100, validation_split=0.2, batch_size=3)

# Re-compile with a built-in loss before saving.  The loss is given by its
# Keras string identifier: the bare name mean_squared_error is never imported
# in this notebook and raised a NameError.
# NOTE(review): presumably done so the saved file can be reloaded with
# load_model without passing custom_objects for custo_yolo - confirm.
rede.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

rede.save(pasta_atual+ '/teste1.hdf5')

## To run the neural network on an image, start here

In [None]:
from keras.models import load_model
from keras import layers
import os
import cv2
from matplotlib.pyplot import imshow
import numpy as np
import time

# NOTE(review): "-1" presumably hides every GPU so inference runs on the CPU;
# confirm it is set before TensorFlow initialises its devices.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Anchor values that scale the exponentiated width/height outputs when
# predictions are decoded.
ANCHORS = [5,5]
# Number of anchor boxes; ANCHORS is reshaped to [1, 1, BOX, 2] where it is used.
BOX = 1

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    decaimento = np.exp(np.negative(x))
    return 1 / (1 + decaimento)

In [None]:
def respostafinal(Y):
    """Decode the raw network output into its single most confident cell.

    Args:
        Y: raw prediction, reshaped here to a (15, 20, 5) grid whose last
            axis is read as (confidence, x, y, width, height).  It is not
            modified.

    Returns:
        ``(resposta, j, i)``: ``resposta`` is the decoded 5-vector of the
        winning cell (sigmoid applied to confidence, x and y; ``exp`` times
        ``ANCHORS`` applied to width and height), ``j`` is the cell's index
        along the 20-wide axis and ``i`` its index along the 15-high axis.
    """
    Y = np.reshape(Y, (15,20,5))
    classe = sigmoid(Y[..., 0])
    i, j = np.unravel_index(np.argmax(classe), np.shape(classe))
    # Work on a copy: indexing the reshaped grid gives a view, and decoding in
    # place would overwrite the caller's prediction array.
    resposta = Y[i, j, :].copy()
    resposta[0] = sigmoid(resposta[0])
    resposta[1:3] = sigmoid(resposta[1:3])
    resposta[3:5] = (np.exp(resposta[3:5]))*np.reshape(ANCHORS, [1,1,BOX,2])
    return resposta, j, i

In [None]:
def arrumando50(res,i , j):
    """Convert a decoded grid-cell prediction to full-image pixel values.

    Training targets store the box centre as a fraction of a 16-pixel cell on
    the half-resolution image, and width/height divided by 320 on that same
    image.  This function inverts that encoding and undoes the 1/2 scaling.

    Args:
        res: decoded cell vector ``[confidence, x_offset, y_offset, w, h]``
            with the offsets as fractions of a cell.
        i: cell index along the horizontal axis.
        j: cell index along the vertical axis.

    Returns:
        ``[confidence, Xc, Yc, W, H]`` with centre and size in pixels of the
        full-resolution image.
    """
    celula = 16   # cell side on the half-resolution image
    escala = 2    # half-resolution -> full-resolution
    probabilidade = res[0]
    # The offset is a fraction of a cell, so it is added to the cell index
    # before scaling by the cell size (it used to be added to i*16 unscaled).
    Xc = (res[1] + i) * celula * escala
    Yc = (res[2] + j) * celula * escala
    W = res[3]*640
    H = res[4]*640
    return [probabilidade, Xc, Yc, W, H]

In [None]:
def plotarimagem(resposta, image):
    """Draw the detected box and its centre on ``image`` and display it.

    Args:
        resposta: ``[confidence, Xc, Yc, width, height]`` in pixels of
            ``image``.
        image: image array to draw on.  The rectangles are drawn directly
            into it.
            NOTE(review): presumably holds values in 0-255, since it is
            divided by 255 before being shown - confirm.
    """
    Xc = int(resposta[1])
    Yc = int(resposta[2])
    largura = resposta[3]
    altura = resposta[4]
    # cv2.rectangle needs integer pixel coordinates; floor division keeps the
    # corners as ints, where "/" produces floats under Python 3.
    XMAX = int(2*Xc + largura) // 2
    XMIN = int(2*Xc - largura) // 2
    YMAX = int(2*Yc + altura) // 2
    YMIN = int(2*Yc - altura) // 2
    # Bounding box, then a small thick square marking the centre.
    cv2.rectangle(image, (XMIN, YMIN), (XMAX, YMAX), [0, 124, 245], 5)
    cv2.rectangle(image, (Xc, Yc), (Xc+2, Yc+2), [255, 0, 0], 20)
    # Scale for display with matplotlib.
    image = image/255
    imshow(image)
### Let's load a Keras model

In [None]:
# Base directory for the saved model and the test image.
pasta_atual = os.getcwd()
# Load the network from the same path the training section saves it to.
yolo = load_model(pasta_atual+ '/teste1.hdf5')

### Let's load an image and estimate the result

In [None]:
# Read the test image and switch OpenCV's BGR channel order to RGB.
img = np.array(cv2.cvtColor(cv2.imread(pasta_atual + '/nova4.png'),
                            cv2.COLOR_BGR2RGB))

# Network input: resize to 320x240, convert to float, scale by 1/255 and add
# a leading batch axis.
ima = cv2.resize(img, (320,240), interpolation = cv2.INTER_AREA).astype(float)
ima = np.reshape(ima/255, (1, 240, 320, 3))

# Float copy of the full-size image, used for drawing the result.
img = img.astype(float)

# Time the forward pass together with the decoding of its output.
start1 = time.time()
resposta = yolo.predict(ima)
a, b, c = respostafinal(resposta)
resposta = arrumando50(a, b, c)
end2 = time.time()

print(resposta)
plotarimagem(resposta, img)
print(end2-start1)