# Redes neuronales convolucionales

Robots Autónomos. Mapas Topológicos visuales

Autores:
    Alejandro Benítez López, Elena Benito Frey, Mario González Carbayo, Isidro López Dominguez, Blanca Martínez Donoso y Ángel Pavón Pérez

In [1]:
import random
import numpy as np
import cv2
from PIL import Image
import pickle
import keras


Using TensorFlow backend.


### Modelo de la red neuronal

Se definen las capas que tendrá el modelo:

In [2]:
def LoadModel():
        model = keras.Sequential([
            keras.layers.Conv2D(96, (11,11),activation='relu', input_shape=(128,128,3)),
            keras.layers.MaxPooling2D(2,2),
            keras.layers.Conv2D(256, (7,7),activation='relu'),
            keras.layers.MaxPooling2D(2,2),
            keras.layers.Conv2D(384, (5,5),activation='relu'),
            keras.layers.MaxPooling2D(2,2),
            keras.layers.Conv2D(384, (5,5),activation='relu'),
            keras.layers.Flatten(),
            keras.layers.Dense(1024),
            keras.layers.Dense(1024),
            keras.layers.Dense(9, activation="softmax") 
        ])
        
        return model

Definimos la función de normalización

In [3]:
def Normalize(image):
    stdR = np.std(image[:,:,0]) #Calculamos la varianza para cada espacio de color
    stdG = np.std(image[:,:,1])
    stdB = np.std(image[:,:,2])
    
    R = np.mean(image[:,:,0]) #La mediana de cada espacio de color
    G = np.mean(image[:,:,1])
    B = np.mean(image[:,:,2])
    
    image[:,:,0] = image[:,:,0] - R
    image[:,:,1] = image[:,:,1] - G
    image[:,:,2] = image[:,:,2] - B
    
    image[:,:,0] = image[:,:,0]/stdR
    image[:,:,1] = image[:,:,1]/stdG
    image[:,:,2] = image[:,:,2]/stdB
    
    minimoR = np.min(image[:,:,0])
    minimoG = np.min(image[:,:,1])
    minimoB = np.min(image[:,:,2])
    
    image[:,:,0] = image[:,:,0]-minimoR # Restamos el valor minimo de cada espacio para que el valor mas bajo sea 0
    image[:,:,1] = image[:,:,1]-minimoG
    image[:,:,2] = image[:,:,2]-minimoB
    
    maximoR = np.max(image[:,:,0]) 
    maximoG = np.max(image[:,:,1])
    maximoB = np.max(image[:,:,2])
    
    factorR = 1/maximoR #Calculamos el factor escala por el que hay que multiplicar cada espacio para que su valor maximo sea 1
    factorG = 1/maximoG
    factorB = 1/maximoB
    
    image[:,:,0] = image[:,:,0] * factorR #Operamos de modo que cada color tiene sus valores comprendidos entre [0,1]
    image[:,:,1] = image[:,:,1] * factorG
    image[:,:,2] = image[:,:,2] * factorB
    
    return image

### Funciones de carga de datos

Establecemos las rutas y constantes

In [4]:
output_dimension = (128,128)

video_route = "./landmarks_videos/landmarks_{:}.avi" #Esto se usaba para la carga de imagenes de training, pero funcionaba mal
video = cv2.VideoCapture(video_route)                #y, como se comenta en la memoria, usamos el video de test para entrenar

video_route_test = "./landmarks_videos/final_test_video.mp4"
video_test = cv2.VideoCapture(video_route_test)

Definimos las funciones de carga de frames de testeo

In [5]:
def getFrameNumber(): #Devueve el numero de frames
    return int(video_test.get(cv2.CAP_PROP_FRAME_COUNT))
    
def getFrames(number): #Cargamos number imagenes consecutivas sobre el video_test
    frames=[]
    for i in range(number):
        ret, frame = video_test.read()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        frame = frame.resize(output_dimension, Image.ANTIALIAS)
        frame = np.asarray( frame, dtype="float32" )
        frame = Normalize(frame)
        frames.append(frame)
    return frames

Definimos las funciones de carga de frames para entrenamiento y development

In [6]:
def generateRandomNumbers(end, number):
    res = []
    for i in range(number):
        num = random.randint(0, end-1)
        while num in res:
            num = random.randint(0, end-1)
        res.append(num)
    res.sort()
    
    return res

def getImagesFromTest(rands): 
    frames =[]
    video_route = "./landmarks_videos/final_test_video.mp4"
    video = cv2.VideoCapture(video_route)    
    for i in range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))):
        ret, frame = video.read()
        if i in rands:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = Image.fromarray(frame)
            frame = frame.resize(output_dimension, Image.ANTIALIAS)
            frame = np.asarray( frame, dtype="float32" )
            frame = Normalize(frame)            
            frames.append(frame)
    return frames

def developmentTest(number): #Devuelve number frames aleatorios del video de test
    gt = pickle.load(open("clasificacion_frames.sav", 'rb'))
    labels = []
    rands = generateRandomNumbers(len(gt), number)
    dev = getImagesFromTest(rands)
    for i in rands:
        labels.append(int(gt[i]))
    return dev, labels


### Train y test de la red

Carga de datos

In [7]:
x,y = developmentTest(100) #We get the training set from the test video

x = np.asarray(x)#Change them from list to np array
y = np.asarray(y) 

x_dev, y_dev = developmentTest(100) #We get the development set from the test video

x_dev = np.asarray(x_dev)
y_dev = np.asarray(y_dev) 




Cargamos y compilamos el modelo

In [8]:
model = LoadModel() #We get the model

model.compile(optimizer="Adam",  #Compile it
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=[keras.metrics.SparseCategoricalAccuracy()],
              )

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



Entrenamiento de la red

In [9]:
result = 0  #Best score in the training state
for i in range(100): #Realizamos tantos ciclos de entrenamiento como se indique en el rango
    right=0 #Auxiliar to calculate the accuracy
    model.fit(x, y, epochs=1, batch_size=32) #The training state
    prediction = model.predict(x_dev) #We predict on the dev sey and calculate the amount of matches out of it
    prediction = np.argmax(prediction, axis=1)
    for j in range(len(prediction)): 
        right += (prediction[j]==y_dev[j])
    score = right/len(prediction)
    print(score)
    if(score > result):
        best_model = model #We save the model that got best score on the dev set
        result = score
print("Accuracy sobre el test set {:}".format(result))



Epoch 1/1
0.1
Epoch 1/1
0.1
Epoch 1/1
0.28
Epoch 1/1
0.28
Epoch 1/1
0.1
Epoch 1/1
0.16
Epoch 1/1
0.3
Epoch 1/1
0.28
Epoch 1/1
0.12
Epoch 1/1
0.3
Epoch 1/1
0.24
Epoch 1/1
0.12
Epoch 1/1
0.38
Epoch 1/1
0.12
Epoch 1/1
0.29
Epoch 1/1
0.12
Epoch 1/1
0.15
Epoch 1/1
0.18
Epoch 1/1
0.44
Epoch 1/1
0.49
Epoch 1/1
0.51
Epoch 1/1
0.35
Epoch 1/1
0.68
Epoch 1/1
0.55
Epoch 1/1
0.68
Epoch 1/1
0.67
Epoch 1/1
0.67
Epoch 1/1
0.77
Epoch 1/1
0.77
Epoch 1/1
0.81
Epoch 1/1
0.75
Epoch 1/1
0.76
Epoch 1/1
0.76
Epoch 1/1
0.71
Epoch 1/1
0.86
Epoch 1/1
0.81
Epoch 1/1
0.82
Epoch 1/1
0.82
Epoch 1/1
0.52
Epoch 1/1
0.68
Epoch 1/1
0.69
Epoch 1/1
0.75
Epoch 1/1
0.79
Epoch 1/1
0.83
Epoch 1/1
0.76
Epoch 1/1
0.8
Epoch 1/1
0.84
Epoch 1/1
0.84
Epoch 1/1
0.79
Epoch 1/1
0.77
Epoch 1/1
0.81
Epoch 1/1
0.85
Epoch 1/1
0.82
Epoch 1/1
0.8
Epoch 1/1
0.86
Epoch 1/1
0.85
Epoch 1/1
0.78
Epoch 1/1
0.85
Epoch 1/1
0.89
Epoch 1/1
0.87
Epoch 1/1
0.88
Epoch 1/1
0.89
Epoch 1/1
0.9
Epoch 1/1
0.86
Epoch 1/1
0.86
Epoch 1/1


0.87
Epoch 1/1
0.84
Epoch 1/1
0.87
Epoch 1/1
0.95
Epoch 1/1
0.9
Epoch 1/1
0.88
Epoch 1/1
0.82
Epoch 1/1
0.92
Epoch 1/1
0.89
Epoch 1/1
0.89
Epoch 1/1
0.89
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.92
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Epoch 1/1
0.9
Accuracy sobre el test set 0.95


Calculamos la predicción de la red sobre el test 

In [10]:
num = getFrameNumber() #Cargamos el numero de frames del video

test = [] 
n = int(num//50) #Calculamos de 50 en 50 para evitar colapso en la ram o la memoria de video
for i in range(n):
    print("{:} de {:}".format(i, n))
    frames = getFrames(50)
    frames = np.asarray(frames)
    predictions = best_model.predict(frames) #Testeamos el modelo sobre el mejor resultado
    for j in range(len(predictions)):
        test.append(np.argmax(predictions[j]))
        

0 de 84
1 de 84
2 de 84
3 de 84
4 de 84
5 de 84
6 de 84
7 de 84
8 de 84
9 de 84
10 de 84
11 de 84
12 de 84
13 de 84
14 de 84
15 de 84
16 de 84
17 de 84
18 de 84
19 de 84
20 de 84
21 de 84
22 de 84
23 de 84
24 de 84
25 de 84
26 de 84
27 de 84
28 de 84
29 de 84
30 de 84
31 de 84
32 de 84
33 de 84
34 de 84
35 de 84
36 de 84
37 de 84
38 de 84
39 de 84
40 de 84
41 de 84
42 de 84
43 de 84
44 de 84
45 de 84
46 de 84
47 de 84
48 de 84
49 de 84
50 de 84
51 de 84
52 de 84
53 de 84
54 de 84
55 de 84
56 de 84
57 de 84
58 de 84
59 de 84
60 de 84
61 de 84
62 de 84
63 de 84
64 de 84
65 de 84
66 de 84
67 de 84
68 de 84
69 de 84
70 de 84
71 de 84
72 de 84
73 de 84
74 de 84
75 de 84
76 de 84
77 de 84
78 de 84
79 de 84
80 de 84
81 de 84
82 de 84
83 de 84


Calculamos la tasa de acierto

In [11]:
gt = pickle.load(open("clasificacion_frames.sav", 'rb')) #Cargamos el ground truth 

right = 0

for i in range(len(test)):
    right += test[i]==gt[i]
    
print(right/len(test))

0.8721428571428571
