In [1]:
import tensorflow as tf
import keras
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import load_model, Model
from keras.layers import *
from sklearn.metrics import confusion_matrix
import os
import sys
import numpy as np
from score_dataset_helpers import *
import pandas as pd
from itertools import chain

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
base_dir = 'C:/Users/bened/OneDrive/Arbeit/Lernen/python_training/data_dual_inputs/'
train_dir = base_dir + 'train'
test_dir = base_dir + 'test'

## Define model

In [199]:
f1, f2, f3 = 32,64,128
c_size = (10,10)
k_size = (2,2)

pics_input = Input(shape = (75, 250, 3))
pics_c1 = Conv2D(filters = f1, kernel_size = c_size, activation = "relu")(pics_input)
pics_m1 = MaxPooling2D(k_size)(pics_c1)

pics_c2 = Conv2D(filters = f2, kernel_size = c_size, activation = "relu")(pics_m1)
pics_m2 = MaxPooling2D(k_size)(pics_c2)

pics_c3 = Conv2D(filters = f3, kernel_size = c_size, activation = "relu")(pics_m2)
pics_m3 = MaxPooling2D((k_size))(pics_c3)

pics_f = Flatten()(pics_m2)
pics_drop = Dropout(0.5)(pics_f)
pics_d = Dense(20, activation = "relu")(pics_drop)

nums_input = Input(shape = ([1]))
nums_features = Dense(1, activation = "relu")(nums_input)

conc_layer = concatenate([pics_d, nums_features])
dense_1 = Dense(50, activation = "relu")(conc_layer)
dense_2 = Dense(1, activation = "sigmoid")(dense_1)

model = Model(inputs = [pics_input, nums_input], outputs = dense_2)
model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

In [200]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_54 (InputLayer)           (None, 75, 250, 3)   0                                            
__________________________________________________________________________________________________
conv2d_80 (Conv2D)              (None, 66, 241, 32)  9632        input_54[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_79 (MaxPooling2D) (None, 33, 120, 32)  0           conv2d_80[0][0]                  
__________________________________________________________________________________________________
conv2d_81 (Conv2D)              (None, 24, 111, 64)  204864      max_pooling2d_79[0][0]           
__________________________________________________________________________________________________
max_poolin

## Create vectors of scores from pictures

In [30]:
train_scores = train_dir + "/score"
train_pics = train_dir + "/pics"

test_scores = test_dir + "/score"
test_pics = test_dir + "/pics"

In [31]:
train_names_scores = [f for f in os.listdir(train_scores)]
train_names_pics = [f for f in os.listdir(train_pics)]

test_names_scores = [f for f in os.listdir(test_scores)]
test_names_pics = [f for f in os.listdir(test_pics)]

In [32]:
print(str(len(train_names_scores)) + " training files for scores")
print(str(len(train_names_pics)) + " training files for pictures")

print(str(len(test_names_scores)) + " test files for scores")
print(str(len(test_names_pics)) + " test files for pictures")

1077 training files for scores
1077 training files for pictures
108 test files for scores
108 test files for pictures


In [33]:
train_dataset_score = create_dataset("score", train_names_scores, base_dir + "train/")

test_dataset_score = create_dataset("score", test_names_scores, base_dir + "test/")

In [34]:
def get_result(result):
    resultstr = ''
    for i in range(5):
        resultstr += str(np.argmax(result[i]))
    return resultstr

In [35]:
score_model = load_model("C:/Users/bened/OneDrive/Arbeit/Lernen/python_training/models/score_v2.h5")

In [36]:
#Create vectors for scores and for labels
score_vec = []
for i in range(len(train_names_scores)):
    y_pred = score_model.predict(train_dataset_score[i].reshape(1, 50, 160, 1))
    score_vec.append(get_result(y_pred))

label_vec = ['up' in x for x in train_names_pics]
label_vec = np.array(label_vec)
label_array = label_vec.astype(float)

score_vec_test = []
for i in range(len(test_names_scores)):
    y_pred_test = score_model.predict(test_dataset_score[i].reshape(1, 50, 160, 1))
    score_vec_test.append(get_result(y_pred_test))

label_vec_test = ['up' in x for x in test_names_pics]
label_vec_test = np.array(label_vec_test)
label_array_test = label_vec_test.astype(float)

In [37]:
print_dat = pd.DataFrame({'score' : score_vec, 'label' : label_array, 'orig_name' : train_names_scores})
print_dat

Unnamed: 0,label,orig_name,score
0,0.0,right_1.png,00008
1,0.0,right_10.png,00073
2,0.0,right_100.png,00172
3,0.0,right_101.png,00185
4,0.0,right_102.png,00190
5,0.0,right_105.png,00200
6,0.0,right_106.png,00200
7,0.0,right_107.png,00226
8,0.0,right_108.png,00238
9,0.0,right_109.png,00243


In [38]:
pic_array = np.zeros((len(train_names_pics), 75, 250, 3))

for i in range(len(train_names_pics)):
    x = load_img(train_pics + '/' + train_names_pics[i], target_size = (75, 250))
    #x = img_to_array(x)
    pic_array[i] = x
    
pic_array_test = np.zeros((len(test_names_pics), 75, 250, 3))

for i in range(len(test_names_pics)):
    x = load_img(test_pics + '/' + test_names_pics[i], target_size = (75, 250))
    #x = img_to_array(x)
    pic_array_test[i] = x

In [39]:
pic_array = np.array(pic_array)
pic_array /= 255
score_array = np.array(score_vec)
score_array = np.float32(score_array)

pic_array_test = np.array(pic_array_test)
pic_array_test /= 255
score_array_test = np.array(score_vec_test)
score_array_test = np.float32(score_array_test)

In [40]:
pic_array.shape

(1077, 75, 250, 3)

In [41]:
score_array.shape

(1077,)

In [42]:
label_array.shape

(1077,)

In [201]:
model.fit([pic_array, score_array], label_array, epochs = 5, shuffle = True, batch_size = 100, validation_split = 0.2)

Train on 861 samples, validate on 216 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x22471db9748>

In [109]:
#model.save("C:/Users/bened/OneDrive/Arbeit/Lernen/python_training/models/model_dual_input_v3.h5")

In [110]:
#model = load_model("C:/Users/bened/OneDrive/Arbeit/Lernen/python_training/models/model_dual_input.h5")

In [194]:
results = model.predict([pic_array_test, score_array_test]) > 0.5

In [195]:
label_array_test = label_array_test > 0.5
results = [y for x in results.tolist() for y in x]
correct = [r == l for (r, l) in zip(results, label_array_test)]

test_df = pd.DataFrame({"results" : results, "labels" : label_array_test.tolist(),
                        "score" : score_array_test.tolist(), "correct_prediction" : correct})

In [196]:
test_df["correct_prediction"].value_counts()

True     69
False    39
Name: correct_prediction, dtype: int64

In [190]:
confusion_matrix(label_array_test, results)

array([[69,  0],
       [39,  0]])

In [86]:
results

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True]