In [11]:
from pathlib import Path, PosixPath
import pandas.plotting
import sys
import cv2 as cv
import tensorflow as tf 
#import git # pip install GitPython
import os
import matplotlib.pyplot as plt # pip install matplotlib
import numpy as np
import time
import pandas as pd
import random as rnd
from tensorflow.python import debug as tf_debug
import gspread
from oauth2client.service_account import ServiceAccountCredentials

In [12]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
print("=====>GPU Available: ", tf.test.is_gpu_available())
tf.debugging.set_log_device_placement(True)
tf.config.experimental.list_physical_devices()
print(tf.version.GIT_VERSION, tf.version.VERSION)

=====>GPU Available:  True
v1.14.0-rc1-22-gaf24dc9 1.14.0


# Config

In [None]:
#Log Data
print('Modification:')
modification = input() # Was wurde verändert
print('User:')
user = input()#git.util.get_user_id() # Bearbeiter, Sollte automatisch gelesen werden 
try:
    repo = git.Repo()
    branch = repo.active_branch # Projekt, Sollte automatisch gelesen werden
    lastCommit = repo.head.commit.hexsha #"ca324dadsa" # Stand, Sollte automatisch gelesen werden
except:
    print('Branch:')
    
    branch = input()
    lastCommit = ''
env = "Hal9k" # Test umgebung

In [13]:
# Model Config
samplesize = 1000 # Datensatz
split = 0.8  # Testdatensatz in % => 0.8 = 80% testdata
BATCH_SIZE = 80
IMG_HEIGHT = 224
IMG_WIDTH = 224

# Data Preparation

## Load Images
replaced with the code form Helena

In [14]:
def loadImg(paths, dimensions:int, output_size:int, sync:bool, aug:list=['all']): 
    images = {}
    for p in paths:
        img = cv.imread(str(p))
        img2= cv.resize(img,dsize=(IMG_HEIGHT,IMG_WIDTH), interpolation = cv.INTER_CUBIC)
        #Numpy array
        images[p.stem] = np.asarray(img2)
    return images

## Display Images

In [15]:
def showBatch(image_batch, label_batch, prediction=None):
    plt.figure(figsize=(20,20))
    for n in range(20):
        ax = plt.subplot(5,5,n+1)
        ax.imshow(image_batch[n])
        if prediction is not None:
            plt.title(f'{CLASS_NAMES[int(label_batch[n])]} | {CLASS_NAMES[int(np.argmax(prediction[n]))]}')
        else:
            plt.title(f'{CLASS_NAMES[int(label_batch[n])]}')
        ax.axis('off')
    plt.show()
        

## Format Labels / Classes /  Images

In [16]:
## Get Classes form Folder
data_dir = Path(os.path.join(Path.home(), 'Pictures/Pictures_Bicycles'))
image_count = len(list(data_dir.glob('*/*.jpg')))
CLASS_NAMES = []
for f in np.array([item.name for item in data_dir.glob('Training/*')]):
    name = f.split('_')[0]
    if name not in CLASS_NAMES :
        CLASS_NAMES.append(name)
print (CLASS_NAMES)



def createBatches(l, n):
    # looping till length l 
    nl = []
    for i in range(0, len(l), n):  
        nl.append(l[i:i + n]) 
    return nl

def train_val_split(keys: list): ## Uses split form model config
    train = int(len(keys)*split)
    print (f'Trainsize: {train}')
    return keys[:train], keys[train:]

def getLabels(keys):
    labels = []
    for path in keys:
        if str(path).startswith('uni'):
            folder = str(path).split('_')[0]
        else:
            folder = str(path).split('_')[0] # 0 = manifacturer, 1 = type
        labels.append(float(CLASS_NAMES.index(folder)))
    return labels

def getImages(img, keys):
    images = []
    for k in keys:
        images.append(img[k])
    return images


['unicycle', 'giant', 'trek', 'cube', 'canyon', 'cannondale']


In [17]:
def prepareDataset(train_path, test_path):
    # Prepare list with Image Paths
    train_paths = list(train_path.glob('*.jpg'))
    test_paths = list(test_path.glob('*.jpg'))
    
    ## Load Images
    raw_train_images = loadImg(train_paths, IMG_HEIGHT, samplesize, True) 
    raw_test_images = loadImg(test_paths, IMG_HEIGHT, 0, True) # Load images with just resize
    
    # Gen Train and Test Dataset
    train_keys = list(raw_train_images.keys()) 
    rnd.shuffle(train_keys) ## Shuffle traings dataset
    train, val = train_val_split(train_keys) # Spilt dataset for train and validation
    # Change dict to list 
    train_labels = getLabels(train)
    train_images = getImages(raw_train_images, train)
    val_labels = getLabels(val)
    val_images = getImages(raw_train_images, val)
    
    
    #Gen Test Dataset
    test = list(raw_test_images.keys())
    test_labels = getLabels(test)
    test_images = getImages(raw_test_images, test)
    
        
    print(f'Train {len(train_labels)}, {len(train_images)};  Val {len(val_labels)} {len(val_images)};  Test {len(test_labels)}, {len(test_images)}')
    
    return (createBatches(train_images, BATCH_SIZE), createBatches(train_labels,BATCH_SIZE)), (createBatches(val_images, int(BATCH_SIZE*(1.05-split))), createBatches(val_labels,int(BATCH_SIZE*(1.05-split)))),(test_images, test_labels)

In [18]:
train, val, test = prepareDataset(Path(os.path.join(data_dir, 'Training')), Path(os.path.join(data_dir, 'Test')))

Trainsize: 340
Train 340, 340;  Val 85 85;  Test 85, 85


In [19]:
print(f'{len(train)} {len(train[0])}, {len(val)} {len(val[0])}, {len(test)} {len(test[0])}')

2 5, 2 5, 2 85


# TF Code
## Build and Train model

In [20]:
### Your Code
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    ## Network Layers
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(CLASS_NAMES), activation='softmax')
])
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Start Timer 
start = time.time()
for n in range(len(train[0])):
    print(f'Batch: {n}')
    # Train
    tibatch = [train[0][n]]
    tlbatch = [train[1][n]]
    model.fit(tibatch, tlbatch, epochs=10, shuffle=True, steps_per_epoch=BATCH_SIZE)
    
    #val
    vibatch = [val[0][n]]
    vlbatch = [val[1][n]]
    test_loss, test_acc = model.evaluate(vibatch, vlbatch, verbose=2, steps=5)
    
## End Timer
end = time.time()
runtime = end-start
maxAcc = test_acc
maxLoss = test_loss

print('\nTest accuracy: {}, \nRuntime: {}'.format(test_acc, runtime))


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 16)      448       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 16)      0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 200704)            0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               25690240  
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 774       
Total params: 25,691,462
Trainable params: 25,691,462
Non-trainable params: 0
_________________________________________________________________
Batch: 0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


KeyboardInterrupt: 

## Predict Test Images and Print results
real value | prediction

In [None]:
## pick 20 random images form trainset
test_images, test_labels = [], []
ids = rnd.sample(range(0, len(test[0])), 20)
for i in range(20):
    test_images.append(test[0][ids[i]])
    test_labels.append(test[1][ids[i]])
    
ibatch = [test_images]
predictions = model.predict(ibatch, steps=2)
showBatch(test_images, test_labels, predictions)

### validate Accuracy based on Testfiles

In [None]:
#for pred in predictions:
preds = list(np.argmax(pred) for pred in predictions)
print(preds)
validatedTestAcc = 0
for i in range(len(test_labels)):
    print(f'Real: {test_labels[i]} => Est: {preds[i]}, Value: {max(predictions[i])}')
    if int(test_labels[i]) == preds[i]:
        validatedTestAcc += 1
validatedTestAcc = validatedTestAcc / len(test_labels)

# Log

In [None]:
# use creds to create a client to interact with the Google Drive API
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name(os.path.join(Path.home(),'client-secret.json'), scope)
client = gspread.authorize(creds)

# File and WorkSheet 
sheet = client.open("Model Evaluation").sheet1

# Read row form sheet
# readval = sheet.row_values(1)

sheet.append_row([modification, branch, user, lastCommit, str(samplesize), str(split), env, str(runtime), str(maxAcc), str(maxLoss), str(validatedTestAcc), str(BATCH_SIZE), str(IMG_HEIGHT)])