In [0]:
import os
import numpy as np
seed = 8
np.random.seed(seed)
import pickle
import pandas as pd
import tensorflow as tf
import math
from sklearn.preprocessing import LabelBinarizer
from random import shuffle
import scipy
import cv2

from tensorflow.python.client import device_lib
from tensorflow.keras import metrics 

def get_available_devices():
    """Return the names of all devices TensorFlow reports for this machine.

    Returns:
        list: the ``name`` attribute of every entry returned by
        ``device_lib.list_local_devices()``, in the order reported.
    """
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names

# Kept as the cell's last expression so the notebook displays the list.
get_available_devices()

In [0]:
def loadScreenshots(rootDir="C:\\Users\\Adjek\\Nextcloud\\DeeptechAI\\picturesCut"):
  """Load every ``convertedImg.jpg`` found below ``rootDir`` as a grayscale image.

  The name of the directory that directly contains the image is used as the
  dictionary key (the rest of the notebook treats it as the site's url).

  Args:
    rootDir: Directory tree to scan. Defaults to the path that used to be
      hard-coded here, so existing calls without arguments behave as before.

  Returns:
    dict mapping directory name -> image array as returned by ``cv2.imread``.
    Files that OpenCV cannot decode are skipped and reported via ``print``.
  """
  screenDict = dict()
  for root, dirs, files in os.walk(rootDir):
    # Only directories that actually contain a converted screenshot matter.
    if "convertedImg.jpg" not in files:
      continue

    imgPath = os.path.join(root, "convertedImg.jpg")
    img = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
    if img is None:
      # cv2.imread does not raise on a missing/corrupt file, it returns None.
      # Storing that None would only fail later when the image's .shape is read.
      print("Skipping unreadable image: " + imgPath)
      continue

    url = os.path.basename(os.path.normpath(root))
    screenDict[url] = img

  return screenDict

def createDataSet(enc, screens, csv):
    """Pair every screenshot with its label and encode the labels.

    Args:
        enc: Encoder object exposing ``fit_transform(labels)``.
        screens: Mapping url -> image array.
        csv: Mapping url -> label (must support ``.get``).

    Returns:
        Tuple ``(X, Y, U)``: ``X`` is ``np.array`` of the images, ``Y`` is
        ``np.array`` of the encoded labels and ``U`` is the list of urls, all
        three in the same order. Screenshots without a usable label are
        left out of all three.
    """
    X = list()
    Y = list()
    U = list()

    for url, img in screens.items():
        label = csv.get(url)
        # A screenshot with no label entry, or with an empty (NaN) cell, cannot
        # be used for supervised training. Looking it up with csv[url] used to
        # raise KeyError for the former and pass NaN to the encoder for the latter.
        if label is None or pd.isna(label):
            continue
        X.append(img)
        Y.append(label)
        U.append(url)

    Y = enc.fit_transform(Y)

    return np.array(X), np.array(Y), U

def reorderData(csv):
    """Build a lookup table from domain to WZ2008 section.

    Args:
        csv: Table providing the columns ``"Domain"`` and
            ``"WZ2008 Section"`` (the notebook passes a pandas DataFrame).

    Returns:
        dict mapping each ``"Domain"`` value to the ``"WZ2008 Section"`` value
        of the same row. If a domain occurs more than once, the last row wins.
    """
    # zip() pairs the two columns by position. Indexing the Series with
    # 0..len-1 is label-based and raises KeyError as soon as the frame has a
    # non-default index (e.g. after filtering rows).
    return dict(zip(csv["Domain"], csv["WZ2008 Section"]))

In [0]:
# Label table: semicolon-separated CSV with (at least) the columns read by
# reorderData, turned into a url -> section lookup.
data = pd.read_csv("C:\\Users\\Adjek\\Nextcloud\\DeeptechAI\\data\\urls.csv", sep = ";", encoding = "ISO-8859-1")
keyValueData = reorderData(data)

# url -> grayscale screenshot.
screenshots = loadScreenshots()

# Encode the section labels and align them with the screenshots.
encoder = LabelBinarizer()
dataSet_X, dataSet_Y, urls = createDataSet(encoder, screenshots, keyValueData)

# Width of the encoded label matrix = size of the models' output layer.
# Read it from the matrix shape instead of from row 1, which needed at least
# two samples to exist just to look up the column count.
# NOTE(review): LabelBinarizer emits a single column when there are only two
# classes - confirm the data always has three or more sections.
num_classes = dataSet_Y.shape[1]

In [0]:
# Spot check: print the array shape of one specific screenshot. Raises KeyError
# if no screenshot was loaded under this key.
print(screenshots['www.auto-senger.de'].shape)

In [0]:
# Give every image an explicit trailing channel axis of size 1 so the array
# has the (samples, 84, 48, 1) layout declared by the models' Input layer.
# The label matrix is left untouched.
dataSet_X = np.reshape(dataSet_X, (len(dataSet_X), 84, 48, 1))
print(dataSet_X.shape)

In [0]:
# Overview of the prepared data, one item per line: shape of the image array,
# shape of the label matrix, and the encoded label of the first sample.
print(dataSet_X.shape, dataSet_Y.shape, dataSet_Y[0], sep="\n")

In [0]:
# Split boundaries: the first 60 % of the samples are used for training, the
# next 20 % for validation and the remaining 20 % for testing.
trainSize = math.floor(len(dataSet_X) * 0.6)
validationSize = math.floor(len(dataSet_X) * 0.8)  # end index of the validation slice

# Shuffle before splitting so the split does not depend on the order in which
# the screenshots were found on disk. A single permutation is applied to
# images, labels and urls alike; shuffling each container separately would
# destroy the image <-> label pairing. A dedicated generator seeded with the
# notebook's `seed` makes the split identical on every run of this cell, and
# the source arrays are not modified.
split_order = np.random.RandomState(seed).permutation(len(dataSet_X))
shuffled_X = dataSet_X[split_order]
shuffled_Y = dataSet_Y[split_order]
shuffled_U = [urls[i] for i in split_order]

train_X = shuffled_X[:trainSize]
train_Y = shuffled_Y[:trainSize]
train_U = shuffled_U[:trainSize]

validation_X = shuffled_X[trainSize:validationSize]
validation_Y = shuffled_Y[trainSize:validationSize]
validation_U = shuffled_U[trainSize:validationSize]

test_X = shuffled_X[validationSize:]
test_Y = shuffled_Y[validationSize:]
test_U = shuffled_U[validationSize:]

# Number of samples per split.
print(len(train_X))
print(len(validation_X))
print(len(test_X))

In [0]:
def screenshot_model():
    """Build and compile the three-stage convolutional screenshot classifier.

    Layout: Input(84, 48, 1) -> [Conv2D + 2x2 MaxPooling] x 3 (10 filters of
    5x5, then twice 5 filters of 3x3) -> Dropout(0.5) -> Flatten ->
    Dense(5, relu) -> Dense(num_classes, softmax).

    Reads the module-level ``num_classes`` for the size of the output layer
    and prints the model summary as a side effect.

    Returns:
        The compiled ``tf.keras.Model`` (categorical cross-entropy loss, Adam
        optimizer, categorical accuracy as metric).
    """
    layers = tf.keras.layers

    image_in = tf.keras.Input(shape=(84, 48, 1))

    # Convolution stages as (number of filters, kernel size); each one is
    # followed by a 2x2 max pooling.
    stages = ((10, (5, 5)), (5, (3, 3)), (5, (3, 3)))
    x = image_in
    for n_filters, kernel in stages:
        x = layers.Conv2D(n_filters, kernel, activation=tf.nn.relu)(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Classifier head.
    x = layers.Dropout(0.5)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(5, activation=tf.nn.relu)(x)
    class_probs = layers.Dense(num_classes, activation=tf.nn.softmax)(x)

    model = tf.keras.Model(inputs=image_in, outputs=class_probs)

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=[metrics.categorical_accuracy],
    )
    model.summary()
    return model
  
def screenshot_small_model():
    """Build and compile the five-stage, small-kernel screenshot classifier.

    Layout: Input(84, 48, 1) -> [Conv2D(10, 2x2, same padding) + 2x2
    MaxPooling] x 5 -> Dropout(0.5) -> Flatten -> Dense(5, relu) ->
    Dense(num_classes, softmax).

    The convolutions use ``padding='same'``. With unpadded convolutions the
    48-pixel axis shrinks 48 -> 47 -> 23 -> 22 -> 11 -> 10 -> 5 -> 4 -> 2 -> 1,
    so the fifth 2x2 pooling had no complete window left and the model could
    not be built. With same padding only the pooling halves the size:
    84x48 -> 42x24 -> 21x12 -> 10x6 -> 5x3 -> 2x1.

    Reads the module-level ``num_classes`` for the size of the output layer
    and prints the model summary as a side effect.

    Returns:
        The compiled ``tf.keras.Model`` (categorical cross-entropy loss, Adam
        optimizer, categorical accuracy as metric).
    """
    # create model
    inputs = tf.keras.Input(shape=(84, 48, 1))

    # Five identical feature-extraction stages.
    features = inputs
    for _ in range(5):
        features = tf.keras.layers.Conv2D(10, (2, 2), padding='same', activation=tf.nn.relu)(features)
        features = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(features)

    drop_1 = tf.keras.layers.Dropout(0.5)(features)
    flat_1 = tf.keras.layers.Flatten()(drop_1)
    dense_1 = tf.keras.layers.Dense(5, activation=tf.nn.relu)(flat_1)
    dense_2 = tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)(dense_1)

    model = tf.keras.Model(inputs=inputs, outputs=dense_2)

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])
    model.summary()
    return model

In [0]:
# TensorBoard callback writing its logs to `path`, with graph and image
# output enabled and histogram_freq set to 0.
# NOTE: the training cell in this notebook calls model.fit without a
# `callbacks` argument, so this object is only created here, not used there.
path = 'C:\\Jupyter'
tbCallBack = tf.keras.callbacks.TensorBoard(
    log_dir=path,
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)

In [0]:
#@title Select the device

# Device used for building, training and evaluating the model.
device = '/device:CPU:0' #@param ['/device:CPU:0', '/device:GPU:0']
with tf.device(device):
    # Build the network.
    model = screenshot_model()

    # Train on the training split and pass the validation split to fit().
    model.fit(
        x=train_X,
        y=train_Y,
        batch_size=200,
        epochs=30,
        validation_data=(validation_X, validation_Y),
    )

    # Final evaluation on the test split.
    scores = model.evaluate(x=test_X, y=test_Y, verbose=1)

# scores[1] is the model's single compiled metric (categorical accuracy);
# report its complement as the error rate in percent.
print("Screenshot Network Error: %.2f%%" % (100-scores[1]*100))

In [0]:
# Small showcase batch: the same slice [2:30] of the test images, their
# encoded labels and their urls.
example_X, example_Y, example_U = test_X[2:30], test_Y[2:30], test_U[2:30]

# Softmax outputs of the model for the batch.
prediction = model.predict(example_X)

# Translate both the encoded ground truth and the network output back into
# the original class labels.
originalClasses = encoder.inverse_transform(example_Y)
predictedClasses = encoder.inverse_transform(prediction)

In [0]:
# One line per example: predicted class index <-> true class index, the url,
# the predicted class label and the true class label.
for i in range(len(prediction)):
  print("".join((
      str(prediction[i].argmax()),
      "<->",
      str(example_Y[i].argmax()),
      " für URL: ",
      example_U[i],
      " mit predicted Klasse ",
      predictedClasses[i],
      " Lösung: ",
      originalClasses[i],
  )))