In [None]:
#Francois Chollet's book - Deep Learning with Keras

#sequential model used in previous examples is good, but quite limiting as it has only one input and one output of data
#several layers of data processing and creating output data
#can only process one data type, can be good for solving some problems but others are more complex
#some problems such as predicting of online shopping behaviour requires multiple data inputs - numbers, videos, pictures, hashtags etc
#some models allow non-linear crosslinking of different layers to enable data processing from multiple inputs, graphs of layers
#joint training of model with different inputs instead of training individual separate models
#some models may produce multiple outputs - date and genre of book being published
#inception models - models trained on multiple convolution layers, the output is a tensor of all outputs combined
#residual connections models - He in Microsoft, processed outputs of different models will be joined together to form new outputs, reduces loss of 
#already taught information, speeds up processing of complex data
#adding API functional interface - individual layers of the model become functions that return tensors, processes tensors directly

from keras import Input, layers
input_tensor = Input(shape=(32,))
dense = layers.Dense(32, activation='relu')
output_tensor = dense(input_tensor)

#creating simple sequential model that will be processed through the API interface
from keras.models import Model, Sequential
from keras import layers
from keras import Input

seq_model = Sequential()
seq_model.add(layers.Dense(32, activation='relu', input_shape=(64,)))
seq_model.add(layers.Dense(32, activation='relu'))
seq_model.add(layers.Dense(10, activation='softmax'))

input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)
model = Model(input_tensor, output_tensor)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 64)]              0         
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 32)                1056      
                                                                 
 dense_6 (Dense)             (None, 10)                330       
                                                                 
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


In [None]:
#keras usually on its own creates and inputs input and output tensors 
from keras.utils.np_utils import to_categorical
model.compile(optimizer='rmsprop', loss='caterogical_crossentropy')
#generating random inputs
import numpy as np
x_train = np.random.random((1000, 64))
y_train = np.random.random((1000, 10))

#training model
model.fit(x_train, y_train, epochs=10, batch_size=128)

#evaluating the model
score = model.evaluate(x_train, y_train)

Epoch 1/10


ValueError: ignored

In [None]:
#moving onto more code that actually works
#keras model with multiple inputs, requires and a layer that combines these inputs/multiple tensors together known as keras.layers.add and keras.layers.concentrate
#example model with 2 inputs, 1 input using natural language to ask a question, 2nd input collects information from articles to find the answer
#model then tries to generate the answer to the question using the 2 inputs, usually one word using the softmax activation mode to generate a dictionary
#model uses API functional interface
#2 inputs are independent of each other, data from inputs are converted into vectors - dictionary of words, and a question formed by natural language
#concatenated together and compiled with softmax classifactor

from keras.models import Model
from keras import layers
from keras import Input
from tensorflow.keras.optimizers import Adam

#text input is a sequence of whole numbers, varying length, has to be named at the start
text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

text_input = Input(shape=(None, ), dtype = 'int32', name='text')

#setting input data into sequence vectors of size 64
embedded_text = layers.Embedding(64, text_vocabulary_size)(text_input)

#saves text vectors into shared vector using LSTM shared layer
encoded_text = layers.LSTM(32)(embedded_text)

#same process to process questions, using different input layers
question_input = Input(shape=(None, ), dtype='int32', name='question')
embedded_question = layers.Embedding(32, question_vocabulary_size)(question_input)
encoded_question = layers.LSTM(32)(embedded_question)

#concatenation of encoded dictionary and questions
concatenated = layers.concatenate([encoded_text, encoded_question], axis=1)

#adding last classificator softmax
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

#creating the model with 2 inputs and 1 output
model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

#inputing data into the model, 2 ways to input data for training of model with 2 inputs
#1st method is inputing data as Numpy tables 
#2nd method is giving dictionary that assigns Numpy tables to input data, requires labelling of input data
import numpy as np
num_samples = 1000
max_length = 10

#generating random input Numpy tables
text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_length))
answers = np.random.randint(0, 1, size=(num_samples, answer_vocabulary_size))

#tables are generated using hot one encoder, not whole numbers
#fitting using list of input objects
model.fit([text, question], answers, epochs=10, batch_size=128)

#fitting using dictionary of input objects, this method can only be used with input has labels
model.fit({'text': text, 'question': question}, answers, epochs=10, batch_size=128)

#doesn't work as I am using CPU, if I used GPU then the graphic error input wouldn't appear
#change size or change numbers assigned to nodes


Epoch 1/10


InvalidArgumentError: ignored

In [3]:
#model with multiple outputs
#simple example is a model that tris to predict attributes of a person who anonimoyously posts on social media
#different outputs can be different traits of this person - age, gender, income
#still using keras API functional interface

from keras import layers
from keras.models import Model
from keras import Input


vocabulary_size = 50000
num_income_groups = 10

posts_input = Input(shape=(None, ), dtype='int32', name='posts')
embedded_posts = layers.Embedding(256, vocabulary_size)(posts_input)

x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

#naming of the output layers
age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_groups, 
                                 activation='softmax',
                                 name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

#requires definition of loss of function for every output layer
#definition of age requires scalar loss function, 
#gender is considered here as binary so binary loss of function
#algorithm of gradient loss requires calculating scalar values and combining them together into one value
#model with multiple outputs combines results of different loss of functions, keeps them and 
#saves a global loss value that will be minimised during training of the model
#easiest way of combining is summing of different loss of functions using a compiler
#many ways of summing all the loss of functions

model.compile(optimizer='rmsprop', loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])

#only use this formatting if output layers have been labelled
model.compile(optimizer='rmsprop',
              loss={'age': 'mse', 'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'})


#need to define loss value otherwise output layers will be trained to highest loss value
#will cost data processing time and cause other problems
#need to define scale/values of different loss functions by defining global loss value
#scalar loss function calculating age, usually gives squared error values 3-5
#binary crossentropy can only take values of 0 and 1
#to resolve any issues and assign global value, assign value 10 to categorical crossentropy and squared error to 0.25
model.compile(optimizer='rmsprop', loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1, 10])


model.compile(optimizer='rmsprop',
              loss={'age': 'mse', 'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25, 
                            'gender': 1, 
                            'income': 10})

#same as model with multiple inputs this model requires input of Numpy tables
#using the Numpy tables created above
model.fit(posts,[age_targets, income_targets, gender_targets], epochs=10, batch_size=64)

model.fit(posts,{'age': age_targets, 'income': income_targets, 'gender': gender_targets}, epochs=10, batch_size=64)

#cannot find all the Numpy tables, code to define these wasn't included


NameError: ignored

In [None]:
#API interface also enables building of models that have multiple layers, not just inputs and outputs
#allows complex model topology as long as the model is non-cyclic, does not allow for loops
#tensor cannot be input into the same layer that created it
#loops are only used to create re-current connections between convolutional layers
#many famous types of models such as Inception and residual connection models
#inception model - created by a group in Google, made up of several modules resembling small independent networks, usually 3 or 4 branches
#inception model always starts with CNN 1x1 layer, followed by CNN 3x3 layer, last layer combines all the outputs together
#makes it easier to train spatial and channel data, more complex Inception models can contain pooling layers and CNN layers with larger dimensions, e.g, 5x5
#unlike with simple image processing model, instead of Dense layers to process data from previous layers, Inception uses convolutional point layers


#every single layer has same number of steps which allows easy concatenation in the final layer
#simple example of Inception model with 4 layers
from keras import layers

branch_a = layers.Conv2D(128, 1, activation='relu', strides=2)(x)

#spatial convolutional layer
branch_b = layers.Conv2D(128, 1, activation='relu')(x)
branch_b = layers.Conv2D(128, 3, activation='relu', strides=2)(branch_b)

#using averaging method
branch_c = layers.AveragePooling2D(3, strides=2)(x)
branch_c = layers.Conv2D(128, 3, activation='relu', strides=2)(branch_c)

branch_d = layers.Conv2D(128, 1, activation='relu')(x)
branch_d = layers.Conv2D(128, 3, activation='relu')(branch_d)
branch_d = layers.Conv2D(128, 3, activation='relu', strides=2)(branch_d)

#concatenation of the different layers located on independent branches
output = layers.concatenate([branch_a, branch_b, branch_c, branch_d], axis=1)

#keras has dedicated library to inception models in keras.applications.inception_v3, 
#includes model for processing ImageNet pictures, and weights associated
#also includes another library Xception, more extreme Inception models, modules are replaced with CNN followed by 1x1 point convolutional layers
#allowing data to be input in every layer separately, faster processing of spatial data and channels, more efficient use of model parameters
#same structure as Inception V3 model

In [None]:
#residual conncetions - used in many models after year 2015, invented by Microsoft, improves processing of data of large models (>10 layers)
#resolves problems of large models such as disappearing gradient and narrow representations of output
#residual connections work by converting data processed by previous layer as output representations of the next layer, summing and averaging these outputs
#no concatenation between layers
#usually assumed that all layers produce tensors with same dimensions, or uses linear transformation to change output dimensions
#linear transformation can be done with Dense layer no activation or CNN 1x1 layer no activation

#simple residual connection, assumes x is 4x4 tensor
from keras import layers
x = ...
#transforming tensor x
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)

#adds both tensors to make same dimensions
y =layers.add([y, x])

#now assuming that tensors have different dimensions
x = ...
#transforming tensor x
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.MaxPooling2D(2, strides=2)(y)

#1x1 CNN layer that performs linear transformation
residual = layers.Conv2D(128, 1, padding='same', strides=2)(x)

#adds both tensors with partial x tensor
y =layers.add([y, residual])

#narrow output problem = sequential models are based only on data processed by previous models, loss of data, only from activated layers, if
#output tensor is small or layer has small dimensions then the following layer will have limited data to work with, residual connection resolves this
#by passing by previous data before activated layers into the next layers