## Keras

- deep learning framework
- enables fast experimentation
- runs on top of other frameworks
- fast industry ready models
- less code
- build any architecture
- deploys in multiple platforms

## Simple NN with Keras

### Non-Sequential Model

        from keras.layers import Input, Dense
        from keras.models import Model

        # Input layer: one feature per sample
        input_tensor = Input(shape=(1,))

        # Connect a dense layer to the input tensor; calling the layer returns
        # the output tensor that the Model below needs
        output_tensor = Dense(1)(input_tensor)

        # Build the model from its input and output tensors
        model = Model(input_tensor, output_tensor)

        # Summarize the model
        model.summary()

### Sequential model

    # Import the Sequential model and Dense layer
    from keras.models import Sequential
    from keras.layers import Dense

    # Create a Sequential model
    model = Sequential()

    # Add an input layer and a hidden layer with 10 neurons
    model.add(Dense(10, input_shape=(2,), activation="relu"))

    # Add a 1-neuron output layer
    model.add(Dense(1))

    # Summarise your model
    model.summary()
    
### compile model

    # Compile your model
    model.compile(optimizer = 'adam', loss = 'mse')

    print("Training started..., this can take a while:")

### train model

    # Fit your model on your data for 30 epochs
    model.fit(time_steps,y_positions, epochs = 30)

### predict new data

    # Predict the eighty minute orbit
    eighty_min_orbit = model.predict(np.arange(-40, 41))

### evaluate results

    # Evaluate your model; it was compiled with a loss only (no metrics),
    # so evaluate() reports the loss value
    print("Final loss value:",model.evaluate(time_steps, y_positions))

## Summarize and Visualize Keras Model Flow


        # Import the plotting function
        from keras.utils import plot_model
        import matplotlib.pyplot as plt

        # Summarize the model
        model.summary()

        # Plot the model
        plot_model(model, to_file='model.png')

        # Display the image
        data = plt.imread('model.png')
        plt.imshow(data)
        plt.show()


## Binary Classification with Keras

### explore dataset

    # Import seaborn
    import seaborn as sns

    # Use pairplot and set the hue to be our class
    sns.pairplot(banknotes, hue='class') 

    # Show the plot
    plt.show()

    # Describe the data
    print('Dataset stats: \n', banknotes.describe())

    # Count the number of observations of each class
    print('Observations per class: \n', banknotes['class'].value_counts())
    
### build model

    # Import the sequential model and dense layer
    from keras.models import Sequential
    from keras.layers import Dense

    # Create a sequential model
    model = Sequential()

    # Add a dense layer - use sigmoid activation function as last activation function for binary classification 
    model.add(Dense(1, input_shape=(4,), activation='sigmoid'))

### Compile model
  
      #use binary crossentropy loss function for binary classification model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

    # Display a summary of your model
    model.summary()
    
### train model

    # Train your model for 20 epochs
    model.fit(X_train, y_train, epochs=20)
    
### evaluate model

    # Evaluate your model accuracy on the test set
    accuracy = model.evaluate(X_test, y_test)[1]

    # Print accuracy
    print('Accuracy:',accuracy)

## Multiclass Classification with Keras

    # Instantiate a sequential model
    model = Sequential()

    # Add 3 dense layers of 128, 64 and 32 neurons each
    model.add(Dense(128, input_shape=(2,), activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))

    # Add a dense layer with as many neurons as competitors
    model.add(Dense(4, activation='softmax'))
    
### compile model

    # Compile your model using categorical_crossentropy loss (for multi class classification)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
                  
### transforming label strings to numeric data using pandas

    # Transform into a categorical variable
    darts.competitor = pd.Categorical(darts.competitor)

    # Assign a number to each category (label encoding)
    darts.competitor = darts.competitor.cat.codes 

    # Print the label encoded competitors
    print('Label encoded competitors: \n',darts.competitor.head())
    
    # Import to_categorical from keras utils module
    from keras.utils import to_categorical

    # Use to_categorical on your labels
    coordinates = darts.drop(['competitor'], axis=1)
    competitors = to_categorical(darts.competitor)

    # Now print the to_categorical() result
    print('One-hot encoded competitors: \n',competitors)
    
### train and evaluate model

    # Train your model on the training data for 200 epochs
    model.fit(coord_train,competitors_train,epochs=200)

    # Evaluate your model accuracy on the test data
    accuracy = model.evaluate(coord_test, competitors_test)[1]

    # Print accuracy
    print('Accuracy:', accuracy)

## Multi-label Classification with Keras

- single input can be assigned to more than one class

        # Instantiate a Sequential model
        model = Sequential()

        # Add a hidden layer of 64 neurons that takes a 20-feature input
        model.add(Dense(64, input_shape=(20,), activation='relu'))

        # Add an output layer of 3 neurons with sigmoid activation for multi label classification
        model.add(Dense(3, activation='sigmoid'))

        # Compile your model with adam and binary crossentropy loss for multi label classification
        model.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

        model.summary()
        
        # Train for 100 epochs using a validation split of 0.2
        model.fit(sensors_train, parcels_train, epochs = 100, validation_split = 0.2)

        # Predict on sensors_test and round the predictions to the nearest integer (0 or 1)
        preds = model.predict(sensors_test)
        preds_rounded = np.round(preds)

        # Print rounded preds
        print('Rounded Predictions: \n', preds_rounded)

        # Evaluate your model's accuracy on the test data; the model was compiled
        # with metrics=['accuracy'], so evaluate() returns [loss, accuracy]
        accuracy = model.evaluate(sensors_test, parcels_test)[1]

        # Print accuracy
        print('Accuracy:', accuracy)

## Keras Callbacks

- functions that Keras executes at set points during training (e.g. after each epoch) or once training has finished
    - history
    - early stopping
    - model checkpoint
    
### using history to plot loss and accuracy differences between train and test sets

    # Train your model and save its history
    history = model.fit(X_train, y_train, epochs = 50,
                   validation_data=(X_test, y_test))

    # Plot train vs test loss during training
    plot_loss(history.history['loss'], history.history['val_loss'])

    # Plot train vs test accuracy during training
    plot_accuracy(history.history['acc'], history.history['val_acc'])
    
### using EarlyStopping to maximize test accuracy and save best model using ModelCheckpoint

    # Import the EarlyStopping and ModelCheckpoint callbacks
    from keras.callbacks import EarlyStopping, ModelCheckpoint

    # Early stop on validation accuracy
    monitor_val_acc = EarlyStopping(monitor = 'val_acc', patience=3)

    # Save the best model as best_banknote_model.hdf5
    modelCheckpoint = ModelCheckpoint('best_banknote_model.hdf5', save_best_only = True)

    # Fit your model for a stupid amount of epochs
    history = model.fit(X_train, y_train,
                        epochs = 10000000,
                        callbacks = [monitor_val_acc, modelCheckpoint],
                        validation_data = (X_test, y_test))

## Improving Model Performance

### Learning Curves

- visuals that show differences in accuracy and loss from training to test data

            # Train your model and save its history
            history = model.fit(X_train, y_train, epochs = 50,
                           validation_data=(X_test, y_test))

            # Plot train vs test loss during training
            plot_loss(history.history['loss'], history.history['val_loss'])

            # Plot train vs test accuracy during training
            plot_accuracy(history.history['acc'], history.history['val_acc'])
          
### activation functions

- sigmoid
- tanh
- ReLU
- Leaky ReLU
- no magic formula for which to use
- ReLU makes a good first choice
- sigmoids not recommended for deep networks
- experiment!

### batch size and normalization

#### mini batches
- great for large datasets that require lots of RAM
- performed during .fit as a batch_size= argument

#### batch normalization
- improves gradient flow
- allows higher learning rates
- reduces dependence on weight initialization
- acts as unintended form of regularization
- limits internal covariate shift
- applied as a layer in between layers

        # Import batch normalization from keras layers
        from keras.layers import BatchNormalization

        # Build your deep network
        batchnorm_model = Sequential()
        batchnorm_model.add(Dense(50, input_shape=(64,), activation='relu', kernel_initializer='normal'))
        batchnorm_model.add(BatchNormalization())
        batchnorm_model.add(Dense(50, activation='relu', kernel_initializer='normal'))
        batchnorm_model.add(BatchNormalization())
        batchnorm_model.add(Dense(50, activation='relu', kernel_initializer='normal'))
        batchnorm_model.add(BatchNormalization())
        batchnorm_model.add(Dense(10, activation='softmax', kernel_initializer='normal'))

        # Compile your model with sgd
        batchnorm_model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
        
### Hyperparameter Tuning
- random search > grid search
- don't use many epochs
- use smaller sample of large datasets
- play with batch sizes, activations, optimizers and learning rates

#### create model

    # Builds a compiled binary classifier for a given learning rate and activation
    def create_model(learning_rate=0.01, activation='relu'):
        """Return a compiled Sequential binary classification model.

        learning_rate: learning rate handed to the Adam optimizer.
        activation: activation function used by both hidden layers.
        """
        # Two hidden layers (128 and 256 units) on a 30-feature input,
        # followed by a single sigmoid output unit
        classifier = Sequential()
        for hidden in (Dense(128, input_shape=(30,), activation=activation),
                       Dense(256, activation=activation)):
            classifier.add(hidden)
        classifier.add(Dense(1, activation='sigmoid'))

        # Compile with Adam at the requested learning rate, tracking accuracy
        classifier.compile(optimizer=Adam(lr=learning_rate),
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
        return classifier
        
#### use sklearn to perform randomized search

    # Import KerasClassifier from keras wrappers
    from keras.wrappers.scikit_learn import KerasClassifier

    # Create a KerasClassifier
    model = KerasClassifier(build_fn = create_model)

    # Define the parameters to try out
    params = {'activation': ['relu', 'tanh'], 'batch_size': [32, 128, 256], 
              'epochs': [50, 100, 200], 'learning_rate': [0.1, 0.01, 0.001]}

    # Create a randomize search cv object passing in the parameters to try
    random_search = RandomizedSearchCV(model, param_distributions = params, cv = KFold(3))

    # Running random_search.fit(X,y) would start the search,but it takes too long! 
    show_results()
    
#### run best parameters on test data and evaluate

    # Create a KerasClassifier
    model = KerasClassifier(build_fn = create_model, epochs = 50, 
                 batch_size = 128, verbose = 0)

    # Calculate the accuracy score for each fold
    kfolds = cross_val_score(model, X, y, cv = 3)

    # Print the mean accuracy
    print('The mean accuracy was:', kfolds.mean())

    # Print the accuracy standard deviation
    print('With a standard deviation of:', kfolds.std())

## Advanced Model Architectures

### accessing layers, inputs, outputs and weights

    first_layer = model.layers[0]
    print(first_layer.input)
    print(first_layer.output)
    print(first_layer.weights)
    
### tracking layer inputs to outputs

    # Import keras backend
    import keras.backend as K

    # Input tensor from the 1st layer of the model
    inp = model.layers[0].input

    # Output tensor from the 1st layer of the model
    out = model.layers[0].output

    # Define a function from inputs to outputs
    inp_to_out = K.function([inp],[out])

    # Print the results of passing X_test through the 1st layer
    print(inp_to_out([X_test]))

### autoencoders

- dimensionality reduction
- de-noising data
- anomaly detection

#### build a model

    # Start with a sequential model
    autoencoder = Sequential()

    # Add a dense layer with the original image as input
    autoencoder.add(Dense(32, input_shape=(784, ), activation="relu"))

    # Add an output layer with as many nodes as the image
    autoencoder.add(Dense(784, activation="sigmoid"))

    # Compile your model
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Take a look at your model structure
    autoencoder.summary()
    
#### build encoder

    # Build your encoder
    encoder = Sequential()
    encoder.add(autoencoder.layers[0])

    # Encode the images and show the encodings
    preds = encoder.predict(X_test_noise)
    show_encodings(preds)
    
### Categorical Embeddings

- inputs are integers, outputs are floats
- increases dimensionality, flatten back output layer to 2D array
- useful with high cardinality categorical data, images and text

        # Imports
        from keras.layers import Input, Embedding, Flatten
        from keras.models import Model

        # Create an input layer for the team ID
        teamid_in = Input(shape=(1,))

        # Lookup the input in the team strength embedding layer
        strength_lookup = team_lookup(teamid_in)

        # Flatten the output
        strength_lookup_flat = Flatten()(strength_lookup)

        # Combine the operations into a single, re-usable model
        team_strength_model = Model(teamid_in, strength_lookup_flat, name='Team-Strength-Model')
        
### Multiple Inputs

#### Shared Layers

- require functional API
- flexible
- two input layers -> shared layer -> two output layers

        # Load the input layer from keras.layers
        from keras.layers import Input

        # Input layer for team 1
        team_in_1 = Input((1,), name='Team-1-In')

        # Separate input layer for team 2
        team_in_2 = Input((1,),name='Team-2-In')
        
        # Lookup team 1 in the team strength model
        team_1_strength = team_strength_model(team_in_1)

        # Lookup team 2 in the team strength model
        team_2_strength = team_strength_model(team_in_2)
        
 #### Merge Layers
 
 - add, subtract, multiply, concatenate layers
 - two input layers -> one merged layer -> one output layer
 
         # Import the Subtract layer from keras
        from keras.layers import Subtract

        # Create a subtract layer using the inputs from the previous exercise
        score_diff = Subtract()([team_1_strength, team_2_strength])
        
        # Create the model
        model = Model([team_in_1, team_in_2], score_diff)

        # Compile the model
        model.compile(optimizer = 'adam', loss= 'mean_absolute_error')
     
#### Fit two inputs
     
        # Get the team_1 column from the regular season data
        input_1 = games_season['team_1']

        # Get the team_2 column from the regular season data
        input_2 = games_season['team_2']

        # Fit the model to input 1 and 2 using a list, using score diff as a target
        model.fit([input_1, input_2],
                  games_season['score_diff'],
                  epochs=1,
                  batch_size=2048,
                  validation_split=0.1,
                  verbose=True)
                  
          # Evaluate the model using these inputs
        print(model.evaluate([input_1, input_2], games_season['score_diff'], verbose=False))
        
#### Three or More Inputs

- can use combinations of shared and merged layers

        # Create an Input for each team
        team_in_1 = Input(shape=(1,), name='Team-1-In')
        team_in_2 = Input(shape=(1,), name='Team-2-In')

        # Create an input for home vs away
        home_in = Input(shape=(1,), name='Home-In')

        # Lookup the team inputs in the team strength model
        team_1_strength = team_strength_model(team_in_1)
        team_2_strength = team_strength_model(team_in_2)

        # Import the Concatenate merge layer
        from keras.layers import Concatenate

        # Combine the team strengths with the home input using a Concatenate layer, then add a Dense layer
        out = Concatenate()([team_1_strength, team_2_strength, home_in])
        out = Dense(1)(out)
        
        # Import the model class
        from keras.models import Model

        # Make a Model
        model = Model([team_in_1, team_in_2, home_in], out)

        # Compile the model
        model.compile(optimizer='adam', loss='mean_absolute_error')
        
        # Fit the model to the games_season dataset
        model.fit([games_season['team_1'], games_season['team_2'], games_season['home']],
                  games_season['score_diff'],
                  epochs=1,
                  verbose=True,
                  validation_split=0.1,
                  batch_size=2048)

        # Evaluate the model on the games_tourney dataset
        print(model.evaluate([games_tourney['team_1'], games_tourney['team_2'], games_tourney['home']],
                  games_tourney['score_diff'], verbose=False))
                  
### Stacking Models

- using predictions from models as inputs for new model

        # Predict one model on three inputs
        games_tourney['pred'] = model.predict([games_tourney['team_1'], games_tourney['team_2'], games_tourney['home']])
        
        # Create an input layer with 3 columns
        input_tensor = Input((3,))

        # Pass it to a Dense layer with 1 unit
        output_tensor = Dense(1)(input_tensor)

        # Create a model
        model = Model(input_tensor, output_tensor)

        # Compile the model
        model.compile(optimizer='adam', loss='mean_absolute_error')
        
        # Fit the model using predicted input
        model.fit(games_tourney_train[['home', 'seed_diff', 'pred']],
                  games_tourney_train['score_diff'],
                  epochs=1,
                  verbose=True)
                  
        # Evaluate the model on the games_tourney_test dataset, using the same
        # three columns (in the same order) that the model was fit on
        print(model.evaluate(games_tourney_test[['home', 'seed_diff', 'pred']],
                   games_tourney_test['score_diff'], verbose=False))
                   
### Two Output Models

- predicting multiple targets from a model

        # Define the input with a shape of 2
        input_tensor = Input((2,))

        # Define the multiple outputs
        output_tensor = Dense(2,)(input_tensor)

        # Create a model
        model = Model(input_tensor, output_tensor)

        # Compile the model
        model.compile(optimizer='adam', loss= 'mean_absolute_error')
        
        # Fit the model
        model.fit(games_tourney_train[['seed_diff', 'pred']],
                  games_tourney_train[['score_1', 'score_2']],
                  verbose=True,
                  epochs=100,
                  batch_size=16384)
                  
        # Print the model's weights
        print(model.get_weights())

        # Print the column means of the training data
        print(games_tourney_train.mean())

        # Evaluate the model on the tournament test data
        print(model.evaluate(games_tourney_test[['seed_diff', 'pred']], games_tourney_test[['score_1', 'score_2']], verbose=False))
        
### Performing Regression and Classification in One Model

- need to specify different losses for regression and classification
- need to supply two targets when fitting, one per output

        # Create an input layer with 2 columns
        input_tensor = Input((2,))

        # Create the first output (regression)
        output_tensor_1 = Dense(1, activation='linear', use_bias=False)(input_tensor)

        # Create the second output (use the first output as input here) (classification)
        output_tensor_2 = Dense(1, activation='sigmoid', use_bias=False)(output_tensor_1)

        # Create a model with 2 outputs
        model = Model(input_tensor, [output_tensor_1, output_tensor_2])
        
        # Import the Adam optimizer
        from keras.optimizers import Adam

        # Compile the model with 2 losses and the Adam optimizer with a higher learning rate
        model.compile(loss=['mean_absolute_error', 'binary_crossentropy'], optimizer=Adam(lr=0.01))

        # Fit the model to the tournament training data, with 2 inputs and 2 outputs
        model.fit(games_tourney_train[['seed_diff', 'pred']],
                  [games_tourney_train[['score_diff']], games_tourney_train[['won']]],
                  epochs=10,
                  verbose=True,
                  batch_size=16384)
                  
        # Print the model weights
        print(model.get_weights())

        # Print the training data means
        print(games_tourney_train.mean())
        
        # Import the sigmoid function from scipy
        from scipy.special import expit as sigmoid

        # Weight from the model
        weight = 0.14

        # Print the approximate win probability predicted close game
        print(sigmoid(1 * weight))

        # Print the approximate win probability predicted blowout game
        print(sigmoid(10 * weight))
        
        # Evaluate the model on new data
        print(model.evaluate(games_tourney_test[['seed_diff', 'pred']],
                       [games_tourney_test[['score_diff']], games_tourney_test[['won']]], verbose=False))

## CNN with Keras

### standard Conv2D model

    # Import the Conv2D and Flatten layers and instantiate model
    from keras.layers import Conv2D,Flatten
    model = Sequential()

    # Add a convolutional layer of 32 filters of size 3x3
    model.add(Conv2D(32, input_shape=(28, 28, 1), kernel_size=3, activation='relu'))

    # Add a convolutional layer of 16 filters of size 3x3
    model.add(Conv2D(16, kernel_size=3, activation='relu'))

    # Flatten the previous layer output
    model.add(Flatten())

    # Add as many outputs as classes with softmax activation
    model.add(Dense(10, activation='softmax'))
    
    # Obtain a reference to the outputs of the first layer
    layer_output = model.layers[0].output

    # Build a model using the model's input and the first layer output
    first_layer_model = Model(inputs = model.input, outputs = layer_output)

    # Use this model to predict on X_test
    activations = first_layer_model.predict(X_test)

    # Plot the activations of first digit of X_test for the 15th filter
    axs[0].matshow(activations[0,:,:,14], cmap = 'viridis')

    # Do the same but for the 17th filter now
    axs[1].matshow(activations[0,:,:,16], cmap = 'viridis')
    plt.show()

### using ResNet trained model to classify images

    # Import image, the ResNet50 model, preprocess_input and decode_predictions
    from keras.preprocessing import image
    from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions

    # Load the image with the right target size for your model
    img = image.load_img(img_path, target_size=(224, 224))

    # Turn it into an array
    img_array = image.img_to_array(img)

    # Expand the dimensions of the image
    img_expanded = np.expand_dims(img_array, axis = 0)

    # Pre-process the img in the same way original images were
    img_ready = preprocess_input(img_expanded)
    
    # Instantiate a ResNet50 model with imagenet weights
    model = ResNet50(weights='imagenet')

    # Predict with ResNet50 on your already processed img
    preds = model.predict(img_ready)

    # Decode predictions
    print('Predicted:', decode_predictions(preds, top=3)[0])
    
### padding
- adding an additional layer of 0 value pixels
- assists in aligning input with output
- added as an input argument

        #making output size same as input
        padding = 'same'

### striding
- kernel will jump the stated number
- allows smaller output than input
- added as an input argument
        
        #adding strides
        strides = 2
        
### dilated
- kernel elements are spaced apart, skipping pixels in between
- good for values varying in scale

        #adding dilation
        dilation_rate = 2

### formula for anticipated output

O = ((I - K + 2P) / S) + 1

- O = output size
- I = input size (pixels)
- K = size of kernel (pixels)
- P = size of padding
- S = strides

### maxpool2D
- reduces the # of parameters by pooling pixels together to equate to one value
- maxpool2D layer added after each conv2D layer

### storing and loading saved weights
- use callback method to save best weight

        # Import the ModelCheckpoint callback
        from keras.callbacks import ModelCheckpoint

        # This checkpoint object will store the model parameters
        # in the file "weights.hdf5"; with save_best_only=True the file is
        # only overwritten when the monitored val_loss improves
        checkpoint = ModelCheckpoint('weights.hdf5', monitor='val_loss',
                                     save_best_only=True)

        # Store in a list to be used during training
        callbacks_list = [checkpoint]

        # Fit the model on a training set, using the checkpoint as a callback
        model.fit(train_data, train_labels, validation_split=0.2,
                  epochs=3, callbacks=callbacks_list)

        # Load the saved weights back into the model and predict classes
        # NOTE(review): predict_classes is only available in older
        # Keras/TensorFlow releases - confirm against the installed version
        model.load_weights('weights.hdf5')
        model.predict_classes(test_data)
        
### dropout
- assists in regularization
- selects a subset of units
- ignore it in the forward pass and in the back-propagation of error
- added after a layer where we want units ignored

### batch normalization
- rescales the output to normalize
- added after a layer that should be normalized

### there is disharmony between dropout and batch normalization
- avoid using together

### Accessing parts of the model for visual interpretation

    # Load the weights into the model
    model.load_weights('weights.hdf5')

    # Get the first convolutional layer from the model
    c1 = model.layers[0]

    # Get the weights of the first convolutional layer
    weights1 = c1.get_weights()

    # Pull out the first channel of the first kernel in the first layer
    kernel = weights1[0][...,0, 0]
    print(kernel)
    
    import matplotlib.pyplot as plt

    # Convolve with the fourth image in test_data
    out = convolution(test_data[3, :, :, 0], kernel)

    # Visualize the result
    plt.imshow(out)
    plt.show()

## LSTM with Keras

- a type of recurrent neural network (RNN), well suited to sequence data such as text
- Long Short-Term Memory
- when to use
    - image captions
    - speech to text
    - translation
    - document summaries
    - text generation
    - musical composition
 
 ### prep data
 
        #Split text into an array of words 
        words = text.split()

        #Make lines of 4 words each, moving one word at a time
        #(the range ends at len(words) + 1 so the final 4-word window is kept)
        lines = [' '.join(words[i-4:i]) for i in range(4, len(words) + 1)]

        #Instantiate a Tokenizer, then fit it on the lines
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(lines)

        #Turn lines into a sequence of numbers
        sequences = tokenizer.texts_to_sequences(lines)
        print("Lines: \n {} \n Sequences: \n {}".format(lines[:5],sequences[:5]))

### build LSTM model

            #Import the Embedding, LSTM and Dense layer
        from keras.layers import Embedding, LSTM, Dense

        model = Sequential()

            #Add an Embedding layer with the right parameters
        model.add(Embedding(input_dim=vocab_size, output_dim=8, input_length=3))

            #Add a 32 unit LSTM layer
        model.add(LSTM(32))

            #Add a hidden Dense layer of 32 units and an output layer of vocab_size with softmax
        model.add(Dense(32, activation='relu'))
        model.add(Dense(vocab_size, activation='softmax'))
        model.summary()
    
### decode predictions

            def predict_text(test_text):
                """Predict the word that follows a three-word text.

                Prints a message and returns False when test_text does not
                split into exactly 3 words; otherwise returns the word the
                tokenizer associates with the model's highest-scoring index.
                """
                word_count = len(test_text.split())
                if word_count != 3:
                    print('Text input should be 3 words!')
                    return False

                # Encode the text as a batch holding a single number sequence
                encoded = np.array(tokenizer.texts_to_sequences([test_text]))

                # Index with the highest predicted score for that sequence
                best_index = model.predict(encoded).argmax(axis = 1)[0]

                # Map the predicted index back to its word
                return tokenizer.index_word[best_index]

            # use the predict function to return the next word
            predict_text('enter_text_here')

## Visualizing Loss vs. Validation Loss with Matplotlib

    import matplotlib.pyplot as plt

    # Train a model and store the training object
    training = model.fit(train_data, train_labels, validation_split=0.2, epochs=3, batch_size=10)
    
    # Extract the history from the training object
    history = training.history

    # Plot the training loss 
    plt.plot(history['loss'])
    # Plot the validation loss
    plt.plot(history['val_loss'])

    # Show the figure
    plt.show()
