![](https://storage.googleapis.com/kaggle-competitions/kaggle/10338/logos/header.png)

Today, we'll use neural networks for pneumonia detection! We'll practice creating toy neural networks, apply neural networks (including Convolutional Neural Nets!) to our pneumonia data, and experiment with *transfer learning*: learning from an existing "expert network".

In this notebook we'll be:
1.   Building Neural Networks with Keras
2.   Implementing Transfer Learning
3.   Evaluating our ML models



In [None]:
#@title Run this to download data and prepare our environment! { display-mode: "form" }
def augment(data, augmenter):
  '''
  Run `augmenter` over one image or a stack of images.

  A 3-D array is handed to `augmenter.augment_image`, a 4-D array to
  `augmenter.augment_images`. Any other number of dimensions returns None.
  '''
  method_by_rank = {3: 'augment_image', 4: 'augment_images'}
  method_name = method_by_rank.get(len(data.shape))
  if method_name is None:
    return None
  return getattr(augmenter, method_name)(data)

def rotate(data, rotate):
  '''Apply an `augmenters.Affine(rotate = rotate)` transform to `data` via `augment`.'''
  return augment(data, augmenters.Affine(rotate = rotate))

def shear(data, shear):
  '''Apply an `augmenters.Affine(shear = shear)` transform to `data` via `augment`.'''
  return augment(data, augmenters.Affine(shear = shear))

def scale(data, scale):
  '''
  Apply an `augmenters.Affine(scale = scale)` transform to `data` via `augment`.

  data:  a single image (3-D) or a stack of images (4-D)
  scale: the scale value forwarded to `augmenters.Affine`
  '''
  # Forward the caller's `scale` argument; the previous code passed the
  # module-level `shear` function object here by mistake.
  fun = augmenters.Affine(scale = scale)
  return augment(data, fun)

def flip_left_right(data):
  '''Apply `augmenters.Fliplr()` to `data` via `augment`.'''
  return augment(data, augmenters.Fliplr())

def flip_up_down(data):
  '''Apply `augmenters.Flipud()` to `data` via `augment`.'''
  return augment(data, augmenters.Flipud())

def remove_color(data, channel):
  '''
  Return a copy of `data` with one colour channel set to zero.

  Works on a single image (3-D) or a stack of images (4-D); the channel is
  the last axis in both cases. Any other number of dimensions returns None.
  The input array is never modified.
  '''
  if len(data.shape) not in (3, 4):
    return None
  zeroed = data.copy()
  # `...` covers the leading axes, so the same line serves 3-D and 4-D input
  zeroed[..., channel] = 0
  return zeroed

class pkg:
  '''Helpers that load the metadata table and slice the image array by dataset split.'''

  #### DOWNLOADING AND LOADING DATA
  @staticmethod
  def get_metadata(metadata_path, which_splits = ('train', 'test')):
    '''returns metadata dataframe which contains columns of:
       * index: index of data into numpy data
       * class: class of image
       * split: which dataset split is this a part of?

    Only rows whose `split` value is listed in `which_splits` are kept.
    `metadata_path` is passed straight to `pd.read_csv`.
    '''
    metadata = pd.read_csv(metadata_path)
    keep_idx = metadata['split'].isin(list(which_splits))
    return metadata[keep_idx]

  @staticmethod
  def get_data_split(split_name, flatten, all_data, metadata, image_shape):
    '''
    returns (data, labels) for the rows of `metadata` whose split is `split_name`

    split_name:  value to match in the `split` column
    flatten:     if True, each image is reshaped to one row of length prod(image_shape)
    all_data:    array holding every image; rows are selected with the `index` column
    metadata:    dataframe with `index`, `class` and `split` columns
    image_shape: shape of a single image, used only when flattening
    '''
    sub_df = metadata[metadata['split'].isin([split_name])]
    index  = sub_df['index'].values
    labels = sub_df['class'].values
    data = all_data[index,:]
    if flatten:
      # np.prod replaces np.product, an alias that NumPy 2.0 removed
      data = data.reshape([-1, int(np.prod(image_shape))])
    return data, labels

  # The wrappers below call pkg.get_data_split explicitly: a bare
  # `get_data_split` would be looked up as a module global, not on the class.
  @staticmethod
  def get_train_data(flatten, all_data, metadata, image_shape):
    '''returns (data, labels) for the 'train' split'''
    return pkg.get_data_split('train', flatten, all_data, metadata, image_shape)

  @staticmethod
  def get_test_data(flatten, all_data, metadata, image_shape):
    '''returns (data, labels) for the 'test' split'''
    return pkg.get_data_split('test', flatten, all_data, metadata, image_shape)

  @staticmethod
  def get_field_data(flatten, all_data, metadata, image_shape):
    '''returns (data, labels) for the 'field' split'''
    return pkg.get_data_split('field', flatten, all_data, metadata, image_shape)

class helpers:
  '''Plotting, error-analysis and model-inspection utilities used throughout the notebook.'''

  #### PLOTTING
  @staticmethod
  def plot_one_image(data, labels = None, index = None, image_shape = (64,64,3)):
    '''
    Display one image and print its label.

    data:        a single image (3-D), a stack of images (4-D), or the
                 flattened form of either (1-D / 2-D), which is reshaped
                 back using image_shape
    labels:      label(s) belonging to data; optional
    index:       which image of a stack to show; required for a stack
    image_shape: shape of one un-flattened image

    Raises ValueError when a stack is given without an index, or when the
    data cannot be interpreted as an image or a stack of images.
    '''
    labels = [] if labels is None else labels
    num_dims   = len(data.shape)
    num_labels = len(labels)

    # reshape flattened data back into image form
    if num_dims == 1:
      data = data.reshape(image_shape)
    elif num_dims == 2:
      data = data.reshape([-1, *image_shape])
    num_dims   = len(data.shape)

    # check if single or multiple images
    if num_dims == 3:
      if num_labels > 1:
        print('Multiple labels does not make sense for single image.')
        return

      label = labels if num_labels else ''
      image = data
    elif num_dims == 4:
      if index is None:
        raise ValueError('index is required when data is a stack of images')
      image = data[index, :]
      label = labels[index] if num_labels else ''
    else:
      raise ValueError('data must be an image or a stack of images, got %d dimensions' % num_dims)

    # plot image of interest
    print('Label: %s'%label)
    plt.imshow(image)
    plt.show()

  #### QUERYING AND COMBINING DATA
  @staticmethod
  def get_misclassified_data(data, labels, predictions):
    '''
    Gets the data and labels that are misclassified in a classification task
    Returns:
    -missed_data
    -missed_labels
    -predicted_labels (corresponding to missed_labels)
    -missed_index (indices of items in original dataset)
    '''
    # squeeze() lets (N, 1) predictions be compared against (N,) labels
    missed_index     = np.where(np.abs(predictions.squeeze() - labels.squeeze()) > 0)[0]
    missed_labels    = labels[missed_index]
    missed_data      = data[missed_index,:]
    predicted_labels = predictions[missed_index]
    return missed_data, missed_labels, predicted_labels, missed_index

  @staticmethod
  def combine_data(data_list, labels_list):
    '''Concatenate several data arrays and several label arrays along the first axis.'''
    return np.concatenate(data_list, axis = 0), np.concatenate(labels_list, axis = 0)

  @staticmethod
  def model_to_string(model):
    '''
    Return the text of `model.summary()` with `_<digits>` suffixes (up to
    three digits) stripped, so that two summaries can be compared as strings.
    '''
    import re
    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    sms = "\n".join(stringlist)
    # longest suffix first; raw strings keep `\d` from being read as a string escape
    sms = re.sub(r'_\d\d\d','', sms)
    sms = re.sub(r'_\d\d','', sms)
    sms = re.sub(r'_\d','', sms)
    return sms

  @staticmethod
  def plot_acc(history, ax = None, xlabel = 'Epoch #'):
    '''
    Plot training and validation accuracy per epoch.

    history: object whose `.history` dict holds per-epoch lists under the
             'accuracy' and 'val_accuracy' keys
    ax:      matplotlib axes to draw on; a new figure is created when omitted
    xlabel:  label for the x axis

    A dashed red line marks 0.5 ("Chance") and a dashed green line marks the
    epoch with the highest validation accuracy.
    '''
    # Work on a DataFrame copy so the caller's history dict is left untouched
    frame = pd.DataFrame.from_dict(dict(history.history))
    frame['epoch'] = list(range(len(frame)))

    best_epoch = frame.loc[frame['val_accuracy'].idxmax(), 'epoch']

    if ax is None:
      f, ax = plt.subplots(1,1)
    sns.lineplot(x = 'epoch', y = 'val_accuracy', data = frame, label = 'Validation', ax = ax)
    sns.lineplot(x = 'epoch', y = 'accuracy', data = frame, label = 'Training', ax = ax)
    ax.axhline(0.5, linestyle = '--',color='red', label = 'Chance')
    ax.axvline(x = best_epoch, linestyle = '--', color = 'green', label = 'Best Epoch')
    ax.legend(loc = 1)
    ax.set_ylim([0.4, 1])

    ax.set_xlabel(xlabel)
    ax.set_ylabel('Accuracy (Fraction)')

    plt.show()

class models:
  '''
  Factories that build and compile the Keras classifiers used in this notebook.

  Every factory reads the network description from `nn_params`, using the
  keys 'input_shape', 'output_neurons', 'output_activation' and 'loss'.
  '''

  @staticmethod
  def _add_dropout(model, dropout):
    '''Append a Dropout layer only when `dropout` is a usable rate (0 < rate < 1).'''
    # The factories default to dropout = 1, which is not a meaningful rate
    # (it would drop every activation).
    # NOTE(review): legacy standalone Keras appears to have treated rates
    # outside (0, 1) as "no dropout", which is the behaviour kept here - confirm.
    if dropout and 0 < dropout < 1:
      model.add(Dropout(dropout))

  @staticmethod
  def DenseClassifier(hidden_layer_sizes, nn_params, dropout = 1):
    '''
    Fully-connected classifier.

    hidden_layer_sizes: iterable with the number of neurons of each hidden layer
    nn_params:          network description (see class docstring)
    dropout:            dropout rate applied after every hidden layer; only
                        values strictly between 0 and 1 add a Dropout layer
    Returns a compiled Sequential model (SGD, accuracy metric).
    '''
    model = Sequential()
    model.add(Flatten(input_shape = nn_params['input_shape']))
    for ilayer in hidden_layer_sizes:
      model.add(Dense(ilayer, activation = 'relu'))
      models._add_dropout(model, dropout)
    model.add(Dense(units = nn_params['output_neurons'], activation = nn_params['output_activation']))
    # `learning_rate` is the current spelling of the deprecated `lr` argument
    model.compile(loss=nn_params['loss'],
                  optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.95),
                  metrics=['accuracy'])
    return model

  @staticmethod
  def CNNClassifier(num_hidden_layers, nn_params, dropout = 1):
    '''
    Convolutional classifier.

    num_hidden_layers: total number of Conv2D + ReLU + MaxPooling2D stages
                       (the first stage is always present)
    nn_params:         network description (see class docstring)
    dropout:           dropout rate applied after the 128-unit dense layer;
                       only values strictly between 0 and 1 add a Dropout layer
    Returns a compiled Sequential model (RMSprop, accuracy metric).
    '''
    model = Sequential()

    model.add(Conv2D(32, (3, 3), input_shape=nn_params['input_shape'], padding = 'same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    for i in range(num_hidden_layers-1):
        model.add(Conv2D(32, (3, 3), padding = 'same'))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())

    model.add(Dense(units = 128, activation = 'relu'))
    models._add_dropout(model, dropout)

    model.add(Dense(units = 64, activation = 'relu'))

    model.add(Dense(units = nn_params['output_neurons'], activation = nn_params['output_activation']))

    # RMSprop optimizer. The class is spelled `RMSprop`; the lowercase
    # `rmsprop` alias used before is not available in current Keras.
    # NOTE(review): the former `decay=1e-6` argument was dropped because newer
    # Keras optimizers reject it; use a learning-rate schedule if decay matters.
    opt = optimizers.RMSprop(learning_rate=1e-4)

    # Let's train the model using RMSprop
    model.compile(loss=nn_params['loss'],
                  optimizer=opt,
                  metrics=['accuracy'])
    return model

  @staticmethod
  def TransferClassifier(name, nn_params, trainable = True):
    '''
    Classifier built on top of an ImageNet-pretrained convolutional base.

    name:      one of 'VGG16', 'VGG19', 'ResNet50', 'DenseNet121'
    nn_params: network description (see class docstring)
    trainable: whether the layers of the pretrained base are updated during training
    Returns a compiled Sequential model (SGD, accuracy metric).
    Raises KeyError for an unknown name.
    '''
    expert_dict = {'VGG16': VGG16,
                   'VGG19': VGG19,
                   'ResNet50':ResNet50,
                   'DenseNet121':DenseNet121}

    # include_top = False drops the original ImageNet classification head
    expert_conv = expert_dict[name](weights = 'imagenet',
                                              include_top = False,
                                              input_shape = nn_params['input_shape'])
    for layer in expert_conv.layers:
      layer.trainable = trainable

    expert_model = Sequential()
    expert_model.add(expert_conv)
    expert_model.add(GlobalAveragePooling2D())

    expert_model.add(Dense(128, activation = 'relu'))
    expert_model.add(Dropout(0.3))

    expert_model.add(Dense(64, activation = 'relu'))

    expert_model.add(Dense(nn_params['output_neurons'], activation = nn_params['output_activation']))

    expert_model.compile(loss = nn_params['loss'],
                  optimizer = optimizers.SGD(learning_rate=1e-4, momentum=0.95),
                  metrics=['accuracy'])

    return expert_model

import gdown
import zipfile

import os
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import model_selection

from collections import Counter

import keras
from keras.models import Sequential
from keras.layers import Activation, MaxPooling2D, Dropout, Flatten, Reshape, Dense, Conv2D, GlobalAveragePooling2D
# `keras.wrappers` is missing from recent Keras releases (see the
# ModuleNotFoundError printed under this cell). Nothing visible in this
# notebook uses KerasClassifier, so fall back to None instead of aborting
# the whole setup cell.
try:
  from keras.wrappers.scikit_learn import KerasClassifier
except ImportError:
  KerasClassifier = None
import keras.optimizers as optimizers
from keras.callbacks import ModelCheckpoint

from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.applications.densenet import DenseNet121

from imgaug import augmenters

### defining project variables
# every image in the dataset has this (height, width, channels) shape
image_shape          = (64, 64, 3)

# where the dataset lives remotely and where it is stored locally
metadata_url         = "https://storage.googleapis.com/training-labs/metadata.csv"
metadata_path        = './metadata.csv'
image_data_url       = 'https://storage.googleapis.com/training-labs/image_data.npy'
image_data_path      = './image_data.npy'

# neural net parameters shared by all model factories:
# a single sigmoid output trained with binary cross-entropy
nn_params = {
  'input_shape':       image_shape,
  'output_neurons':    1,
  'loss':              'binary_crossentropy',
  'output_activation': 'sigmoid',
}

### downloading the dataset files into the working directory
# (earlier gdown-based download, kept for reference)
# gdown.download(image_data_url, './image_data.npy', True)
# gdown.download(metadata_url, './metadata.csv', True)
# `!` runs a shell command from the notebook; these are the same URLs as
# metadata_url / image_data_url defined above
!wget "https://storage.googleapis.com/training-labs/metadata.csv"
!wget "https://storage.googleapis.com/training-labs/image_data.npy"

### pre-loading all data of interest
# the image array and the metadata rows for all three splits are loaded once
# and shared by the get_*_data helpers defined below
_all_data = np.load('image_data.npy')
_metadata = pkg.get_metadata(metadata_path, ['train','test','field'])

### preparing definitions
# Notebook-level shortcuts: each one forwards to pkg / helpers / models with
# the project-wide settings (_all_data, _metadata, image_shape, nn_params)
# filled in, so the exercises only pass what actually varies.

# downloading and loading data
get_data_split = pkg.get_data_split

def get_metadata():
  return pkg.get_metadata(metadata_path, ['train','test'])

def get_train_data(flatten = False):
  return pkg.get_train_data(flatten = flatten, all_data = _all_data, metadata = _metadata, image_shape = image_shape)

def get_test_data(flatten = False):
  return pkg.get_test_data(flatten = flatten, all_data = _all_data, metadata = _metadata, image_shape = image_shape)

def get_field_data(flatten = False):
  return pkg.get_field_data(flatten = flatten, all_data = _all_data, metadata = _metadata, image_shape = image_shape)

# plotting
def plot_one_image(data, labels = [], index = None):
  return helpers.plot_one_image(data = data, labels = labels, index = index, image_shape = image_shape)

def plot_acc(history):
  return helpers.plot_acc(history)

# querying and combining data
def model_to_string(model):
  return helpers.model_to_string(model)

get_misclassified_data = helpers.get_misclassified_data
combine_data           = helpers.combine_data

# models with input parameters
def DenseClassifier(hidden_layer_sizes):
  return models.DenseClassifier(hidden_layer_sizes = hidden_layer_sizes, nn_params = nn_params)

def CNNClassifier(num_hidden_layers):
  return models.CNNClassifier(num_hidden_layers, nn_params = nn_params)

def TransferClassifier(name):
  return models.TransferClassifier(name = name, nn_params = nn_params)

# Checkpoint callback: keeps the best model seen so far in ./model.h5.
# - The models in this notebook are compiled with metrics=['accuracy'] and
#   plot_acc reads 'val_accuracy', so that is the key to watch; 'val_acc'
#   would never be found.
# - save_freq='epoch' checks once per epoch; the previous save_freq=1 meant
#   "every batch", a point at which no validation metric is available.
monitor = ModelCheckpoint('./model.h5', monitor='val_accuracy', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')

ModuleNotFoundError: No module named 'keras.wrappers'

# **Milestone 1. Learning Neural Networks**

Now, let's apply neural networks to our medical imaging problem!

### What are neural networks?

Neural networks look something like this:

![A 2 layer neural network](https://cdn-images-1.medium.com/max/1600/1*DW0Ccmj1hZ0OvSXi7Kz5MQ.jpeg)


Each orange and blue node is a neuron. The network itself is composed of a bunch of neurons that talk to each other and eventually give us a prediction. Let's get a bit more concrete with this...

To build neural networks in Python, we use the packages known as `tensorflow` and `keras`.

In [None]:
# grab tools from our tensorflow and keras toolboxes
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from keras import optimizers

## Exercise (Coding): A 2-Layer Model


We're going to build this model:

![](http://cs231n.github.io/assets/nn1/neural_net.jpeg)

This network can be described as:
* Input Layer: 3
* Layer 1 (Hidden): 4 neurons with the `'relu'` activation function
* Layer 2 (Output): 2 neurons with the `'linear'` activation function

We're going to set up a **Sequential** model by adding on a sequence of layers.

Each layer will be **Dense**, meaning each neuron of the previous layer connects to each neuron of this layer.

We'll compile our model to make it ready to use! We'll use:
- `loss = 'binary_crossentropy'` (how to measure the model's performance while it trains)
- `optimizer = 'adam'` (an algorithm for adjusting the weights)
- `metrics = ['accuracy']` (how to measure the model's performance at the end)

Try it out below!

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from keras import optimizers
# The network described in the exercise:
#   3 inputs -> 4 hidden neurons ('relu') -> 2 output neurons ('linear')
model_1 = Sequential()
model_1.add(Dense(4, input_shape = (3,), activation = 'relu'))
model_1.add(Dense(2, activation = 'linear'))
# loss, optimizer and metric exactly as listed in the exercise text
model_1.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])


In [None]:
#@title Solution { display-mode: "form" }
# Fill in the blanks with your group!
### YOUR CODE HERE:
# Reference solution: 3 inputs -> 4 'relu' neurons -> 2 'linear' outputs
model_1_answer = Sequential()
model_1_answer.add(Dense(4, input_shape = (3,), activation = 'relu'))
model_1_answer.add(Dense(2, activation = 'linear'))
model_1_answer.compile(loss='binary_crossentropy',
optimizer = 'adam',
metrics = ['accuracy'])
# Running this cell replaces whatever model_1 was built in the exercise cell
model_1 = model_1_answer
### END CODE

In [None]:
#@title Double-click here if you want to read more detail!
"""
Let's walk through what each of these lines of code means!

**1. Specify model**

```
model = Sequential()
```
In this line of code, we build our network where the information flows from LEFT to RIGHT through the network in ONE DIRECTION as opposed to multiple directions. Neurons on the right never pass information to the neurons on their left.


**2. Add layers to the network**
```
model.add(Dense(4,input_shape = (3,), activation = 'sigmoid'))
```
In this code, we `add` a `layer` of neurons to our network.

This layer consists of 4 neurons. Each neuron is DENSE: it connects to all of the previous layer's outputs and to all of the next layer's neurons. We specify that there are 3 inputs here.

We also specify what kind of output the neuron will give. If you want the neuron to output a number between 0 and 1 (like a probability!) you would use 'softmax' or 'sigmoid'. If you want the neuron to output any number, you can use 'linear'! You'll also often see 'relu', which is when a neuron will only output positive numbers.

```
model.add(Dense(1, activation = 'linear'))
```
This code adds ANOTHER layer to the network that has 1 neuron. This one neuron is used to predict a continuous value!

**3. Turn the model on by compiling it**

After having built the network, we want to train and use it, so we have to 'turn it on' and 'compile' it. To turn it on, we have to specify at the very least, a loss, an optimizer, and some ways of evaluating the model (metrics). Don't worry too much about what this means! Just know that this is necessary.

```
model.compile(loss='mean_squared_error',
optimizer = 'adam',
metrics = ['mean_squared_error'])
  ```
"""

In [None]:
#@title Run this to test if your model is right!
# Reference network for the exercise: 3 inputs -> 4 'relu' -> 2 'linear'
model_1_answer = Sequential()
model_1_answer.add(Dense(4, input_shape = (3,), activation = 'relu'))
model_1_answer.add(Dense(2, activation = 'linear'))
model_1_answer.compile(loss='binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

def _config_without_names(model):
  '''Return model.get_config() with the model name and every layer name removed.'''
  config = model.get_config()
  del config["name"]
  for layer in config["layers"]:
    del layer["config"]["name"]
  return config

# Names are stripped before comparing, so only the structure has to match
model_1_config = _config_without_names(model_1)
model_1_answer_config = _config_without_names(model_1_answer)

models_match = model_1_answer_config == model_1_config
print('Good job! Your model worked' if models_match else 'Please check your code again!')

# Old Code
# if model_to_string(model_1) == model_to_string(model_1_answer):
#   print('Good job! Your model worked')
# else:
#   print('Please check your code again!')

This is a toy example, so we won't train our model with real data - but we can feed in some fake inputs to see what happens! **How many inputs do we need?**


In [None]:
# Exercise placeholder: put the fake input values for model_1 inside the brackets
input_data = [[[]]] #Fill in inputs here! How many?

In [None]:
#@title Sample Solution
# One fake sample with 3 features: shape (1, 3), matching the model's
# input_shape = (3,). The extra level of nesting used before produced a
# rank-3 input that does not match what the model was built for.
input_data = np.array([[3, 4, 3]])

Let's try it out! What do **predict** and **predict_classes** do? How do you interpret the outputs?

In [None]:
# Feed the fake inputs defined above (not a bare number) to the model.
# reshape(-1, 3) gives one row of 3 features per sample, as model_1 expects.
model_inputs = np.asarray(input_data, dtype = 'float32').reshape(-1, 3)

# predict: the raw value of each of the 2 output neurons
raw_outputs = model_1.predict(model_inputs)
print(raw_outputs)

# what predict_classes used to report for a multi-output model:
# the index of the output neuron with the largest value
print(np.argmax(raw_outputs, axis = -1))

# **Milestone 2. Exploring Neural Networks**

Now, let's apply neural networks to our medical imaging problem!


In our problem, we are given `images` of shape `(64,64,3)`, each assigned a label PNEUMONIA or HEALTHY. We want to identify the key things that we need to design our network.

Understand these points:

* What are our inputs?
* What is/are our outputs?

**Here are the steps to creating a neural network:**

1. Ready the training data/labels and testing data/labels
2. Initiate a model using a certain classifier
3. Train the model using the training data and labels
4. Predict the outputs using the test data
5. Score the model by comparing the test labels with the predictions

**Here is an example of code that creates a neural network, with comments:**

This neural network uses scikit-learn, a general-purpose machine learning library. [MLPClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html) is scikit-learn's multi-layer perceptron classifier, so you can use it to create a simple neural network! Click on the hyperlink to learn more about it if you want.

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score  # Import accuracy_score function
# 1. Ready the data. scikit-learn models take one flat row of features per
#    sample, so ask for flattened images. Separate names are used so the
#    un-flattened train_data / test_data of later cells are not affected.
flat_train_data, flat_train_labels = get_train_data(flatten = True)
flat_test_data, flat_test_labels = get_test_data(flatten = True)

# 2. Initiate the model: one hidden layer with 5 neurons
#    (note the trailing comma - `(5)` is just the number 5, `(5,)` is a tuple)
model = MLPClassifier(hidden_layer_sizes=(5,))

# 3. Train, 4. predict, 5. score
model.fit(flat_train_data, flat_train_labels)
predictions = model.predict(flat_test_data)
score = accuracy_score(flat_test_labels, predictions)
print(score)


# **Milestone 3. Diving Deeper into Neural Networks**

Now, let's explore more complex neural networks and apply them to our medical imaging problem.

Let's try out 'Convolutional Neural Networks'! [Convolutional neural networks](https://www.tensorflow.org/tutorials/images/cnn) are networks that process images much like our visual system does. Click on the hyperlink to learn more about them if you want.

We'll use a Keras wrapper that abstracts away the details.

First, let's get our data.



In [None]:
# Images keep their (height, width, channels) shape here: no flattening
train_data, train_labels = get_train_data(flatten = False)
test_data, test_labels = get_test_data(flatten = False)

### Creating Models
Now, let's create a model. In fact, let's create two:


**For a "vanilla" neural network:**

```
dense = DenseClassifier(hidden_layer_sizes = (64,32))
```
Arguments:
* hidden_layer_sizes: the number of neurons in each hidden layer
* epochs: the number of times that our network trains on the whole training manual


---


**For a convolutional neural network:**
```
cnn = CNNClassifier(num_hidden_layers = 1)
```
Arguments:
* num_hidden_layers: the number of hidden layers

**Create your models below!** Use any hidden layer sizes you like.

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

# Values describing the pneumonia dataset used in this notebook
num_classes = 2  # PNEUMONIA vs HEALTHY
# image_shape is defined in the setup cell as (height, width, channels)
image_height, image_width, num_channels = image_shape

class DenseClassifier:
    """Fully-connected image classifier wrapped around a compiled Keras model.

    hidden_layer_sizes: iterable with the number of neurons of each hidden layer.

    The Keras model is stored in ``self.model``. Attributes not defined on
    the wrapper (``fit``, ``evaluate``, ``predict``, ...) are forwarded to
    it, so the wrapper can be used like the model itself.
    """

    def __init__(self, hidden_layer_sizes):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the network; the output size comes from the global ``num_classes``."""
        model = Sequential()
        # Turn each image into one flat feature vector first; without this
        # the Dense layers would act on the last image axis only and the
        # output would not be one prediction per image.
        model.add(Flatten())
        for size in self.hidden_layer_sizes:
            model.add(Dense(units=size, activation='relu'))
        model.add(Dense(units=num_classes, activation='softmax'))
        # 'sparse_' variant: expects one integer class id per image rather
        # than one-hot vectors.
        # NOTE(review): assumes the labels are integer class ids (0/1) - confirm.
        model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def __getattr__(self, attribute):
        # Reached only when normal lookup fails. Reading __dict__ directly
        # avoids recursing into this method on a half-initialised instance.
        wrapped = self.__dict__.get('model')
        if wrapped is None:
            raise AttributeError(attribute)
        return getattr(wrapped, attribute)

class CNNClassifier:
    """Convolutional image classifier wrapped around a compiled Keras model.

    num_hidden_layers: number of 64-filter Conv2D layers added after the
                       initial 32-filter Conv2D layer.

    The Keras model is stored in ``self.model``. Attributes not defined on
    the wrapper (``fit``, ``evaluate``, ``predict``, ...) are forwarded to
    it, so the wrapper can be used like the model itself.
    """

    def __init__(self, num_hidden_layers):
        self.num_hidden_layers = num_hidden_layers
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the network from the global image size and ``num_classes``."""
        model = Sequential()
        model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(image_height, image_width, num_channels)))
        for _ in range(self.num_hidden_layers):
            model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(Flatten())
        model.add(Dense(units=num_classes, activation='softmax'))
        # 'sparse_' variant: expects one integer class id per image rather
        # than one-hot vectors.
        # NOTE(review): assumes the labels are integer class ids (0/1) - confirm.
        model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def __getattr__(self, attribute):
        # Reached only when normal lookup fails. Reading __dict__ directly
        # avoids recursing into this method on a half-initialised instance.
        wrapped = self.__dict__.get('model')
        if wrapped is None:
            raise AttributeError(attribute)
        return getattr(wrapped, attribute)

# Architecture choices for the two classifiers
dense_hidden_layer_sizes = [128, 64, 32]  # neurons in each hidden layer of the dense network
cnn_num_hidden_layers = 2  # hidden layers of the convolutional network

# Build one classifier of each kind from the settings above
dense = DenseClassifier(dense_hidden_layer_sizes)
cnn = CNNClassifier(cnn_num_hidden_layers)


In [None]:
#@title Sample Solution
# two hidden layers (64 then 32 neurons) for the dense network,
# three hidden layers for the convolutional network
dense = DenseClassifier((64, 32))
cnn = CNNClassifier(3)

### Fitting and Scoring
Now, let's fit  our models!

There are default parameters to `.fit` you can call:

```
model_history = model.fit(train_data, train_labels, epochs = 100, validation_data = (test_data, test_labels), shuffle = True, callbacks = [monitor])
```

The `shuffle` parameter is important for shuffling the training data before each epoch. The `monitor` callback is used to get a view on internal states and statistics of the model during training. Please don't change these parameters!

**Fit your models below!**

In [None]:
import tensorflow as tf
# The Keras `models` module is imported under an alias on purpose: the setup
# cell defines its own helper class called `models`, and rebinding that name
# here would break helpers such as TransferClassifier that look it up when
# they are called.
from tensorflow.keras import layers, models as keras_models

# Define a Dense Classifier
class DenseClassifier(keras_models.Model):
    """Fully-connected classifier: Flatten -> Dense('relu') per hidden size -> softmax.

    hidden_layer_sizes: iterable with the number of neurons of each hidden layer
    num_classes:        number of output classes
    """

    def __init__(self, hidden_layer_sizes, num_classes):
        super().__init__()
        self.flatten = layers.Flatten()
        self.dense_layers = [layers.Dense(units, activation='relu') for units in hidden_layer_sizes]
        self.output_layer = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.flatten(inputs)
        for layer in self.dense_layers:
            x = layer(x)
        return self.output_layer(x)

# Define a CNN Classifier
class CNNClassifier(keras_models.Model):
    """Convolutional classifier: (Conv2D -> MaxPooling2D) x N -> Flatten -> Dense(64) -> softmax.

    num_hidden_layers: number of Conv2D + MaxPooling2D stages
    num_classes:       number of output classes
    """

    def __init__(self, num_hidden_layers, num_classes):
        super().__init__()
        self.conv_layers = []
        for _ in range(num_hidden_layers):
            self.conv_layers.append(layers.Conv2D(32, (3, 3), activation='relu'))
            self.conv_layers.append(layers.MaxPooling2D((2, 2)))
        self.flatten = layers.Flatten()
        self.fc_layer = layers.Dense(64, activation='relu')
        self.output_layer = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = inputs
        for layer in self.conv_layers:
            x = layer(x)
        x = self.flatten(x)
        x = self.fc_layer(x)
        return self.output_layer(x)

# Create the models
hidden_layer_sizes = (64, 32)
num_classes = 2  # PNEUMONIA vs HEALTHY

dense = DenseClassifier(hidden_layer_sizes=hidden_layer_sizes, num_classes=num_classes)
cnn = CNNClassifier(num_hidden_layers=3, num_classes=num_classes)

# A model must be compiled before it can be fitted. The 'sparse_' loss takes
# one integer class id per image.
# NOTE(review): assumes the labels are integer class ids (0/1) - confirm.
dense.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
cnn.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit both models and keep their training histories for plotting.
# NOTE(review): the `monitor` checkpoint callback is left out because it saves
# a full model to an .h5 file, which is generally not supported for subclassed
# models like these - confirm before adding it back.
dense_history = dense.fit(train_data, train_labels, epochs = 100, validation_data = (test_data, test_labels), shuffle = True)
cnn_history = cnn.fit(train_data, train_labels, epochs = 100, validation_data = (test_data, test_labels), shuffle = True)


### Scoring

Now, let's evaluate our models! To get the scores, you can use:
```
score = model.evaluate(test_data, test_labels, verbose=0)
```

Then `score[0]` will be test loss and `score[1]` will be test accuracy.



In [None]:
import ast
import codeop

class CodeParser:
    """Parse source code into an AST while tracking a set of compiler flags."""

    def __init__(self):
        # Initialize compiler flags to default state
        self.reset_compiler_flags()

    def ast_parse(self, source, filename='<unknown>', symbol='exec'):
        """Parse code to an AST with the current compiler flags active.

        Arguments are exactly the same as ast.parse (in the standard library),
        and are passed to the built-in compile function."""
        # PyCF_ONLY_AST is taken from the `ast` module, where it is
        # documented; it is not a documented attribute of `codeop`.
        # The final 1 is compile()'s dont_inherit argument.
        return compile(source, filename, symbol, self.flags | ast.PyCF_ONLY_AST, 1)

    def reset_compiler_flags(self):
        """Reset compiler flags to default state."""
        # This value is copied from codeop.Compile.__init__, so if that ever
        # changes, it will need to be updated.
        self.flags = codeop.PyCF_DONT_IMPLY_DEDENT

    @property
    def compiler_flags(self):
        """Flags currently active in the compilation process."""
        return self.flags



### Plotting

A great way to understand our model better is to plot the training and test accuracy over time with `plot_acc(model_history)`.

**What do you observe of the training and test accuracy over the training epochs?**

In [None]:
import matplotlib.pyplot as plt

def plot_acc(history):
    """Plot training and test accuracy for every epoch.

    history: either the object returned by ``model.fit`` (its ``.history``
             attribute is used) or the metrics dict itself. The dict must
             contain the 'accuracy' and 'val_accuracy' lists.
    """
    # Accept both forms: use `.history` when present, otherwise treat the
    # argument as the dict.
    metrics = getattr(history, 'history', history)
    train_acc = metrics['accuracy']
    test_acc = metrics['val_accuracy']

    # epochs are numbered from 1 on the x axis
    epochs = range(1, len(train_acc) + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(epochs, train_acc, 'b', label='Training Accuracy')
    plt.plot(epochs, test_acc, 'r', label='Test Accuracy')
    plt.title('Training and Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [None]:
#@title Sample Solution
# one accuracy plot per trained model: dense network first, then the CNN
for model_history in (dense_history, cnn_history):
    plot_acc(model_history)


# **Milestone 4. Expert models: Transfer learning**

For all of the machine learning we've done thus far, we've used models that were built from 'scratch'. All of these models are like newborn babies that have neither seen nor explored the world.

Unfortunately, our training manual is pretty small compared to all the things in the big wide world. So, just training on our manual is going to be inherently limited.


Luckily, there are **"experts"** who have much more training! While these **"experts"** haven't seen our task, they have experience with a lot of other things. We can hand them our training manual and reasonably expect that they will pick up our task fairly quickly.

In deep learning, the idea of using a model trained on another task as a starting point for your model is known as **transfer learning**.

### VGG 16

For our transfer learning, we're going to use 'experts' built upon the famous 'ImageNet' classification problem.

In ImageNet, participants were challenged to build machine learning models that could classify 14 million images into more than 20,000 available categories.

Below, we see examples of 4 different categories.

![](http://cs231n.github.io/assets/trainset.jpg)



One of the experts we can use is VGG 16. VGG 16 was a network that was allowed to study the 14 million images 74 times.

After its studying, VGG 16 was able to guess something close to the real label (top-5 accuracy) better than a human can.

![](https://cdn-images-1.medium.com/max/1600/0*V1muWIDnPVwZUuEv.png)

We're going to take an expert model like VGG16 and let it train on OUR x-rays. Hopefully, their experience with those 14 million images will help it understand pneumonia from our x-rays.

### Exercise (Coding)

Let's tap an expert model to help us out with our pneumonia prediction!

We provide a wrapper that lets you 'call' up and employ expert models. You can call it like...

```
transfer = TransferClassifier(name = 'VGG16')
```

The experts we have on hand are:
* `VGG16`
* `VGG19`
* `ResNet50`
* `DenseNet121`

There are default arguments/parameters to model.fit you can call:

`model.fit(train_data, train_labels, epochs = N, validation_data = (test_data, test_labels), shuffle = True, callbacks = [monitor])`

The shuffle parameter is important for shuffling the training data before each epoch. The monitor callback is used to get a view on internal states and statistics of the model during training. Do not change these parameters!

**Experiment with using these experts. Remember to fit and score your model, and to take a look at the training history.**




In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16, VGG19, ResNet50, DenseNet121
from sklearn.model_selection import train_test_split

# Use the real chest x-ray data. The random 224x224 placeholder arrays used
# before replaced the dataset and needed more than a gigabyte of memory.
train_data, train_labels = get_train_data()
test_data, test_labels = get_test_data()

# Choose an expert model
expert_model_name = 'VGG16'

# Create the transfer wrapper with the chosen expert model
class TransferClassifier:
    """Binary classifier built on an ImageNet-pretrained convolutional base.

    name:        one of 'VGG16', 'VGG19', 'ResNet50', 'DenseNet121'
    input_shape: shape of one input image; when omitted, the notebook-wide
                 ``image_shape`` from the setup cell is used

    The pretrained base is kept in ``self.expert_model`` and the compiled
    Keras model in ``self.transfer_model``. Attributes not defined on the
    wrapper (``fit``, ``evaluate``, ``predict``, ``history``, ...) are
    forwarded to ``self.transfer_model``.

    Raises ValueError for an unknown expert name.
    """

    # name -> constructor of every supported pretrained network
    _EXPERTS = {
        'VGG16': VGG16,
        'VGG19': VGG19,
        'ResNet50': ResNet50,
        'DenseNet121': DenseNet121,
    }

    def __init__(self, name, input_shape=None):
        self.name = name
        self.input_shape = tuple(image_shape if input_shape is None else input_shape)
        self.expert_model = self._get_expert_model()
        self.transfer_model = self._build_transfer_model()

    def _get_expert_model(self):
        """Instantiate the pretrained base without its original classification head."""
        try:
            build_expert = self._EXPERTS[self.name]
        except KeyError:
            raise ValueError(f"Unknown expert model: {self.name}") from None
        return build_expert(weights='imagenet', include_top=False, input_shape=self.input_shape)

    def _build_transfer_model(self):
        """Stack a single sigmoid output on top of the pretrained base and compile it."""
        model = Sequential([
            self.expert_model,
            Flatten(),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def __getattr__(self, attribute):
        # Reached only when normal lookup fails. Reading __dict__ directly
        # avoids recursing into this method on a half-initialised instance.
        wrapped = self.__dict__.get('transfer_model')
        if wrapped is None:
            raise AttributeError(attribute)
        return getattr(wrapped, attribute)

# Create the transfer classifier instance, sized for the images we actually have
transfer = TransferClassifier(name=expert_model_name, input_shape=train_data.shape[1:])

# Hold out 10% of the training data for validation
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.1)

# Train the transfer model
epochs = 10
transfer.transfer_model.fit(
    train_data,
    train_labels,
    epochs=epochs,
    validation_data=(val_data, val_labels),
    shuffle=True,
)

# Evaluate the model on test data
test_loss, test_acc = transfer.transfer_model.evaluate(test_data, test_labels)
print(f'Test accuracy: {test_acc}')


In [None]:
#@title Sample Solution { display-mode: "form" }
# Start from the full train / test splits
train_data, train_labels = get_train_data()
test_data, test_labels = get_test_data()

# Training settings exactly as given in the exercise text
fit_options = dict(epochs = 10, validation_data = (test_data, test_labels), shuffle = True, callbacks = [monitor])

transfer = TransferClassifier(name = 'VGG16')
transfer.fit(train_data, train_labels, **fit_options)
plot_acc(transfer.history)

**This sample solution only uses VGG16. Now try using the other expert models.**

# **Milestone 5. Model Evaluation**



### Exercise (Coding)

Set your best model to the one you have trained (e.g., the transfer learning model).

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Evaluate the transfer-learning network trained above. Point `best_model` at
# a different trained Keras model if another one performed better.
# The real `test_data` / `test_labels` already in scope are used as-is;
# replacing them with random arrays would make every metric below meaningless.
best_model = transfer.transfer_model

# The model ends in a single sigmoid unit, so predict() yields one score per
# image. Scores above 0.5 count as class 1 (pneumonia), the rest as class 0
# (normal). ravel() flattens the (N, 1) output to the 1-D shape of the labels.
num_samples = len(test_data)
predicted_labels = (best_model.predict(test_data) > 0.5).astype("int32").ravel()

# Calculate accuracy
accuracy = accuracy_score(test_labels, predicted_labels)

# Listing both class ids explicitly fixes the row/column order and keeps the
# confusion matrix 2x2 even if one class is missing from the labels or from
# the predictions.
class_ids = [0, 1]
class_names = ['Normal', 'Pneumonia']

# Compute confusion matrix (rows: actual class, columns: predicted class)
conf_matrix = confusion_matrix(test_labels, predicted_labels, labels=class_ids)

# Calculate precision, recall, and F1-score for each class
report = classification_report(
    test_labels,
    predicted_labels,
    labels=class_ids,
    target_names=class_names,
)

# Print the results
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)

Total accuracy does not reflect all that we want to know about a model's performance. It's just one metric out of many possible metrics for evaluating models.

In the case of pneumonia prediction, we may be more interested in other quantities, such as 'how accurate were we on the pneumonia category?', 'how accurate were we on the normal category?', 'how many pneumonia cases were mistaken for normal?', or vice versa.



Our metrics for classification can be described in terms of a 'confusion matrix', shown below.

![Confusion Matrix](https://cdn-images-1.medium.com/max/1600/1*Z54JgbS4DUwWSknhDCvNTQ.png)

In a confusion matrix, we think in terms of 'actual' and 'predicted values'. If we take Pneumonia = 1/Positive and Normal = 0/Negative, then **what do TP, FP, TN, and FN mean?**

Answer:
1. TP: True positive (True pneumonia): Pneumonia predicted as pneumonia
2. TN: True negative (True normal): Normal predicted as normal
3. FP: False positive (False pneumonia): Normal mistaken as pneumonia
4. FN: False negative (False normal): Pneumonia mistaken as normal




The `sklearn` package makes calculating confusion matrices very quick. Its `metrics` submodule actually comes with a `confusion_matrix` tool. Let's start by grabbing that.

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

To use `confusion_matrix`, we need:
* `labels`: the labels of the data (1 - PNEUMONIA or 0 - NORMAL)
* `predictions`: what our model thinks the labels are

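To get `predictions`, threshold the model's predicted probabilities at 0.5: ```(best_model.predict(test_data) > 0.5).astype("int32")```. (Older Keras versions offered ```best_model.predict_classes(test_data)``` for this, but it has been removed from recent TensorFlow releases.)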

Please get the `predictions`, and use `accuracy_score` to print the overall test accuracy:

In [None]:
from sklearn.metrics import accuracy_score

# `best_model` must already be bound to a trained model. Fail with a clear
# message instead of scoring made-up predictions.
if best_model is None:
    raise ValueError("best_model is not set; assign your trained model before running this cell.")

# Threshold the model's sigmoid scores at 0.5 to get 0/1 labels, flattened to
# 1-D so they line up with `test_labels`. The existing `test_labels` are
# reused unchanged so later cells keep comparing against the real labels.
predicted_labels = (best_model.predict(test_data) > 0.5).astype("int32").ravel()

# Calculate accuracy using accuracy_score
accuracy = accuracy_score(test_labels, predicted_labels)

# Print the overall test accuracy
print("Overall Test Accuracy:", accuracy)

In [None]:
#@title Sample Solution
# Threshold the model's outputs at 0.5 to obtain 0/1 class predictions.
predicted_scores = best_model.predict(test_data)
predictions = (predicted_scores > 0.5).astype("int32")

# Report the fraction of correct predictions as a percentage.
accuracy_pct = accuracy_score(test_labels, predictions) * 100.0
print('Accuracy: ', accuracy_pct)

Now let's get our confusion matrix, and split it out into true positive, true negative, false positive, and false negative!

In [None]:
# Rows are the actual classes and columns the predicted ones. Listing both
# class ids explicitly keeps the matrix 2x2, ordered 0 (NORMAL) then
# 1 (PNEUMONIA), even when one class never appears in the labels or the
# predictions, so indexing it as [row][col] is always safe.
confusion = confusion_matrix(test_labels, predictions, labels=[0, 1])
print(confusion)

**How do you interpret each number?**

In [None]:
# `confusion` is indexed as [actual, predicted], with NORMAL = 0 and
# PNEUMONIA = 1, so each cell maps onto one of the four outcome counts.
tn = confusion[0, 0]  # normal predicted as normal
fp = confusion[0, 1]  # normal predicted as pneumonia
fn = confusion[1, 0]  # pneumonia predicted as normal
tp = confusion[1, 1]  # pneumonia predicted as pneumonia

print('True positive: %d' % tp)
print('True negative: %d' % tn)
print('False positive: %d' % fp)
print('False negative: %d' % fn)

We can visualize the confusion matrix with seaborn to make it easier for our eyes...

In [None]:
# grab our plotting package
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Draw the counts as an annotated heatmap, then label its axes through the
# Axes object that seaborn returns. The trailing semicolon suppresses the
# notebook's echo of the last expression.
heatmap_ax = sns.heatmap(
    confusion,
    annot=True,
    fmt='d',
    cbar_kws={'label': 'count'},
)
heatmap_ax.set_ylabel('Actual')
heatmap_ax.set_xlabel('Predicted');

**Now that we have our confusion matrix, let's take a step back and think about these questions**

What did our model confuse more?
* PNEUMONIA for NORMAL or...
* NORMAL for PNEUMONIA

Why do you think it might have confused one for the other?

What is more problematic? False positives or False negatives?

Which of these metrics do we want to keep low?

# End!


To recap, we learned what neural network models are, learned how to create/build them, and explored different types of neural networks. By introducing convolutions and more complex methods to our networks (making the convolutional neural networks and expert models), we can improve our models quite a lot.

![](https://storage.googleapis.com/kaggle-competitions/kaggle/10338/logos/header.png)