# Classifying Steel Quality

# ABSTRACT

# Business Problem 

# Data Understanding 

## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

## Global variables

In [2]:
train_data_dir = 'Steel_Images/train_images/'
# val_data_dir = 'Steel_Images/val/'
test_data_dir = 'Steel_Images/test_images/'

image_x_size = 64
image_y_size = 64

train_batch_size = 12568
# val_batch_size = 
test_batch_size = 5506

In [3]:
# val_generator = ImageDataGenerator().flow_from_directory(
#         val_data_dir, 
#         target_size=(image_x_size, image_y_size), batch_size=val_batch_size)

train_generator = ImageDataGenerator().flow_from_directory(
        train_data_dir, 
        target_size=(image_x_size, image_y_size), batch_size=train_batch_size)

test_generator = ImageDataGenerator().flow_from_directory(
        test_data_dir, 
        target_size=(image_x_size, image_y_size), batch_size=test_batch_size)

# Create the datasets
train_images, train_labels = next(train_generator)
# val_images, val_labels = next(val_generator)
test_images, test_labels = next(test_generator)

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


## Sample Images

In [4]:
#array_to_img(train_images[10])

In [5]:
#array_to_img(train_images[130])

# Data Preparation

We need to preprocess our data before it can be run through a neural network model.\

In [6]:
print(np.shape(train_images))
print(np.shape(train_labels))
# print(np.shape(val_images))
# print(np.shape(val_labels))
print(np.shape(test_images))
print(np.shape(test_labels))

(0, 64, 64, 3)
(0, 0)
(0, 64, 64, 3)
(0, 0)


##  `train_images` and `test_images`

Start with unrowing the train, validation and test images by reshaping them into an $n$ x $l$ shape.\
This will transform the image shapes from $(xxxx, 500, 500, 3)$ to $(xxxx, 750000)$

In [7]:
# Reshape the train images 
train_img_unrow = train_images.reshape(train_batch_size, -1).T
np.shape(train_img_unrow)

(0, 12568)

transform `test_images` in a similar way.

In [8]:
# Define appropriate m 
test_img_unrow = test_images.reshape(test_batch_size, -1).T

In [9]:
# Preview the shape of test_img_unrow
np.shape(test_img_unrow)

(0, 5506)

# `train_labels` and `test_labels`

In [10]:
train_labels.shape, test_labels.shape

((0, 0), (0, 0))

In [11]:
train_labels

array([], shape=(0, 0), dtype=float32)

In [12]:
# this was stored using keras.preprocessing_image, and you can get more info using the command train_generator.class_indices
train_generator.class_indices, test_generator.class_indices

({}, {})

In [13]:
train_labels_final = train_labels.T[[1]]

  train_labels_final = train_labels.T[[1]]


In [14]:
np.shape(train_labels_final)

(1, 0)

In [15]:
test_labels_final = test_labels.T[[1]]

  test_labels_final = test_labels.T[[1]]


In [16]:
np.shape(test_labels_final)

(1, 0)

1. First, use array_to_image() again on the original train_images with index 240 to look at this particular image
2. Use train_labels_final to get the 240th label

In [18]:
#array_to_img(train_images[240])

In [20]:
#train_labels_final[:,240]

## Standardize the data

Since this is a deep learning model, we want to make sure each pixel value is between 0 and 1. Therefore we  divide the entire matrix by 255 for the `train_img_unrow`,`val_img_unrow` and `test_img_unrow`. 

In [21]:
train_img_final = train_img_unrow/255
# val_img_final = val_img_unrow/255
test_img_final = test_img_unrow/255

type(train_img_unrow)

numpy.ndarray

In [22]:
train_img_final.shape

(0, 12568)

# CNN

In [24]:
# Define size we want to load out image from
image_x_size = 64
image_y_size = 64

In [25]:
val_generator = ImageDataGenerator().flow_from_directory(
        val_data_dir, 
        target_size=(image_x_size, image_y_size), batch_size=val_batch_size)

train_generator = ImageDataGenerator().flow_from_directory(
        train_data_dir, 
        target_size=(image_x_size, image_y_size), batch_size=train_batch_size)

test_generator = ImageDataGenerator().flow_from_directory(
        test_data_dir, 
        target_size=(image_x_size, image_y_size), batch_size=test_batch_size)

# Create the datasets
train_images, train_labels = next(train_generator)
val_images, val_labels = next(val_generator)
test_images, test_labels = next(test_generator)

Found 16 images belonging to 2 classes.
Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [26]:
# Standardize the data
train_images_mlp = (train_images / 255).astype('float32')
# val_images_mlp = (val_images / 255).astype('float32')
test_images_mlp = (test_images / 255).astype('float32')

In [28]:
# Build the model with multiple hidden layers
model_1 = models.Sequential()
model_1.add(layers.Conv2D(filters=32, 
                        kernel_size=(2,2),
                        strides=(1,1),
                        activation='relu',
                        padding = 'same',
                        input_shape=(image_x_size, image_y_size, 3),
                        data_format = 'channels_last'))
model_1.add(layers.MaxPooling2D(pool_size=(2,2),
                     strides=2))
model_1.add(layers.Flatten())        
model_1.add(layers.Dense(128))
model_1.add(layers.Activation('relu'))
model_1.add(layers.Dense(2))
model_1.add(layers.Activation('sigmoid'))

In [28]:
model_1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 64, 64, 32)        416       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
 dense (Dense)               (None, 128)               4194432   
                                                                 
 activation (Activation)     (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 258       
                                                        

We use the compile function to define the optimizer (adam), loss cost (binary_crossentropy), and metric (accuracy) we want to score.

In [30]:
# Define an optimizer, cost lost function, and scoring metric
model_1.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [30]:
model_1.fit(train_images_mlp, train_labels, epochs=5, batch_size=50)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1b906cfe2e0>

In [None]:
val_loss, val_acc = model_1.evaluate(val_images_mlp, val_labels)

At 512 Neurons:


In [31]:
model_2 = models.Sequential()
model_2.add(layers.Conv2D(filters=32, 
                        kernel_size=(2,2),
                        strides=(1,1),
                        activation='relu',
                        padding = 'same',
                        input_shape=(image_x_size, image_y_size, 3),
                        data_format = 'channels_last'))
model_2.add(layers.MaxPooling2D(pool_size=(2,2),
                     strides=2))
model_2.add(layers.Flatten())        
model_2.add(layers.Dense(512))
model_2.add(layers.Activation('relu'))
model_2.add(layers.Dense(2))
model_2.add(layers.Activation('sigmoid'))
model_2.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 64, 64, 32)        416       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 32, 32, 32)       0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 32768)             0         
                                                                 
 dense_4 (Dense)             (None, 512)               16777728  
                                                                 
 activation_4 (Activation)   (None, 512)               0         
                                                                 
 dense_5 (Dense)             (None, 2)                 1026      
                                                      

In [32]:
model_2.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model_2.fit(train_images_mlp, train_labels, epochs=5, batch_size=50)

Epoch 1/5


ValueError: Unexpected result of `train_function` (Empty logs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

## Build a logistic regression-based neural network

   - Initialize the parameters of the model
   - Perform forward propagation, and calculate the current loss
   - Perform backward propagation (which is basically calculating the current gradient)
   - Update the parameters (gradient descent)

The cost function is then given by:
$$J(w,b) = \dfrac{1}{l}\displaystyle\sum^l_{i=1}\mathcal{L}(\hat y^{(i)}, y^{(i)})$$

$w$ and $b$ are the unknown parameters to start with: 
- remember that $b$ is a scalar
- $w$ however, is a vector of shape $n$ x $1$, with $n$ being `horizontal_pixel x vertical_pixel x 3`

# Baseline - Logistic Regression

Using gradient descent

In [36]:
image_x_size = 64
image_y_size = 64

In [37]:
# Initialize empty array with size n
def init_w(n):
    w = np.zeros((n, 1))
    return w

In [38]:
b = 0

In [39]:
w = init_w(image_x_size*image_y_size*3)

#### Forward Propagation

In [40]:
def propagation(w, b, x, y):
    l = x.shape[1]
    y_hat = 1/(1 + np.exp(- (np.dot(w.T, x) + b)))                                  
    cost = -(1/l) * np.sum(y * np.log(y_hat) + (1-y)* np.log(1 - y_hat))    
    dw = (1/l) * np.dot(x,(y_hat - y).T)
    db = (1/l) * np.sum(y_hat - y)
    return dw, db, cost

In [41]:
dw, db, cost = propagation(w, b, train_img_final, train_labels_final)

ValueError: shapes (1,12288) and (0,12568) not aligned: 12288 (dim 1) != 0 (dim 0)

In [42]:
print(dw)

print(db)

print(cost)

NameError: name 'dw' is not defined

#### Optimization

$$w := w - \alpha * dw$$
$$b := b - \alpha * db$$

In [43]:
def optimization(w, b, x, y, num_iterations, learning_rate, print_cost = False):
    
    costs = []
    
    for i in range(num_iterations):
        dw, db, cost = propagation(w, b, x, y)    
        w = w - learning_rate*dw
        b = b - learning_rate*db
        
        # Record the costs and print them every 50 iterations
        if i % 50 == 0:
            costs.append(cost)
        if print_cost and i % 50 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    
    return w, b, costs

#### Predictions

In [None]:
def prediction(w, b, x):
    l = x.shape[1]
    y_prediction = np.zeros((1, l))
    w = w.reshape(x.shape[0], 1)
    y_hat = 1/(1 + np.exp(- (np.dot(w.T, x) + b))) 
    p = y_hat
    
    # Define our cutoff for 'True' for pneumonia, set at 50%.
    for i in range(y_hat.shape[1]):
        if (y_hat[0,i] > 0.5): 
            y_prediction[0, i] = 1
        else:
            y_prediction[0, i] = 0
    return y_prediction

## Overall Model - Baseline

In [None]:
def model(x_train, y_train, x_test, y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):

    b = 0
    w = init_w(np.shape(x_train)[0]) 

    # Gradient descent (≈ 1 line of code)
    w, b, costs = optimization(w, b, x_train, y_train, num_iterations, learning_rate, print_cost)
    
    y_pred_test = prediction(w, b, x_test)
    y_pred_train = prediction(w, b, x_train)

    # Print train/test errors
    print('train accuracy: {} %'.format(100 - np.mean(np.abs(y_pred_train - y_train)) * 100))
    print('test accuracy: {} %'.format(100 - np.mean(np.abs(y_pred_test - y_test)) * 100))

    output = {'costs': costs, 
              'y_pred_test': y_pred_test,  
              'y_pred_train' : y_pred_train,  
              'w' : w, 
              'b' : b, 
              'learning_rate' : learning_rate, 
              'num_iterations': num_iterations}
    
    return output

Began our model to train our model at 500 iterations with a learning rate of 0.01. 

In [None]:
# Baseline - Baseline. 500 iterations at 0.01 learning rate
output = model(train_img_final, train_labels_final, test_img_final, test_labels_final, 
               num_iterations=500, learning_rate=0.01, print_cost=False)

<br> train: 95%
<br> test: 87.5%
<br>With a difference of 7.5%, between our train and test, overfitting is a problem with our model. Lets see if we can reduce that.

300 iterations at 0.01 learning rate:


In [None]:
# Possibly overfitting, a lower number of iterations - at 300 - underfits our model by a tad, but not much
output = model(train_img_final, train_labels_final, val_img_final, val_labels_final, 
               num_iterations=300, learning_rate=0.01, print_cost=True)

train: 94.2%
<br>test: 87.5%

We reduced overfitting, but only by 1%. Lets see if we can reduce overfiting further by decreasing the iterations once again, but also decreasing the learning rate.

<br>200 iterations at 0.005 learning rate:

In [None]:
# Decrease the learning rate while also lowering the number of iterations made our model fit not as much, but our metric 
# scores decreased drastically
output = model(train_img_final, train_labels_final, val_img_final, val_labels_final, 
               num_iterations=200, learning_rate=0.005, print_cost=True)

train: 92.4%
<br>test: 81.25%
<br>We successfully lowered the training accuracy, however our testing accuracy also took a substantial hit, making our model worse overall.

<br>In this final iteration, we kept the low number of iterations at 200, but increased the learning rate back up to 0.01.

In [None]:
output = model(train_img_final, train_labels_final, val_img_final, val_labels_final, 
               num_iterations=200, learning_rate=0.01, print_cost=True)

<br>test: 93%
<br>train: 87.5%

This seems to be the best overall hyper-parameters to lower overfitting while retaining most most of the predictive power. 
We scored our final model on the test set to see our final model's accuracy.

In [None]:
output = model(train_img_final, train_labels_final, test_img_final, test_labels_final, 
               num_iterations=200, learning_rate=0.01, print_cost=True)

# Evaluation 

# Data Limitations



# Conclusion 