In [38]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image

from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils.np_utils import to_categorical
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve, roc_curve, auc
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.optimizers import RMSprop,Adam

# LABELING

In [39]:
def _label_images(folder):
    """Build the label table for the images stored in `folder`.

    The class is read from the file name: a name containing 'handwrit' is
    labelled 1, a name containing 'printout' is labelled 0, and any other
    file is skipped.

    Returns a DataFrame with the columns "filename" and "categories".
    """
    records = []
    for img in os.listdir(folder):
        if 'handwrit' in img:
            records.append((img, 1))
        elif "printout" in img:
            records.append((img, 0))
    # Naming the columns here keeps the schema intact even when no file matches.
    return pd.DataFrame(records, columns=["filename", "categories"])


folder = "../input/writing/train"
folder1 = "../input/writing/valid"

train_label = _label_images(folder)      # Labels for the train dataset
dev_label = _label_images(folder1)       # Labels for the dev (development) dataset


In [40]:
# Preview the first five labelled training files.
train_label.head(n=5)

In [41]:
# Preview the first five labelled dev files.
dev_label.head(n=5)

In [42]:
# Swap the numeric labels for class-name strings in both label tables.
for label_frame in (train_label, dev_label):
    label_frame["categories"] = label_frame["categories"].replace({0: 'printout', 1: 'handwrit'})

In [43]:
# Shuffle each label table once with a fixed seed, then renumber its rows from 0.
train = shuffle(train_label, random_state=0).reset_index(drop=True)
dev = shuffle(dev_label, random_state=0).reset_index(drop=True)

In [44]:
# Bar chart of how many training images fall in each category.
train['categories'].value_counts().plot(kind='bar')

In [45]:
# Bar chart of how many dev images fall in each category.
dev['categories'].value_counts().plot(kind='bar')

# PREPARE TRAIN AND DEV SET

In [46]:
#Prepare train dataset

train_datagen = ImageDataGenerator(
    # pixel scaling
    rescale=1./255,
    # random geometric augmentation
    rotation_range=5,                      # randomly rotate images in the range 5 degrees
    shear_range=0.1,
    zoom_range=0.1,                        # randomly zoom image 10%
    width_shift_range=0.1,                 # randomly shift images horizontally 10%
    height_shift_range=0.1,                # randomly shift images vertically 10%
    horizontal_flip=False,                 # flips are switched off
    vertical_flip=False,
    # dataset-wide / per-sample normalisation, all switched off
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# Batches of 25 images read from the train folder, in the row order of `train`.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train,
    directory="../input/writing/train",
    x_col='filename',
    y_col='categories',
    class_mode='categorical',
    target_size=(64,256),
    batch_size=25,
    shuffle=False,
)

In [47]:

#Prepare dev dataset

# The dev images are only rescaled; no augmentation is applied to them.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Batches of 25 images read from the valid folder, in the row order of `dev`.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=dev,
    directory="../input/writing/valid",
    x_col='filename',
    y_col='categories',
    class_mode='categorical',
    target_size=(64,256),
    batch_size=25,
    shuffle=False,
)

In [48]:
#An example from train data

# One randomly picked training file, pushed repeatedly through the augmenting generator.
example_df = train_label.sample(n=1).reset_index(drop=True)

example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="../input/writing/train",
    x_col='filename',
    y_col='categories',
    class_mode='categorical',
    target_size=(64,256),
)

plt.figure(figsize=(12, 12))
for position in range(1, 16):
    plt.subplot(5, 3, position)
    # Pull one batch from the generator and show its first image.
    X_batch, Y_batch = next(example_generator)
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

# CREATE MODEL

In [61]:
#Create Model

def _add_conv_block(model, filters, kernel_size, strides=(1, 1), **conv_kwargs):
    """Append Conv2D -> BatchNormalization -> MaxPooling2D(2x2) -> Dropout(0.5) to `model`.

    Extra keyword arguments (e.g. `input_shape` for the first block) are
    passed straight to the Conv2D layer.
    """
    model.add(Conv2D(filters=filters, kernel_size=kernel_size, padding="same",
                     strides=strides, activation="relu", **conv_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))


model = Sequential()

# Only the first layer declares the input shape; it matches the generators'
# target_size of (64, 256) with 3 colour channels.
_add_conv_block(model, 8, (5, 5), strides=(2, 2), input_shape=(64, 256, 3))   #conv1
_add_conv_block(model, 16, (3, 3))                                            #conv2
_add_conv_block(model, 32, (3, 3))                                            #conv3
_add_conv_block(model, 64, (1, 1))                                            #conv4

#FC1(Fully Connected) Layer
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

#FC2(Fully Connected) Layer
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Two output units, one per class, matching class_mode='categorical' in the generators.
model.add(Dense(2, activation='softmax'))

# NOTE(review): 0.1 is a very large learning rate for RMSprop; it is kept as in
# the original and relies on the ReduceLROnPlateau callback to bring it down - confirm.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)

model.compile(optimizer=optimizer,
              loss="categorical_crossentropy",
              metrics=[tf.keras.metrics.CategoricalAccuracy()])

model.summary()

# MODEL ARCHITECTURE

## Why ReLU in the hidden layers?
- The main advantage of the ReLU function, which is widely used in Convolutional Neural Networks (CNNs) and in hidden (intermediate) layers, is that it does not activate all neurons at the same time.
- If a neuron's input is negative, its output is zero, which means that neuron is not activated.
- This makes ReLU cheaper to compute and faster to train with than the hyperbolic tangent and sigmoid functions.
- Therefore, ReLU is preferred in multilayer neural networks.

## Why do the filters start at 8 and increase with each CONV layer?
- Convolution is the first step in extracting valuable features from an image.
- A CNN processes the input image by scanning it with filters.
- Layers close to the input layer learn fewer filters; layers close to the output layer learn more filters.
- By convention, the number of filters is a power of 2.

## Why do I use padding?
- Using padding improves the model's performance.
- Without padding, the volume shrinks after each layer and information is lost quickly.

## Kernel Size
- It determines the height and width of the convolution window.
- If the input image is larger than 128x128, start with 5x5 or 7x7 and move quickly down to 3x3. Otherwise, stay with 3x3 or 1x1.
- In this model I also used a 1x1 kernel, because my images (64x256) have the same number of pixels as a 128x128 image.

## Strides
- It determines how many pixels the filter moves across the input image at each step.

## BatchNormalization
- Batch normalization is a layer that allows every layer of the network to learn more independently.
- With batch normalization, learning becomes more efficient; it can also act as regularization to help avoid overfitting.
- It is often placed just after defining the Sequential model and after the convolution layers.

## Pooling Layer
- It works on each feature map separately to create a new set of pooled feature maps.
- Here it is applied with a 2x2 window over the feature maps.
- I used max pooling because it is generally used on visual data.

## Dropout Layer
- Since most of the parameters are concentrated in the FCL (Fully Connected Layer), it tends to cause overfitting.
- Dropout is one of the techniques that reduce overfitting.
- It should be 0.5-0.8 in the hidden layers.
- It should be around 0.8 in the input layer.

## Flatten Layer
- It flattens the pooled feature maps into a single input vector for processing in the fully connected layers.

## FCL (Fully Connected Layer)
- FCL looks like a regular neural network connecting all neurons and forms the last few layers in the network. 
- The output from flatten layer is fed to this FCL layer.

## Why the RMSprop optimizer?
- I first tried Adam, the most widely used optimizer, but the dev-set loss was very high and it did not improve the model's performance much, whereas RMSprop improved the model's performance significantly.
- When choosing an optimizer, I try different ones starting from the most widely used, so that I find the optimizer best suited to the model.

## Why Categorical Crossentropy Loss?
- It computes the cross-entropy loss between the true labels and the predicted labels. This matters most for us because predicting the true label correctly is what the model is for.
- Our two categories are one-hot encoded, so this loss function is appropriate.

## Why the Categorical Accuracy metric?

- This metric creates two local variables, total and count, that are used to compute the frequency with which y_pred matches y_true.
- This frequency is ultimately returned as categorical accuracy: an idempotent operation that simply divides total by count.
- A metric is a function used to judge the performance of the model, so this metric is well suited to our task.


In [50]:
epochs = 50  # upper bound on training epochs; increase for a longer run
# Must equal the batch_size=25 that the train/dev generators were created with:
# the step counts derived from it only cover the whole dataset when both match.
batch_size = 25

In [62]:
# Stop training after 10 epochs without improvement of the monitored quantity
# (EarlyStopping is left on its default monitor).
earlystop = EarlyStopping(patience=10)

# Halve the learning rate after 2 epochs without improvement in validation
# accuracy. The model is compiled with CategoricalAccuracy, so the logged key is
# 'val_categorical_accuracy' (the same key the history plot reads); the former
# 'val_acc' is never logged, which left this callback inactive.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

callbacks = [earlystop, learning_rate_reduction]



# ReduceLRonPlateau
- Reduce learning rate when a metric has stopped improving.
- Models often benefit from reducing the learning rate by a factor of 2-10 once learning stagnates. 
- This callback monitors a quantity and if no improvement is seen for a 'patience' number of epochs, the learning rate is reduced.

# EarlyStopping
- Stop training when a monitored metric has stopped improving.
- Assuming the goal of a training is to minimize the loss.
- With this, the metric to be monitored would be 'loss', and mode would be 'min'. 
- A model.fit() training loop will check at end of every epoch whether the loss is no longer decreasing, considering the min_delta and patience if applicable.
- Once it's found no longer decreasing, model.stop_training is marked True and the training terminates.



In [63]:
# Train on the augmented generator and validate on the dev generator every epoch.
# `fit` accepts generators directly (`fit_generator` is deprecated). The step
# counts come from the generators themselves, so one epoch is exactly one pass
# over each dataset whatever batch size the generators were built with.
history = model.fit(train_generator,
                    epochs=epochs,
                    validation_data=validation_generator,
                    steps_per_epoch=len(train_generator),
                    validation_steps=len(validation_generator),
                    callbacks=callbacks)

In [68]:
# Training vs. validation curves: loss on top, accuracy below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

# Early stopping makes the number of epochs vary, so derive the x axis from the
# recorded history instead of hard-coding it.
epochs_run = np.arange(1, len(history.history['loss']) + 1)

ax1.plot(epochs_run, history.history['loss'], color='b', label="Training loss")
ax1.plot(epochs_run, history.history['val_loss'], color='r', label="validation loss")
ax1.set_xticks(epochs_run)
ax1.set_yticks(np.arange(0, 1, 0.1))
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend(loc='best', shadow=True)

ax2.plot(epochs_run, history.history['categorical_accuracy'], color='b', label="Training accuracy")
ax2.plot(epochs_run, history.history['val_categorical_accuracy'], color='r',label="Validation accuracy")
ax2.set_xticks(epochs_run)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Categorical accuracy')
# Each axes gets its own legend (plt.legend would only label the last one).
legend = ax2.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()

In [91]:
# Persist the trained weights to an HDF5 file in the working directory.
weights_path = "model.h5"
model.save_weights(weights_path)

# Prepare Testing Data

In [70]:
# One row per file found in the test directory.
test_filenames = os.listdir("../input/writing/test")
test = pd.DataFrame({'filename': test_filenames})

# Number of test files.
nb_samples = len(test.index)

In [71]:
# Display the test table built above (one 'filename' row per file in the test directory).
test

# Create Testing Generator

In [72]:
# Test images are only rescaled, never augmented.
test_gen = ImageDataGenerator(rescale=1./255)

# No labels are requested (y_col / class_mode are None) and shuffling is off, so
# batches follow the row order of `test`.
test_generator = test_gen.flow_from_dataframe(
    dataframe=test,
    directory="../input/writing/test",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(64,256),
    batch_size=32,
    shuffle=False,
)

# Predict

In [73]:
# Class probabilities for every test image, one row per row of `test`
# (`predict` accepts generators directly; `predict_generator` is deprecated).
predict = model.predict(test_generator)

# For categorical classification the prediction holds one probability per category,
# so pick the category with the highest probability using numpy argmax.
predicted_index = np.argmax(predict, axis=-1)

# train_generator.class_indices maps class name -> output index; invert it to
# translate the predicted indices back into the generator's class names.
label_map = dict((v,k) for k,v in train_generator.class_indices.items())

# Single assignment, so re-running this cell gives the same column again.
test['predicted'] = [label_map[index] for index in predicted_index]

In [74]:
# Display the test table again, now with the 'predicted' column added by the prediction cell.
test

# Predicted Result with Images

In [75]:
# Show the first 25 test images in a 5x5 grid, each captioned with its file name
# and predicted class.
sample_test = test.head(25)

plt.figure(figsize=(24, 25))
# Grid positions come from enumerate, not from the DataFrame index, so the
# layout does not depend on how `test` happens to be indexed.
for position, (filename, category) in enumerate(
        zip(sample_test['filename'], sample_test['predicted']), start=1):
    img = load_img(os.path.join("../input/writing/test", filename), target_size=(64,256))
    plt.subplot(5, 5, position)
    plt.imshow(img)
    plt.xlabel(filename + '(' + "Predicted:{}".format(category) + ')' )
plt.tight_layout()
plt.show()

In [76]:
# Ground truth vs. prediction for the test set, encoded as 1 = handwrit, 0 = printout.
#
# The true class is read from the file names already stored in `test`, so every
# label sits on the same row as its prediction. Re-listing the directory and
# joining by position misaligned the two columns whenever a file matched
# neither keyword.
is_handwrit = test['filename'].str.contains('handwrit', regex=False)
is_printout = test['filename'].str.contains('printout', regex=False)
has_label = is_handwrit | is_printout      # files matching neither keyword are left out

# 'handwrit' takes precedence when a name contains both keywords, as before.
# The index of `test` is kept so each row can be traced back to its prediction.
test_conf = pd.DataFrame({
    'Real_Label': is_handwrit[has_label].astype(int),
    'Predicted_Label': test.loc[has_label, 'predicted'].map({'handwrit': 1, 'printout': 0}),
})

# The evaluation cells below refer to these two series by name.
Real_Label = test_conf['Real_Label']
Predicted_Label = test_conf['Predicted_Label']

test_conf

In [104]:
# NOTE(review): `cm_df` is first assigned in the confusion-matrix cell further down
# (`cm_df = pd.DataFrame(matrix, ...)`), so on a fresh kernel this cell raises
# NameError unless that cell has been run first.
cm_df

In [100]:
# NOTE(review): `matrix` is first assigned in the confusion-matrix cell further down
# (`matrix = confusion_matrix(...)`), so on a fresh kernel this cell raises
# NameError unless that cell has been run first.
matrix

In [107]:
# Confusion matrix of true vs. predicted test labels (0 = printout, 1 = handwrit).
# The labels are read from `test_conf`, and `labels=[0, 1]` pins the row/column
# order so the matrix is always 2x2 and matches the names given below.
matrix = confusion_matrix(test_conf['Real_Label'], test_conf['Predicted_Label'], labels=[0, 1])

cm_df = pd.DataFrame(matrix,
                     index = ['PRINTOUT','HANDWRIT'], 
                     columns = ['PRINTOUT','HANDWRIT'])

plt.figure(figsize=(10,10))
# fmt='d' prints the cell counts as plain integers.
sns.heatmap(cm_df, annot=True, fmt='d')
plt.title('Confusion Matrix')
plt.ylabel('TRUE LABEL')
plt.xlabel('PREDICTED LABEL')
plt.show()


In [78]:
# Per-class precision / recall / F1 on the test set. The labels are read from
# `test_conf`; labels=[0, 1] with target_names names the rows (0 = printout, 1 = handwrit).
print(classification_report(test_conf['Real_Label'],
                            test_conf['Predicted_Label'],
                            labels=[0, 1],
                            target_names=['printout', 'handwrit']))

In [79]:
# ROC AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (handwrit = 1) rather than from thresholded 0/1 labels.
# Rows of `predict` are selected through test_conf's index so that each score
# lines up with its true label.
handwrit_prob = predict[test_conf.index.to_numpy(), train_generator.class_indices['handwrit']]

score = roc_auc_score(test_conf['Real_Label'], handwrit_prob)
print(f"ROC AUC: {score:.4f}")

In [83]:
# Curves are traced over the predicted probability of the positive class
# (handwrit = 1); with hard 0/1 predictions each curve has a single operating point.
# Rows of `predict` are selected through test_conf's index so scores and labels align.
y_true = test_conf['Real_Label']
handwrit_prob = predict[test_conf.index.to_numpy(), train_generator.class_indices['handwrit']]

fpr, tpr, thresholds = roc_curve(y_true, handwrit_prob)
roc_auc = auc(fpr, tpr)                      # area under the ROC curve

precision, recall, thresholds= precision_recall_curve(y_true, handwrit_prob)
auc_score = auc(recall, precision)           # area under the precision/recall curve

# F1 is defined on class decisions, so it uses the thresholded predictions.
f1 = f1_score(y_true, test_conf['Predicted_Label'])

# PRECISION / RECALL

In [84]:
# Baseline for the plot: the share of positive (handwrit = 1) samples among the
# true test labels, read from `test_conf`.
no_skill = (test_conf['Real_Label'] == 1).mean()

plt.figure(figsize=(10, 10))

print('MODEL: f1=%.3f auc=%.3f' % (f1, auc_score))

plt.plot(recall, precision, marker='.', label='PRECISION/RECALL f1=%.3f AUC=%.3f' % (f1, auc_score))
plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.show()

In [85]:
# "No skill" reference: a constant score for every sample, evaluated against the
# TRUE labels (the predicted labels must not be used as ground truth here).
ns_probs = [0 for _ in range(len(test_conf))]
ns_auc = roc_auc_score(test_conf['Real_Label'], ns_probs)
ns_fpr, ns_tpr = roc_curve(test_conf['Real_Label'], ns_probs)[:2]

plt.figure(figsize=(10, 10))

plt.plot(ns_fpr, ns_tpr, linestyle='--', label="No Skill: ROC AUC=%.3f" % (ns_auc))
# The legend reports the ROC AUC (`roc_auc`), not the precision/recall AUC (`auc_score`).
plt.plot(fpr, tpr, marker='.', label='ROC AUC=%.3f' % (roc_auc))
# axis labels
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# show the legend
plt.legend()
# show the plot
plt.show()

# CONCLUSION

### First, I see that the dev-set accuracy and loss are better than the training-set ones. Why?

- We may have a small validation set (2500).
- We may have highly unbalanced data in the validation set, but I do not think so.
- We use regularization methods such as Dropout. Training accuracy is computed on the regularized model, but validation accuracy is computed on the unregularized model. Regularization introduces some noise into the loss during training, so training accuracy is lower than expected; when evaluating, the model does not use regularization, so there is no noise and validation accuracy does not drop.

### PRECISION / RECALL, F1 Score

- Precision describes how good a model is at predicting the positive class; it is also referred to as the positive predictive value. This is important for us because what matters in the model is correct positive predictions, and our precision/recall scores are good.
- But don't forget: reviewing both precision and recall is useful in cases where there is an imbalance in the observations between the two classes, specifically when there are many examples of no event (class 0) and only a few examples of an event (class 1). So this graph may not be useful for us.
- The F1 score is the harmonic mean of precision and recall.

### AUC/ROC CURVE

- The ROC curve is an evaluation metric for classification problems. It is a probability curve that plots the TPR (True Positive Rate) against the FPR (False Positive Rate) at various threshold values and essentially separates the signal from the noise.
- The AUC is a measure of the classifier's ability to distinguish between classes and is used as a summary of the ROC curve.
- ROC curves should be used when there are roughly equal numbers of observations for each class. So this graph is more useful for us than the Precision/Recall curve.
- The ROC/AUC scores look good, so we can say the model predicts well.

### Confusion Matrix

- When we review the confusion matrix, we see good results; only the FP (False Positive) values are a problem. That is, our model classified some printout data as handwrit data.
- This is to be expected, because some samples resemble printout data or are noisy, which causes these results.

- We can say that the model's development data should be increased and made more diverse. Different approaches can also be tried by changing the parameters of the model.

