# Transfer Learning with Inception v3

In [1]:
# Importing the Keras libraries and packages
# import os
# import cv2 
import numpy as np
import pandas as pd 
import tensorflow as tf
from matplotlib import pyplot as plt
%matplotlib inline

from keras.engine import  Model
from keras.layers import Flatten, Dense, Input
# from keras_vggface.vggface import VGGFace


Using TensorFlow backend.


In [2]:
#Setting Cuda Visible Devices to 1 for the tf-gpu version to work properly
CUDA_VISIBLE_DEVICES = 1

## Load Data:

In [3]:
#now we're loading in the images with the Keras ImageDataGenerator module
from keras.preprocessing.image import ImageDataGenerator

# Anwalt_Call, Arzt_Anwalt, Kfz_Arzt, Arzt_Call, Kfz_Anwalt, Kfz_Call
# BauBE_KunstBE, BauCeo_BauBE, BauCeo_KunstBE, BauCeo_KunstCeo, KunstCeo_BauBE, KunstCeo_KunstBE
# FinBE_KomCeo, IndBE_KunstCeo 

#Setting paths and parameters: 
train_dir = r'C:\Users\melte\Desktop\ML\Binary\Block3\IndBE_KunstCeo\Train'
test_dir = r'C:\Users\melte\Desktop\ML\Binary\Block3\IndBE_KunstCeo\Test'
val_dir = r'C:\Users\melte\Desktop\ML\Binary\Block3\IndBE_KunstCeo\Val'

# nTrain = 76
# nTest = 24
# nVal = 20

nTrain = 768
nTest = 240 
nVal = 192 

c = 2   #classes
batch_size = 20

#######################
### DATA GENERATOR ####
#######################

datagen = ImageDataGenerator(
    rescale=1./255, 
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

#############################################
### LOADING IN TRAIN, TEST AND VALIDATION SETS ####
#############################################

train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical')

test_generator = datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical')

val_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical')

Found 768 images belonging to 2 classes.
Found 240 images belonging to 2 classes.
Found 192 images belonging to 2 classes.


## Import Network

In [4]:
from keras.applications import InceptionV3
import keras.layers as layers 
from keras import models

#This is the pretrained network we're using for transfer learning: 
model_vgg16 = InceptionV3(weights='imagenet',
                  include_top=False,
                  input_shape=(224, 224, 3))

# # Create the model
# model_vgg16 = models.Sequential()
 
# # Add the vgg convolutional base model
# model_vgg16.add(base_model)
 
# # Add new layers
# model_vgg16.add(layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None))
# # model.add(layers.Dense(1024, activation='relu'))
# # model.add(layers.Dropout(0.5))
# # model.add(layers.Dense(c, activation='softmax'))
 
# # Show a summary of the model. Check the number of trainable parameters
# # model.summary()

In [5]:
# # #This is what the model looks like
# base_model.summary()

In [6]:
# model_vgg16 = Model(inputs=base_model.input, outputs=base_model.get_layer('activation_94').output)

## Creating the feature vectors for our train and validation sets:

In [7]:
#Let's initiate our feature and label arrays: 
train_features = np.zeros(shape=(nTrain, 5, 5, 2048))
train_labels = np.zeros(shape=(nTrain,c))

test_features = np.zeros(shape=(nTest, 5, 5, 2048))
test_labels = np.zeros(shape=(nTest,c))

val_features = np.zeros(shape=(nVal, 5, 5, 2048))
val_labels = np.zeros(shape=(nVal,c))

In [8]:
i = 0
for inputs_batch, labels_batch in train_generator:
    features_batch = model_vgg16.predict(inputs_batch)
    train_features[i * batch_size : (i + 1) * batch_size] = features_batch
    train_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    i += 1
    if i * batch_size >= nTrain:
        break
         
train_features = np.reshape(train_features, (nTrain, 5* 5*  2048))

i = 0
for inputs_batch, labels_batch in test_generator:
    features_batch = model_vgg16.predict(inputs_batch)
    test_features[i * batch_size : (i + 1) * batch_size] = features_batch
    test_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    i += 1
    if i * batch_size >= nTest:
        break       
test_features = np.reshape(test_features, (nTest, 5* 5*  2048))


i = 0
for inputs_batch, labels_batch in val_generator:
    features_batch_val = model_vgg16.predict(inputs_batch)
    val_features[i * batch_size : (i + 1) * batch_size] = features_batch_val
    val_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    i += 1
    if i * batch_size >= nVal:
        break
         
val_features = np.reshape(val_features, (nVal, 5* 5* 2048))

## SVM 

In [9]:
from sklearn.metrics import accuracy_score, classification_report 
from sklearn.svm import SVC 

SVM = SVC(kernel = 'rbf', gamma='scale', C=20)
SVM.fit(train_features, train_labels[:,0])


train_SVM = SVM.predict(train_features)
val_SVM = SVM.predict(val_features)
test_SVM = SVM.predict(test_features)


print("\nrSVM - Train Confusion Matrix\n\n",
      pd.crosstab(train_labels[:,0], train_SVM, rownames = ["Actual"], colnames = ["Predicted"]))
print("\nrSVM - Train Accuracy:", round(accuracy_score(train_labels[:,0], train_SVM),3))
print("\nrSVM - Train Classification Report\n",classification_report(train_labels[:,0], train_SVM)) 


print("\n\nrSVM - Val Confusion Matrix\n\n",
      pd.crosstab(val_labels[:,0], val_SVM, rownames = ["Actual"], colnames = ["Predicted"]))
print("\nrSVM - Val Accuracy:", round(accuracy_score(val_labels[:,0], val_SVM),3))
print("\nrSVM - Test Classification Report\n",classification_report(val_labels[:,0], val_SVM))

print("\n\nrSVM - Test Confusion Matrix\n\n",
      pd.crosstab(test_labels[:,0], test_SVM, rownames = ["Actual"], colnames = ["Predicted"]))
print("\nrSVM - Test Accuracy:", round(accuracy_score(test_labels[:,0], test_SVM),3))
print("\nrSVM - Test Classification Report\n",classification_report(test_labels[:,0], test_SVM))


rSVM - Train Confusion Matrix

 Predicted  0.0  1.0
Actual             
0.0        384    0
1.0          0  384

rSVM - Train Accuracy: 1.0

rSVM - Train Classification Report
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       384
         1.0       1.00      1.00      1.00       384

   micro avg       1.00      1.00      1.00       768
   macro avg       1.00      1.00      1.00       768
weighted avg       1.00      1.00      1.00       768



rSVM - Val Confusion Matrix

 Predicted  0.0  1.0
Actual             
0.0         52   44
1.0         39   57

rSVM - Val Accuracy: 0.568

rSVM - Test Classification Report
               precision    recall  f1-score   support

         0.0       0.57      0.54      0.56        96
         1.0       0.56      0.59      0.58        96

   micro avg       0.57      0.57      0.57       192
   macro avg       0.57      0.57      0.57       192
weighted avg       0.57      0.57      0.57    

In [10]:
# from sklearn.neighbors import KNeighborsClassifier 
# from sklearn.metrics import accuracy_score, classification_report


# array = np.empty((5,4))
# k_valchart = pd.DataFrame(array)
# k_valchart.columns = ["c_value", "Train_acc", "Val_acc", "Test_acc"]

# vals = [1,20,40,60,80,100]

# for i in range (len(vals)):
#     SVM = SVC(kernel = 'rbf', gamma = 'scale', C=(vals[i]))
#     SVM.fit(train_features, train_labels[:,0])
#     train_SVM = SVM.predict(train_features)
#     test_SVM = SVM.predict(test_features)
#     val_SVM = SVM.predict(val_features)
    
#     tr_accscore = round(accuracy_score(train_labels[:,0], train_SVM), 3)
#     test_accscore = round(accuracy_score(test_labels[:,0], test_SVM), 3)
#     val_accscore = round(accuracy_score(val_labels[:,0], val_SVM), 3)
    
#     k_valchart.loc[i, 'c_value'] = vals[i]
#     k_valchart.loc[i, 'Train_acc'] = tr_accscore
#     k_valchart.loc[i, 'Test_acc'] = test_accscore
#     k_valchart.loc[i, 'Val_acc'] = val_accscore
    
# #PLOTTING ACCURACIES OVER DIFFERENT K VALUES 
# plt.figure() 
# plt.xlabel('c_value')
# plt.ylabel('Accuracy')
# plt.plot(k_valchart["c_value"], k_valchart["Train_acc"])
# plt.plot(k_valchart["c_value"], k_valchart["Test_acc"])
# plt.plot(k_valchart["c_value"], k_valchart["Val_acc"])

# plt.axis([0.9, 5, 0.2, 1.005])
# plt.xticks([1,20,40,60,80,100])

# for a,b in zip(k_valchart["c_value"], k_valchart["Train_acc"]):
#     plt.text(a, b, str(b), fontsize = 10)

# for a,b in zip(k_valchart["c_value"], k_valchart["Test_acc"]):
#     plt.text(a, b, str(b), fontsize = 10)
    
# for a,b in zip(k_valchart["c_value"], k_valchart["Val_acc"]):
#     plt.text(a, b, str(b), fontsize = 10)
    
# plt.legend(loc = 'upper right')
# plt.show()

## KNN Classifier

In [11]:
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score, classification_report


knn_fit = KNeighborsClassifier(n_neighbors = 10, p = 2, metric = 'minkowski')

knn_fit.fit(train_features, train_labels[:,0])

train_res = knn_fit.predict(train_features)
val_res = knn_fit.predict(val_features)
test_res = knn_fit.predict(test_features)
# print(train_res)

print("\nrSVM - Train Confusion Matrix\n\n",
      pd.crosstab(train_labels[:,0], train_SVM, rownames = ["Actual"], colnames = ["Predicted"]))
print("\nrSVM - Train Accuracy:", round(accuracy_score(train_labels[:,0], train_SVM),3))
print("\nrSVM - Train Classification Report\n",classification_report(train_labels[:,0], train_SVM)) 


print("\n\nrSVM - Val Confusion Matrix\n\n",
      pd.crosstab(val_labels[:,0], val_SVM, rownames = ["Actual"], colnames = ["Predicted"]))
print("\nrSVM - Val Accuracy:", round(accuracy_score(val_labels[:,0], val_SVM),3))
print("\nrSVM - Test Classification Report\n",classification_report(val_labels[:,0], val_SVM))

print("\n\nrSVM - Test Confusion Matrix\n\n",
      pd.crosstab(test_labels[:,0], test_SVM, rownames = ["Actual"], colnames = ["Predicted"]))
print("\nrSVM - Test Accuracy:", round(accuracy_score(test_labels[:,0], test_SVM),3))
print("\nrSVM - Test Classification Report\n",classification_report(test_labels[:,0], test_SVM))


rSVM - Train Confusion Matrix

 Predicted  0.0  1.0
Actual             
0.0        384    0
1.0          0  384

rSVM - Train Accuracy: 1.0

rSVM - Train Classification Report
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       384
         1.0       1.00      1.00      1.00       384

   micro avg       1.00      1.00      1.00       768
   macro avg       1.00      1.00      1.00       768
weighted avg       1.00      1.00      1.00       768



rSVM - Val Confusion Matrix

 Predicted  0.0  1.0
Actual             
0.0         52   44
1.0         39   57

rSVM - Val Accuracy: 0.568

rSVM - Test Classification Report
               precision    recall  f1-score   support

         0.0       0.57      0.54      0.56        96
         1.0       0.56      0.59      0.58        96

   micro avg       0.57      0.57      0.57       192
   macro avg       0.57      0.57      0.57       192
weighted avg       0.57      0.57      0.57    

### Plotting accuracies for varying K values:

In [12]:
# from sklearn.neighbors import KNeighborsClassifier 
# from sklearn.metrics import accuracy_score, classification_report

# array = np.empty((5,4))
# k_valchart = pd.DataFrame(array)
# k_valchart.columns = ["K_value", "Train_acc", "Test_acc", "Val_acc"]

# k_vals = [1,5,10,15,20,30]

# for i in range (len(k_vals)):
#     knn_fit = KNeighborsClassifier(n_neighbors = k_vals[i], p=2, metric='minkowski')
#     knn_fit.fit(train_features, train_labels[:,0])
#     tr_accscore = round(accuracy_score(train_labels[:,0], knn_fit.predict(train_features)), 3)
#     test_accscore = round(accuracy_score(test_labels[:,0], knn_fit.predict(test_features)), 3)
#     val_accscore = round(accuracy_score(val_labels[:,0], knn_fit.predict(val_features)), 3)
    
#     k_valchart.loc[i, 'K_value'] = k_vals[i]
#     k_valchart.loc[i, 'Train_acc'] = tr_accscore
#     k_valchart.loc[i, 'Test_acc'] = test_accscore
#     k_valchart.loc[i, 'Val_acc'] = val_accscore
    
# #PLOTTING ACCURACIES OVER DIFFERENT K VALUES 
# plt.figure() 
# plt.xlabel('K_value')
# plt.ylabel('Accuracy')
# plt.plot(k_valchart["K_value"], k_valchart["Train_acc"])
# plt.plot(k_valchart["K_value"], k_valchart["Test_acc"])
# plt.plot(k_valchart["K_value"], k_valchart["Val_acc"])

# plt.axis([0.9, 5, 0.2, 1.005])
# plt.xticks([1,5,10,15,20,30])

# for a,b in zip(k_valchart["K_value"], k_valchart["Train_acc"]):
#     plt.text(a, b, str(b), fontsize = 10)
    
# for a,b in zip(k_valchart["K_value"], k_valchart["Test_acc"]):
#     plt.text(a, b, str(b), fontsize = 10)
    
# for a,b in zip(k_valchart["K_value"], k_valchart["Val_acc"]):
#     plt.text(a, b, str(b), fontsize = 10)
    
# plt.legend(loc = 'upper right')
# plt.show()