In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import h5py
from keras.applications import vgg16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
% matplotlib notebook
from ipywidgets import FloatProgress
from IPython.display import display
from skimage.transform import resize
import time
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Creating files for both training and testing

In [2]:
def save_file(X,y,name='training'):
    h5f = h5py.File('data/'+name+'_svm_f.h5', 'w')
    h5f.create_dataset('data', data=X)
    h5f.create_dataset('labels', data=y)
    h5f.close()
    
def arrays_from_files(type_ = 'training'):
    np.random.seed(42) 
    if type_ == 'training': dir_, size = 'data/sc5', 3810 
    elif type_ == 'test': dir_, size = 'data/sc5-test', 1252
        
    X, y = np.zeros((size, 240, 800, 3)), []#np.empty(shape=[size], dtype='<U1')
    count=0
    for root, dirs, files in os.walk(dir_):
        for file in files:
            if file.endswith("jpg"):
                X[count] = plt.imread(os.path.join(root, file))
                #print(file.split(".")[0])
                #y[count] = file.split(".")[0]
                y.append(file.split(".")[0])
                count+=1
    #save_file(X,y,name=type_)
    return X, y

def extract_features(model,img,input_size=224,exit_layer = 'fc2'):
    x = resize(img,(input_size,input_size))
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    model_extractfeatures = Model(input=model.input, output=model.get_layer(exit_layer).output)
    fc2_features = np.squeeze(model_extractfeatures.predict(x))
    return fc2_features

def extract(X, model):
    extracted_features = np.zeros((X.shape[0],4096))
    start = time.time()
    for i in range(len(X)):
        extracted_features[i] = extract_features(model,img=X[i])
        if i%100 == 0:
            print("{} %".format(i/100))
            t = time.time() - start
            print("time eclipsed:\nSeconds: {}\nMinutes: {}\nHours: {}".format(t,t/60,t/3600))
    print("TOTAL TIME: {}".format(time.time()-start))
    return extracted_features

def extract_efficient(X, model, type_='train'):
    if not os.path.exists("data/"+type_+"ing_svm"):
        os.mkdir("data/"+type_+"ing_svm")
    predictions = []
    start = time.time()
    for i in range(len(X)):
        predictions.append(extract_features(model,img=X[i]))
        if i%10==0:
            print("{:.1f} percent done".format((i/len(X))*100))
            t = time.time() - start
            print("time eclipsed:\nSeconds: {}\nMinutes: {}\nHours: {}".format(t,t/60,t/3600))
            np.savez("data/"+type_+"ing_svm/"+type_+str(i)+'.npz', *predictions)
            predictions=[]

# VGG16 Model load

In [3]:
model = vgg16.VGG16(weights='imagenet', include_top=True)

## Training

In [None]:
X, y = arrays_from_files(type_ = 'training')

In [None]:
extract_efficient(X, model, type_='train')

In [None]:
X_train = np.zeros((X.shape[0],4096))
count = 0
for root, dirs, files in os.walk('data/training_svm/'):
    for file in files:
        npzfile = np.load(os.path.join(root, file))
        for i in npzfile.files:
            X_train[count] = npzfile[i]
            count+=1

In [None]:
np.save("data/SVM-X_train", X_train)

In [None]:
pd.DataFrame(y).to_csv("data/SVM-y_train.csv")

## Testing

In [4]:
X, y = arrays_from_files(type_ = 'test')

In [None]:
extract_efficient(X, model, type_='test')

In [5]:
X_test = np.zeros((1252,4096))
count = 0
for root, dirs, files in os.walk('data/testing_svm/'):
    for file in files:
        npzfile = np.load(os.path.join(root, file))
        for i in npzfile.files:
            X_test[count] = npzfile[i]
            count+=1

In [7]:
np.save("data/SVM-X_test", X_test)
#pd.DataFrame(y).to_csv("data/SVM-y_test.csv")

In [6]:
X_test

array([[0.        , 0.50940722, 0.        , ..., 3.21446848, 2.65405488,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.89573222, 2.59307289,
        0.        ],
       [0.89671576, 0.49614137, 0.        , ..., 1.48719215, 5.90065384,
        0.        ],
       ...,
       [0.        , 0.        , 0.59858841, ..., 5.04070425, 2.4119308 ,
        0.        ],
       [1.96803379, 0.        , 0.        , ..., 3.44944263, 2.0387485 ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])