In [14]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model  
import numpy as np
import pandas as pd

In [2]:
# Load VGG16 with ImageNet weights and without its top layers
# (include_top=False); the model is used below as an image feature extractor.
model = VGG16(
    weights="imagenet",
    include_top=False,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
def conver_image_to_features(model, img_path, img_resize = (224, 224)):
    """Turn one image file into a flat feature vector using ``model``.

    Args:
        model: Object exposing ``predict``; it receives a batch holding the
            single preprocessed image.
        img_path: Path of the image file to load.
        img_resize: Target size handed to ``image.load_img``
            (defaults to ``(224, 224)``).

    Returns:
        The output of ``model.predict`` reshaped to one dimension.
    """
    loaded = image.load_img(img_path, target_size=img_resize)
    # Add a leading batch axis so the model sees a batch of one image.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    # Apply the VGG16 input preprocessing before running inference.
    prediction = model.predict(preprocess_input(batch))
    # Flatten the prediction to a 1-dimensional vector.
    return prediction.reshape(-1)

In [12]:
# Smoke-test the feature extractor on a single training image.
res = conver_image_to_features(
    model,
    "../py/img_train/pos/2_A.jpg",
)
print(res)

[0. 0. 0. ... 0. 0. 0.]


In [80]:
def create_train_data_from_images(model, train_csv):
    """Build a feature table for the image pairs listed in a CSV index.

    The CSV is read with ``pd.read_csv`` and must provide the columns
    ``img1`` and ``img2`` (image paths passed to
    ``conver_image_to_features``) and ``same`` (copied to the ``y`` column).

    Args:
        model: Model forwarded to ``conver_image_to_features``.
        train_csv: Path of the CSV index file.

    Returns:
        pandas.DataFrame with one row per CSV row and the columns
        ``x1`` (features of ``img1``), ``x2`` (features of ``img2``) and
        ``y`` (the ``same`` value).
    """
    index_df = pd.read_csv(train_csv)
    total_rows = index_df.shape[0]

    first_features = []
    second_features = []
    labels = []

    for ind, row in index_df.iterrows():
        # Progress message; ind is the row's index label from read_csv.
        print("processing {} of {}".format(ind+1, total_rows))
        first_features.append(conver_image_to_features(model, row["img1"]))
        second_features.append(conver_image_to_features(model, row["img2"]))
        labels.append(row["same"])

    return pd.DataFrame({
        "x1" : first_features,
        "x2" : second_features,
        "y" : labels,
    })

In [27]:
# Save the training features to file (disabled; uncomment to regenerate min_feature.csv)
# res_df = create_train_data_from_images(model, "./min_index.csv")
# res_df.to_csv("min_feature.csv", index=False)

In [67]:
# Build the training set from the image pairs listed in train_index.csv.
train_df = create_train_data_from_images(model, "./train_index.csv")
# Model input: element-wise difference of the two feature vectors of a pair.
# NOTE(review): the signed difference depends on the img1/img2 order --
# swapping the pair negates every feature. Confirm this is intended.
train_X = pd.DataFrame.from_records(train_df["x1"] - train_df["x2"])
# Label: the "y" column of the feature table.
train_y = train_df.y

In [75]:
# Build the held-out test set from the image pairs listed in test_index.csv,
# using the same feature construction as for the training set.
test_df = create_train_data_from_images(model, "./test_index.csv")
test_X = pd.DataFrame.from_records(test_df["x1"] - test_df["x2"])
test_y = test_df.y

In [76]:
# Fit a shallow random forest on the pair-difference features.
from sklearn.ensemble import RandomForestClassifier

# random_state is fixed so repeated runs build the same forest.
clf = RandomForestClassifier(
    max_depth=2,
    random_state=0,
)
clf.fit(train_X, train_y)

RandomForestClassifier(max_depth=2, random_state=0)

In [79]:
# Evaluate the classifier on both the training and the test set.
from sklearn import metrics

y_pred_train = clf.predict(train_X)
y_pred_test = clf.predict(test_X)

# Accuracy = fraction of pairs whose predicted label matches the true label.
print("Training Accuracy:", metrics.accuracy_score(train_y, y_pred_train))
print("Test Accuracy:", metrics.accuracy_score(test_y, y_pred_test))

Training Accuracy: 0.97
Test Accuracy: 0.45
