In [1]:

import numpy as np
import pandas as pd
import cv2

In [2]:
import requests
URL = "https://visualgenome.org/api/v0/images/"


def download_visual_genome_image(image_id, folder):
    """ Downloads image from the visual genome dataset

    :param image_id: id of the image to download
    :type image_id: str
    :param folder: where to download the image
    :return:
    """
    r = requests.get(url=URL + image_id)
    data = r.json()

    print(f"Downloading image: {data['url']}")

    img_data = requests.get(data['url']).content
    with open(folder + f'/{image_id}.jpg', 'wb') as handler:
        handler.write(img_data)

In [3]:
import os
def create_descriptor_features(image_files):
    """Create features for images with SIFT descriptor

    :param image_files: list of images to be processed
    :type image_files: list(str)
    :return: numpy array of the created features
    :rtype: np.array
    """
    trainer = cv2.BOWKMeansTrainer(clusterCount=100)
    sift = cv2.xfeatures2d.SIFT_create()
    matcher = cv2.FlannBasedMatcher_create()
    bow_extractor = cv2.BOWImgDescriptorExtractor(sift, matcher)

    print('Creating dictionary')
    if os.path.exists('data/dictionary.npy'):
        dictionary = np.load('data/dictionary.npy')
    else:
        for filename in image_files:
            file = f'data/visual_genome/{filename.lower()}'
            img = cv2.imread(file)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            key_points, desc_obj = sift.detectAndCompute(img, mask=None)
            trainer.add(desc_obj)

        dictionary = trainer.cluster()
        np.save('data/dictionary.npy', dictionary)

    bow_extractor.setVocabulary(dictionary)

    feature_data = np.zeros(shape=(len(image_files), dictionary.shape[0]),
                            dtype=np.float32)

    print('Extract features')
    for i, filename in zip(range(len(image_files)), image_files):
        file = f'data/visual_genome/{filename.lower()}'
        img = cv2.imread(file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        points = sift.detect(img)
        feature_data[i] = bow_extractor.compute(img, points)
    return feature_data


In [4]:
import pickle
from tqdm import tqdm
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
# tensorflow.contrib.keras.api.keras.applications.vgg16 from original code

def load_vgg16(fc):
    """ Creates VGG16 model.

    :param fc: fully connected layer as output layer if true
    :type fc: bool
    :return: instance of VGG16 keras model
    :rtype: keras.Model
    """
    base_model = VGG16(include_top=True, weights='imagenet', input_shape=(224, 224, 3))
    if fc:
        model = Model(inputs=base_model.input, outputs=base_model.get_layer(name='fc2').output)
    else:
        model = Model(inputs=base_model.input, outputs=base_model.get_layer(name='block5_pool').output)
    model.trainable = False
    return model


def create_features(image_id, model):
    """ Creates features with VGG16 model for given image.

    :param image_id: id of the image
    :type image_id: str
    :param model: VGG16 model
    :type model: keras.Model
    :return: features of the image
    :rtype: numpy.array
    """
    img = cv2.resize(cv2.imread(f'./data/visual_genome/{image_id}.jpg'), (224, 224))
    features = model.predict(preprocess_input(np.expand_dims(img.astype(np.float32), axis=0)))
    return features[0]


def create_features_parallel(image_ids, model):
    """ Creates features with VGG16 model for given image.

    :param image_ids: ids of the images
    :type image_ids: list
    :param model: VGG16 model
    :type model: keras.Model
    :return: features of the image
    :rtype: numpy.array
    """
    input_f = []
    for image_id in image_ids:
        print("START", image_id)  # last printed DONE 2414570
        img = cv2.imread("./data/visual_genome/" + str(image_id) + '.jpg')
        img = cv2.resize(img, (224, 224))
        input_f.append(img.astype(np.float32))

    features = model.predict(preprocess_input(np.array(input_f)))
    return features


def load_vgg16_features(image_id, fc):
    """ Loads VGG16 features for the image with given id. It assumes that the features are already created.

    :param image_id: id of the image
    :type image_id: str
    :param fc: use features from fully connected layer if true
    :type fc: bool
    :return: features of the image
    :rtype: numpy.array
    """
    if fc:
        with open(f'./Lab05_Output/dataset/features/vgg16/{image_id}.pkl', 'rb') as f:
            features = pickle.load(f)
    else:
        with open(f'./Lab05_Output/dataset/features/vgg16-conv/{image_id}.pkl', 'rb') as f:
            features = pickle.load(f)
    return features


def create_vgg16_features_parallel(image_ids, fc):
    """ Creates VGG16 features for images with given ids. Features are saved to a file

    :param image_ids: image ids
    :type image_ids: numpy.array
    :param fc: use features from fully connected layer if true
    :type fc: bool
    """
    vgg_16_model = load_vgg16(fc)
    images = []
    for i, image_id in zip(tqdm(list(range(len(image_ids)))), image_ids):
        if fc:
            features_path = f'./Lab05_Output/dataset/features/vgg16/{image_id}.pkl'
        else:
            features_path = f'./Lab05_Output/dataset/features/vgg16-conv/{image_id}.pkl'
        if not os.path.exists(features_path):
            images.append(image_id)
        if len(images) == 16 or i == len(image_ids) - 1:
            features = create_features_parallel(images, vgg_16_model)
            for im_id, feats in zip(images, features):
                if fc:
                    with open(f'./Lab05_Output/dataset/features/vgg16/{im_id}.pkl', 'wb') as f:
                        pickle.dump(feats, f)
                else:
                    with open(f'./Lab05_Output/dataset/features/vgg16-conv/{im_id}.pkl', 'wb') as f:
                        pickle.dump(feats, f)
            images = []


def create_vgg16_features(image_ids, fc):
    """ Creates VGG16 features for images with given ids. Features are saved to a file

    :param image_ids: image ids
    :type image_ids: numpy.array
    :param fc: use features from fully connected layer if true
    :type fc: bool
    """
    vgg_16_model = load_vgg16(fc)
    for _, image_id in zip(tqdm(list(range(len(image_ids)))), image_ids):
        if fc:
            if not os.path.exists(f'./Lab05_Output/dataset/features/vgg16/{image_id}.pkl'):
                features = create_features(image_id, vgg_16_model)
                with open(f'./Lab05_Output/dataset/features/vgg16/{image_id}.pkl', 'wb') as f:
                    pickle.dump(features, f)
        else:
            if not os.path.exists(f'./Lab05_Output/dataset/features/vgg16-conv/{image_id}.pkl'):
                features = create_features(image_id, vgg_16_model)
                with open(f'./Lab05_Output/dataset/features/vgg16-conv/{image_id}.pkl', 'wb') as f:
                    pickle.dump(features, f)

#
# if __name__ == '__main__':
#     vgg_16_model = load_vgg16(False)
#     create_features('KITP-11-22560-g004',
#                     vgg_16_model)
    # train_ids = load_image_ids('train')
    # create_vgg16_features_parallel(train_ids, False)
    # val_ids = load_image_ids('val')
    # create_vgg16_features_parallel(val_ids, False)
    # test_ids = load_image_ids('test')
    # create_vgg16_features_parallel(test_ids, False)

### Zad 1 - extracting data, train_test_split, simple RandomForestClassifier for openCV features extraction with BOWImgDescriptorExtractor
All images downloaded

In [5]:
def getNames(file):
    filename = os.fsdecode(file)
    img_name = filename.split(".")[0]
    return img_name
vg_objects = pd.read_csv('./data/visual_genome_objects.csv')
directory = os.fsencode("./data/visual_genome")
lst_imgs = os.listdir(directory)
img_names = list(map(getNames,lst_imgs))
# for imgID in list(set(vg_objects['Image ID'])):
#     if str(imgID) not in img_names:
#         download_visual_genome_image(str(imgID), './data/visual_genome/')

In [6]:
# print(list(map(lambda x: x + ".jpg",img_names)))
image_files = list(map(lambda x: x + ".jpg",img_names))
descriptor_features = create_descriptor_features(image_files)
descriptor_features

Creating dictionary
Extract features


array([[0.01522843, 0.01232777, 0.00942712, ..., 0.0137781 , 0.00652647,
        0.00942712],
       [0.00698324, 0.0377095 , 0.0027933 , ..., 0.00837989, 0.0027933 ,
        0.00977654],
       [0.00609756, 0.01727642, 0.00406504, ..., 0.01422764, 0.00813008,
        0.00406504],
       ...,
       [0.011994  , 0.02098951, 0.        , ..., 0.005997  , 0.01349325,
        0.01349325],
       [0.00407747, 0.01019368, 0.00611621, ..., 0.01529052, 0.00917431,
        0.00917431],
       [0.01446945, 0.01607717, 0.00803859, ..., 0.00321543, 0.00643087,
        0.01125402]], dtype=float32)

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, precision_score, average_precision_score
RFC = RandomForestClassifier()
X_train, x_test, Y_train, y_test = train_test_split(vg_objects["Image ID"], vg_objects["Class"], test_size=0.2,random_state=42)
# X_test, x_val, Y_test, y_val = train_test_split(x_test,y_test, test_size=0.2, random_state=42)

In [8]:
descriptor_features_withID = {}
for imgId, feat in zip(img_names, descriptor_features):
    descriptor_features_withID[imgId] = feat
# list(vg_objects["Image ID"])
# list(set(vg_objects["Image ID"]))

In [9]:
RFC.fit([descriptor_features_withID[str(val)] for val in X_train],Y_train)
# y_pred = RFC.predict([descriptor_features_withID[str(val)] for val in x_test])
# print("Precision score: ", precision_score(y_test, y_pred, average='macro'))
score = RFC.score([descriptor_features_withID[str(val)] for val in x_test],y_test)
print("Score: ", score)
 # Is it really that bad or I am doing something really wrong ?

Score:  0.111


### Zad 2  - VGG 16

In [10]:
modelVGG16 = load_vgg16(fc=True)
imgFeatures = {}
for imgID in img_names:
    feat = create_features(str(imgID), modelVGG16)
    imgFeatures[imgID] = feat

In [11]:
RFC = RandomForestClassifier()
RFC.fit([imgFeatures[str(val)] for val in X_train], Y_train)
score = RFC.score([imgFeatures[str(val)] for val in x_test],y_test)
print("Score: ", score)

Score:  0.255


### Zad 3

In [None]:
from keras.layers import Dense
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(244,244,3))
layer = base_model.output
layer = Dense(2)(layer)
model = Model(base_model.input,output=layer)

imgFeatures_VGG16_Dense = create_features_parallel(img_names,model)