In [1]:
import os

import numpy as np
import pandas as pd
from keras.preprocessing import image
import keras.applications.resnet50 as resnet50
import keras.applications.xception as xception
import keras.applications.inception_v3 as inception_v3

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Build the three classifiers once, up front, with their ImageNet weights
# (weights='imagenet'); every later cell reuses these module-level instances.
resnet_model = resnet50.ResNet50(weights='imagenet')
inception_model = inception_v3.InceptionV3(weights='imagenet')
xception_model = xception.Xception(weights='imagenet')

In [3]:
from PIL import Image
import cv2

def image_classify(model, pak, img, top_n=3, target_size=(224, 224)):
    """Classify a PIL image with one Keras application model.

    Parameters
    ----------
    model
        Loaded Keras model; only ``model.predict`` is used.
    pak
        The ``keras.applications`` module matching ``model``; it must provide
        ``preprocess_input`` and ``decode_predictions``.
    img
        PIL image. It is not modified: conversion and resizing return copies.
    top_n : int
        Number of top classes to return. Defaults to 3, which is what this
        function always returned while ``top_n`` was being ignored.
    target_size : tuple
        ``(width, height)`` the image is resized to before prediction.
        NOTE(review): Xception and InceptionV3 are documented with a 299x299
        default input; 224x224 is kept as the default here so existing
        outputs do not change. Confirm before switching.

    Returns
    -------
    list
        Whatever ``pak.decode_predictions`` returns for a batch of one image:
        a one-element list holding ``top_n`` tuples in which index 1 is the
        class label and index 2 is the score.
    """
    size = tuple(target_size)
    # Grayscale / palette / CMYK / RGBA files would otherwise yield an array
    # with the wrong channel count and make ``predict`` raise.
    if img.mode != 'RGB':
        img = img.convert('RGB')
    if img.size != size:
        img = img.resize(size)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the batch axis: one image per call
    x = pak.preprocess_input(x)
    preds = model.predict(x)
    return pak.decode_predictions(preds, top=top_n)


def classify_and_plot(image_path):
    """Open an image and run it through all three classifiers.

    Despite the name, nothing is plotted here; the opened image is returned
    so a caller can display it next to its predictions.

    Returns a tuple ``(img, preds_arr)`` where ``preds_arr`` is a list of
    ``(model_label, predictions)`` pairs in the order Resnet50, xception,
    Inception.
    """
    img = Image.open(image_path)
    backends = (
        ('Resnet50', resnet_model, resnet50),
        ('xception', xception_model, xception),
        ('Inception', inception_model, inception_v3),
    )
    preds_arr = [
        (label, image_classify(model, pak, img))
        for label, model, pak in backends
    ]
    return (img, preds_arr)

In [4]:
# Path of every directory entry in the Images0 folder (order is whatever scandir yields).
image_files = [entry.path for entry in os.scandir('E:/Kaggle/Avito/Images0')]

In [5]:
from collections import Counter
from pprint import pprint

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

def get_data_from_image(dat):
    """Flatten the per-model predictions of one image into a single flat list.

    Parameters
    ----------
    dat
        A pair ``(img, preds_arr)`` as returned by ``classify_and_plot``.
        ``preds_arr`` is a sequence of ``(model_label, decoded)`` entries, and
        ``decoded[0]`` is the list of prediction tuples for the single image,
        with the class label at index 1 and the score at index 2.

    Returns
    -------
    list
        For each model, in order: all of its labels, then all of its scores.
        With three models and three predictions each this is 18 values:
        ``[l1, l2, l3, s1, s2, s3]`` repeated per model.
    """
    row = []
    for entry in dat[1]:
        top_preds = entry[1][0]  # predictions for the only image in the batch
        row.extend(pred[1] for pred in top_preds)  # class labels
        row.extend(pred[2] for pred in top_preds)  # scores
    return row

# Smoke test on the first image only: classify it with all three models and
# print the flattened row (a plain list, despite the name `df`).
dat = classify_and_plot(image_files[0])
df = get_data_from_image(dat)
print(df)

['iPod', 'hand-held_computer', 'cellular_telephone', 0.31853566, 0.1416138, 0.09922744, 'hand-held_computer', 'screen', 'oscilloscope', 0.98155826, 0.008925793, 0.0031165234, 'monitor', 'desktop_computer', 'hand-held_computer', 0.3118162, 0.28669664, 0.21826982]


In [6]:
import tqdm

In [15]:
%%time
predictions = []
for i in tqdm.trange(len(image_files)):
    try:
        dat = classify_and_plot(image_files[i])
        df = [image_files[i][24:-4]]+get_data_from_image(dat)
        predictions.append(df)
    except:
        predictions.append([image_files[i][24:-4]]+
                           [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None])

100%|████████████████████████████████████████████████████████████████████████| 278169/278169 [4:10:59<00:00, 18.47it/s]


Wall time: 4h 10min 59s


In [16]:
# One column for the image id, then label1-3 / score1-3 for each of the three models.
prediction_columns = [
    'image',
    'Res50_label1', 'Res50_label2', 'Res50_label3',
    'Res50_score1', 'Res50_score2', 'Res50_score3',
    'Xcept_label1', 'Xcept_label2', 'Xcept_label3',
    'Xcept_score1', 'Xcept_score2', 'Xcept_score3',
    'Incept_label1', 'Incept_label2', 'Incept_label3',
    'Incept_score1', 'Incept_score2', 'Incept_score3',
]
df = pd.DataFrame(predictions, columns=prediction_columns)

In [17]:
# Save the predictions table; index=False keeps the DataFrame row index out of the CSV.
df.to_csv('E:/Kaggle/Avito/Image_preds0.csv',index=False)

In [18]:
# Path of every directory entry in the Images1 folder (order is whatever scandir yields).
image_files = [entry.path for entry in os.scandir('E:/Kaggle/Avito/Images1')]

In [19]:
%%time
predictions = []
for i in tqdm.trange(len(image_files)):
    try:
        dat = classify_and_plot(image_files[i])
        df = [image_files[i][24:-4]]+get_data_from_image(dat)
        predictions.append(df)
    except:
        predictions.append([image_files[i][24:-4]]+
                           [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None])
df = pd.DataFrame(predictions,columns=['image','Res50_label1','Res50_label2','Res50_label3','Res50_score1','Res50_score2','Res50_score3',
                                       'Xcept_label1','Xcept_label2','Xcept_label3','Xcept_score1','Xcept_score2','Xcept_score3',
                                       'Incept_label1','Incept_label2','Incept_label3','Incept_score1','Incept_score2','Incept_score3'])
df.to_csv('E:/Kaggle/Avito/Image_preds1.csv',index=False)

100%|████████████████████████████████████████████████████████████████████████| 278167/278167 [4:03:21<00:00, 19.05it/s]


Wall time: 4h 3min 25s


In [20]:
# Path of every directory entry in the Images2 folder (order is whatever scandir yields).
image_files = [entry.path for entry in os.scandir('E:/Kaggle/Avito/Images2')]

In [21]:
%%time
predictions = []
for i in tqdm.trange(len(image_files)):
    try:
        dat = classify_and_plot(image_files[i])
        df = [image_files[i][24:-4]]+get_data_from_image(dat)
        predictions.append(df)
    except:
        predictions.append([image_files[i][24:-4]]+
                           [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None])
df = pd.DataFrame(predictions,columns=['image','Res50_label1','Res50_label2','Res50_label3','Res50_score1','Res50_score2','Res50_score3',
                                       'Xcept_label1','Xcept_label2','Xcept_label3','Xcept_score1','Xcept_score2','Xcept_score3',
                                       'Incept_label1','Incept_label2','Incept_label3','Incept_score1','Incept_score2','Incept_score3'])
df.to_csv('E:/Kaggle/Avito/Image_preds2.csv',index=False)

100%|████████████████████████████████████████████████████████████████████████| 278168/278168 [3:58:58<00:00, 19.40it/s]


Wall time: 3h 59min 2s


In [22]:
# Path of every directory entry in the Images3 folder (order is whatever scandir yields).
image_files = [entry.path for entry in os.scandir('E:/Kaggle/Avito/Images3')]

In [23]:
%%time
predictions = []
for i in tqdm.trange(len(image_files)):
    try:
        dat = classify_and_plot(image_files[i])
        df = [image_files[i][24:-4]]+get_data_from_image(dat)
        predictions.append(df)
    except:
        predictions.append([image_files[i][24:-4]]+
                           [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None])
df = pd.DataFrame(predictions,columns=['image','Res50_label1','Res50_label2','Res50_label3','Res50_score1','Res50_score2','Res50_score3',
                                       'Xcept_label1','Xcept_label2','Xcept_label3','Xcept_score1','Xcept_score2','Xcept_score3',
                                       'Incept_label1','Incept_label2','Incept_label3','Incept_score1','Incept_score2','Incept_score3'])
df.to_csv('E:/Kaggle/Avito/Image_preds3.csv',index=False)

100%|████████████████████████████████████████████████████████████████████████| 278167/278167 [4:00:14<00:00, 19.30it/s]


Wall time: 4h 18s


In [24]:
# Path of every directory entry in the Images4 folder (order is whatever scandir yields).
image_files = [entry.path for entry in os.scandir('E:/Kaggle/Avito/Images4')]

In [25]:
%%time
predictions = []
for i in tqdm.trange(len(image_files)):
    try:
        dat = classify_and_plot(image_files[i])
        df = [image_files[i][24:-4]]+get_data_from_image(dat)
        predictions.append(df)
    except:
        predictions.append([image_files[i][24:-4]]+
                           [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None])
df = pd.DataFrame(predictions,columns=['image','Res50_label1','Res50_label2','Res50_label3','Res50_score1','Res50_score2','Res50_score3',
                                       'Xcept_label1','Xcept_label2','Xcept_label3','Xcept_score1','Xcept_score2','Xcept_score3',
                                       'Incept_label1','Incept_label2','Incept_label3','Incept_score1','Incept_score2','Incept_score3'])
df.to_csv('E:/Kaggle/Avito/Image_preds4.csv',index=False)

100%|████████████████████████████████████████████████████████████████████████| 278167/278167 [4:09:15<00:00, 18.60it/s]


Wall time: 4h 9min 20s


In [7]:
# Path of every directory entry in the test_jpg folder (order is whatever scandir yields).
image_files = [entry.path for entry in os.scandir('E:/Kaggle/Avito/imagestest/data/competition_files/test_jpg')]

In [None]:
%%time
predictions = []
for i in tqdm.trange(len(image_files)):
    try:
        dat = classify_and_plot(image_files[i])
        df = [image_files[i][59:-4]]+get_data_from_image(dat)
        predictions.append(df)
    except:
        predictions.append([image_files[i][24:-4]]+
                           [None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None])
df = pd.DataFrame(predictions,columns=['image','Res50_label1','Res50_label2','Res50_label3','Res50_score1','Res50_score2','Res50_score3',
                                       'Xcept_label1','Xcept_label2','Xcept_label3','Xcept_score1','Xcept_score2','Xcept_score3',
                                       'Incept_label1','Incept_label2','Incept_label3','Incept_score1','Incept_score2','Incept_score3'])
df.to_csv('E:/Kaggle/Avito/Image_predstest.csv',index=False)

  5%|███▉                                                                     | 25189/465829 [24:33<7:09:38, 17.09it/s]