In [1]:
import pandas as pd
import requests
import re
import os
import numpy as np
import concurrent
import concurrent.futures

from keras.applications import InceptionV3
from keras.applications.inception_v3 import preprocess_input

from keras.models import Model
from keras.layers import Input, Reshape, MaxPooling1D, Flatten, Concatenate

from keras.preprocessing.image import image, img_to_array, load_img, ImageDataGenerator

from PIL import Image

Using TensorFlow backend.


In [2]:
#Function for cropping an image into a grid of (width x height) tiles; the generator below uses it to split a panorama into thirds
def crop(im, height, width):
    """Yield non-overlapping tiles of ``im`` in row-major order.

    Args:
        im: image-like object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method taking ``(left, upper, right, lower)``.
        height: height of each tile, in pixels.
        width: width of each tile, in pixels.

    Yields:
        Whatever ``im.crop(box)`` returns for each full tile. Partial tiles
        along the right and bottom edges are dropped.

    Raises:
        ZeroDivisionError: if ``height`` or ``width`` is 0 (raised lazily, on
            the first ``next()``).
    """
    imgwidth, imgheight = im.size
    # Builtin int() replaces the ``np.int`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24; the truncating behaviour is unchanged.
    rows = int(imgheight / height)
    cols = int(imgwidth / width)
    for i in range(rows):
        for j in range(cols):
            box = (j*width, i*height, (j+1)*width, (i+1)*height)
            yield im.crop(box)
            
#Custom generator that takes in a pandas dataframe of file paths and loads the images batch by batch,
#    allowing training/predicting without loading all files into memory at once
def generator_from_df(df, img_dir, batch_size, pano_size, img_size, shuffle=False):
    """Endlessly yield batches of panorama views for the files listed in ``df``.

    Every image named in the first column of ``df`` is loaded from
    ``img_dir`` and split top-to-bottom into three equal-height strips with
    ``crop``. Each strip is resized to ``img_size`` and passed through
    ``preprocess_input``.

    Args:
        df: pandas DataFrame whose first column holds image paths relative to
            ``img_dir``.
        img_dir: directory that every path is joined onto.
        batch_size: number of rows per batch. The trailing
            ``len(df) % batch_size`` rows are never yielded.
        pano_size: unused by this function; kept so existing callers still work.
        img_size: indexable; ``img_size[0]`` is the target height and
            ``img_size[1]`` the target width of each strip (PIL's ``resize``
            takes ``(width, height)``).
        shuffle: when true, the rows are shuffled once before the first pass;
            the same shuffled order is then reused on every pass.

    Yields:
        A list of three lists. ``out[k][n]`` is the preprocessed array for
        strip ``k`` of the ``n``-th image in the batch.

    Raises:
        ValueError: if ``df`` has fewer rows than ``batch_size`` (raised on the
            first ``next()``), since no batch could ever be produced.
    """
    nbatches = df.shape[0] // batch_size
    if nbatches == 0:
        # Without this guard the endless loop below would spin forever
        # without ever yielding.
        raise ValueError(
            "generator_from_df needs at least batch_size={} rows, got {}".format(
                batch_size, df.shape[0]))

    if shuffle:
        df = df.sample(frac=1) #shuffles the dataframe

    while True:
        for i in range(nbatches):
            # Batch i covers rows [i*batch_size, (i+1)*batch_size). The slice
            # must start at i*batch_size, not at i, or batches overlap.
            start = i*batch_size
            sub = df.iloc[start:start+batch_size]
            img_files = sub.iloc[:, 0].tolist()
            try:
                out_images = [[],[],[]] #three
                for img_file in img_files:
                    im = load_img(os.path.join(img_dir,img_file))
                    imgwidth, imgheight = im.size
                    height = imgheight//3 #split into thirds

                    # zip() stops after three strips, so a very short image
                    # that produces extra crops cannot index past out_images.
                    for view, piece in zip(out_images, crop(im, height, imgwidth)):
                        img = Image.new('RGB', (imgwidth, height), 255)
                        img.paste(piece)
                        img = img.resize((img_size[1],img_size[0])) #may change depending on your environment
                        view.append(preprocess_input(img_to_array(img)))
            except IOError as err:
                # Best effort: report the unreadable file and skip the whole
                # batch. Callers that pair outputs with file names by position
                # should be aware that a skipped batch shifts that pairing.
                print("ERROR!",err)
                continue

            yield out_images

In [7]:
#Example for creating a keras model that can use the previous data generator for panoramas.
#  The inception model can be changed to other architectures.
#  After the row-wise max-pooling, you can add more layers for custom classification models.

#Shape of one panorama view as fed to the network; the model summary below lists each input as (None, 125, 260, 3).
#  Presumably (height, width, channels), i.e. Keras' default channels-last layout -- confirm for your backend.
input_shape=(125, 260, 3) #Can be adjusted, up to 360x751 (size of each panorama individually)
#The three channels make the input look like RGB so that pre-trained models can be used.
#  When training a model from the ground up, a single grayscale channel is recommended instead:
#  change the last entry of the input shape from 3 to 1, and 'RGB' in the generator to 'L'.

def create_model(input_shape):
    """Build the frozen, three-input panorama feature extractor.

    One shared sub-model (ImageNet-pretrained InceptionV3 without its top,
    followed by a max-pool over the first spatial axis) is applied to each of
    the three panorama views. The three feature maps are concatenated on the
    last axis and flattened into a single vector per sample.

    Args:
        input_shape: shape of a single view, passed to ``Input`` and to
            ``InceptionV3``.

    Returns:
        A compiled Keras ``Model`` named ``'Panorama_Model'`` with inputs
        ``pano_1``, ``pano_2`` and ``pano_3`` and every layer set to
        non-trainable.
    """
    backbone_input = Input(shape=input_shape)
    # Pre-trained backbone. Swap it for another architecture, or drop the
    # weights, to train from the ground up.
    backbone = InceptionV3(input_tensor=backbone_input,
                           input_shape=input_shape,
                           weights='imagenet',
                           include_top=False,
                           pooling=None)

    # ---- Row-wise max-pooling (panorama treated as a cylinder whose x-axis loops)
    # The feature-map dimensions depend on both input_shape and the backbone.
    rows, cols, depth = (int(size) for size in backbone.output.shape[1:])
    pooled = backbone.output
    # Fold the last two axes together so MaxPooling1D (default pool size)
    # pools along the first spatial axis only, then unfold them again.
    pooled = Reshape((rows, cols * depth))(pooled)
    pooled = MaxPooling1D()(pooled)
    pooled = Reshape((rows // 2, cols, depth))(pooled)
    # ----
    base_model = Model(backbone.input, pooled, name="base_model")

    # One input per view; all three share the weights of base_model.
    view_inputs = [
        Input(shape=input_shape, name=view_name)
        for view_name in ('pano_1', 'pano_2', 'pano_3')
    ]
    view_features = [base_model(view_input) for view_input in view_inputs]

    # Merge the three views and flatten into one long feature vector.
    merged = Concatenate(axis=-1)(view_features)
    merged = Flatten()(merged)

    panorama_model = Model(inputs=view_inputs, outputs=merged, name='Panorama_Model')
    # Keras predict is not always thread-safe unless the model is frozen and compiled.
    for frozen_layer in panorama_model.layers:
        frozen_layer.trainable = False
    # The optimizer and loss are placeholders; the model is never trained here.
    panorama_model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    return panorama_model

#Build the frozen three-input feature extractor for the configured input_shape.
model = create_model(input_shape)
#_make_predict_function is a private Keras method; presumably called here to build the predict
#  function up front so later predict calls from worker threads do not race -- confirm it exists in your Keras version.
model._make_predict_function()
#Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
pano_1 (InputLayer)             (None, 125, 260, 3)  0                                            
__________________________________________________________________________________________________
pano_2 (InputLayer)             (None, 125, 260, 3)  0                                            
__________________________________________________________________________________________________
pano_3 (InputLayer)             (None, 125, 260, 3)  0                                            
__________________________________________________________________________________________________
base_model (Model)              (None, 1, 6, 2048)   21802784    pano_1[0][0]                     
                                                                 pano_2[0][0]                     
          

In [None]:
PANO_DIR = '<PATH TO PANORMA DIR>'

#Collect every panorama PNG as a path relative to PANO_DIR ("<thingID>/<file>.png").
panorama_files = []#list of all panorama files
#sorted() keeps the file order deterministic, so repeated runs produce identical chunks
for thingID in sorted(os.listdir(PANO_DIR)):
    #thingID corresponds to file in https://thingiverse.com/thing:<thingID>
    thing_dir = os.path.join(PANO_DIR, thingID)
    if not os.path.isdir(thing_dir):
        #stray files next to the thing folders (e.g. .DS_Store) would make os.listdir raise
        continue
    panorama_files.extend(
        os.path.join(thingID, filename)
        for filename in sorted(os.listdir(thing_dir))
        if filename.endswith('.png')
    )

print("Found {} Panoramas".format(len(panorama_files)))

In [None]:
#Example script to generate features for all renders in ThingiPano. Takes a couple of days on CPU.

pano_feature_file = 'pano_vecs.csv' #path to csv to write all features (will be giant ~300GB)
num_threads = 1 #number of threads to predict on

with open(pano_feature_file,'w') as out_f:
    out_f.write("item_id,file_name,image_features\n")

    chunk_size = 128

    #One single-column DataFrame per chunk of file paths; the last chunk may be shorter.
    panorama_files_chunks = [
        pd.DataFrame(panorama_files[start:start+chunk_size])
        for start in range(0, len(panorama_files), chunk_size)
    ]
    print("Running on {} Chunks".format(len(panorama_files_chunks)))

    def write_pano_features(df):
        """Predict features for one chunk and append one CSV row per panorama.

        Args:
            df: single-column DataFrame (column ``0``) of panorama paths
                relative to ``PANO_DIR``, formatted ``<thingID><sep><file>``.

        Returns:
            True once every row of the chunk has been written to ``out_f``.

        Note:
            Rows are paired with predictions by position. generator_from_df
            skips a file it cannot read, so an unreadable file shifts that
            pairing. ``out_f`` is shared between worker threads without a
            lock, which matters once ``num_threads`` exceeds 1.
        """
        file_names = df[0].tolist()
        #generator_from_df requires img_size; pano_size is accepted but not used by it
        datagen = generator_from_df(df, PANO_DIR, batch_size=1, pano_size=None, img_size=input_shape)
        pano_features = model.predict_generator(datagen,len(file_names),verbose=0,workers=1,use_multiprocessing=True)

        for file_name, features in zip(file_names, pano_features):
            #paths were built with os.path.join, so split on the platform separator
            item_id, _, thing_name = file_name.partition(os.sep)
            #put in quotes as some filenames have commas; embedded quotes are doubled per CSV convention
            quoted_name = thing_name.replace('"', '""')
            out_f.write(item_id+',"'+quoted_name+'","['+",".join([str(ft) for ft in features])+']"\n')
        return True

    with concurrent.futures.ThreadPoolExecutor(num_threads) as executor: #increase the number to use more than one thread
        #iterating the map results also re-raises any exception from a worker
        for i, _ in enumerate(executor.map(write_pano_features, panorama_files_chunks)):
            if i%10==0:
                print("chunks processed: {}\t{}%".format(i,100.0*i/len(panorama_files_chunks)))