In [21]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline
plt.style.use('ggplot')
plt.rcParams["figure.figsize"] = (14,6)

In [46]:
results_path = '../'
random_state = 20
nb_breeds = 10


In [23]:
#!wget -P /../data 'http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar'
#!tar xf /../data/images.tar -C /content/data
#!rm -rf /content/data/images.tar

In [24]:
# For the given path, get the List of all files in the directory tree 

def getListOfFiles(dirName):
    # create a list of file and sub directories 
    # names in the given directory 
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Iterate over all the entries
    for entry in listOfFile:
        # Create full path
        fullPath = os.path.join(dirName, entry)
        # If entry is a directory then get the list of files in this directory 
        if os.path.isdir(fullPath):
            allFiles = allFiles + getListOfFiles(fullPath)
        else:
            allFiles.append(fullPath)
                
    return allFiles

In [25]:
files = getListOfFiles('../data/Images/')
np.save('files', files)

In [26]:
files[0:20]

['../data/Images/n02085620-Chihuahua\\n02085620_10074.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_10131.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_10621.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_1073.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_10976.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11140.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11238.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11258.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11337.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11477.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_1152.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11696.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11818.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_11948.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_1205.jpg',
 '../data/Images/n02085620-Chihuahua\\n02085620_12101.jpg',
 '../data/Images/n02085620-Chihuahua\\n0208

In [27]:
f = '../data/Annotation\\n02085620-Chihuahua\\n02085620_12718'
f.split('-')[1].split('\\')[0]

'Chihuahua'

In [40]:
labels = []

for f in files :
    labels.append(f.split('-')[1].split('\\')[0])
    
df_breed_filename = pd.DataFrame({'filename' : files, 'breed' : labels})
breeds = list(df_breed_filename.breed.unique())
df_n_breeds_filename = df_breed_filename[df_breed_filename['breed'].isin(breeds[10:10+nb_breeds])]


Y = df_breed_filename['breed']
X = df_breed_filename

df_train_breed_filename, df_test_breed_filename, y_train, y_test = train_test_split(X, Y, random_state=random_state)


In [41]:
df_train_breed_filename.head()

Unnamed: 0,filename,breed
17514,../data/Images/n02110958-pug\n02110958_16217.jpg,pug
20394,../data/Images/n02115913-dhole\n02115913_5392.jpg,dhole
14607,../data/Images/n02107142-Doberman\n02107142_64...,Doberman
8317,../data/Images/n02097047-miniature_schnauzer\n...,miniature_schnauzer
19121,../data/Images/n02112706-Brabancon_griffon\n02...,Brabancon_griffon


In [29]:
import cv2

sift = cv2.xfeatures2d.SIFT_create()
sift_object = cv2.xfeatures2d.SIFT_create()

df_train_descriptor = pd.DataFrame(columns = range(0,128))


for i in range(len(df_train_breed_filename)):
    img_filename =df_train_breed_filename.iloc[i]['filename']
    img = cv2.imread(img_filename)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # one image: dataframe with 128 sift features colmuns and 1 row per descriptors
    df_desc = pd.DataFrame(sift.detectAndCompute(img_gray, None)[1])

    # add filename column
    df_desc['filename'] = img_filename
    # all images : dataframe with 128 sift feature + 1 filename feature
    #1 row per descriptor
    df_train_descriptor = df_train_descriptor.append(df_desc)

df_test_descriptor = pd.DataFrame(columns = range(0,128))


for i in range(len(df_test_breed_filename)):
    img_filename =df_test_breed_filename.iloc[i]['filename']
    img = cv2.imread(img_filename)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # one image: dataframe with 128 sift features colmuns and 1 row per descriptors
    df_desc = pd.DataFrame(sift.detectAndCompute(img_gray, None)[1])

    # add filename column
    df_desc['filename'] = img_filename
    # all images : dataframe with 128 sift feature + 1 filename feature
    #1 row per descriptor
    df_test_descriptor = df_test_descriptor.append(df_desc)

In [45]:
df_train_breed_filename.head()

Unnamed: 0,filename,breed
17514,../data/Images/n02110958-pug\n02110958_16217.jpg,pug
20394,../data/Images/n02115913-dhole\n02115913_5392.jpg,dhole
14607,../data/Images/n02107142-Doberman\n02107142_64...,Doberman
8317,../data/Images/n02097047-miniature_schnauzer\n...,miniature_schnauzer
19121,../data/Images/n02112706-Brabancon_griffon\n02...,Brabancon_griffon


In [47]:
df_train_breed_filename.to_csv(results_path + 'df_train_' + str(nb_breeds) + '_breed_filename.csv', index=False)
df_test_breed_filename.to_csv(results_path + 'df_test_' + str(nb_breeds) + '_breed_filename.csv', index=False)

In [48]:
results_path

'../'

In [34]:
df_breed_filename.to_csv(results_path + 'df_breed_filename.csv', index=False)
df_n_breeds_filename.to_csv(results_path + 'df_' + str(nb_breeds) + '_breeds_filename.csv', index=False)

df_train_breed_filename.to_csv(results_path + 'df_train_' + str(nb_breeds) + '_breed_filename.csv', index=False)
df_test_breed_filename.to_csv(results_path + 'df_test_' + str(nb_breeds) + '_breed_filename.csv', index=False)

np.save(results_path + 'y_train_' + str(nb_breeds) + '_breeds', y_train)
np.save(results_path + 'y_test_' + str(nb_breeds) + '_breeds', y_test)

df_train_descriptor.to_csv(results_path + 'df_train_' + str(nb_breeds) + '_descriptor.csv', index=False)
df_test_descriptor.to_csv(results_path + 'df_test_' + str(nb_breeds) + '_descriptor.csv', index=False)

NameError: name 'df_train_descriptor' is not defined