In [None]:
import pandas as pd
import numpy as np

import os.path
from PIL import Image, ImageChops
from scipy.misc import imresize

# Picture-level records; the listed column positions are parsed as day-first dates.
account_data_01 = pd.read_csv(
    'results/picture_specific_dataset.csv',
    parse_dates = [2, 7, 24, 28, 32, 36, 40, 44],
    dayfirst = True,
    low_memory = False,
)

# Account-level table, reduced to the columns that are still needed for the merge.
accountnames = pd.read_csv(
    'results/stats_table.csv',
    sep = ',',
    encoding = "ISO-8859-1",
).drop(
    [
        'state',
        'longitude',
        'latitude',
        'followers',
        'number_useful_pictures',
        'number_images_trash',
        'number_images_total',
        'zip_code',
    ],
    axis = 1,
)

# Inner join on 'resort': only pictures whose resort appears in both tables survive.
account_data_02 = account_data_01.merge(accountnames, on = 'resort', how = 'inner')

# Print both row counts so the effect of the inner join is visible.
print('Number of records in account_data_01:', len(account_data_01))
print('Number of records in account_data_02:', len(account_data_02))

In [None]:
# Display the first rows of the merged table as a quick sanity check of the join.
account_data_02.head()

In [None]:
# function to crop a picture in order to get rid of borders around the picture

def trim(im):
    """Crop away a uniformly coloured border around a picture.

    The colour of the pixel at (0, 0) is taken as the border colour. The
    picture is compared against a same-sized canvas filled with that colour,
    and the bounding box of everything that differs is used as the crop area.

    Parameters
    ----------
    im : PIL.Image.Image
        The picture to crop.

    Returns
    -------
    PIL.Image.Image or None
        The cropped picture, or None when no bounding box is found (i.e. no
        pixel differs noticeably from the border colour).
    """
    border_colour = im.getpixel((0, 0))
    canvas = Image.new(im.mode, im.size, border_colour)

    # Per-pixel absolute difference between the picture and the plain canvas.
    delta = ImageChops.difference(im, canvas)
    # (delta + delta) / 2.0 - 10: subtracts a small offset so that faint
    # differences (e.g. compression noise in the border) drop to zero.
    delta = ImageChops.add(delta, delta, 2.0, -10)

    content_box = delta.getbbox()
    if not content_box:
        return None
    return im.crop(content_box)

# let's create a list with only image_id and accountname for each picture
image_list = account_data_02[['image_id', 'accountname']].values.tolist()

# edge lengths (in pixels) of the square versions written for every picture
target_sizes = (64, 100, 144, 196, 256)

# import, crop, resize and save the picture
for j, (image_id, accountname) in enumerate(image_list):

    # define the location of the pictures of a particular resort
    folder_path = os.path.join('images', str(accountname))
    source_path = os.path.join(folder_path, str(image_id) + '.jpg')

    # only process the picture if it exists on disk
    if os.path.isfile(source_path):

        # the context manager closes the underlying file once the picture is done
        with Image.open(source_path) as img:

            # crop the picture (get rid of the borders); trim() returns None when
            # it finds no bounding box, in which case the uncropped picture is used
            img_cropped = trim(img)
            if img_cropped is None:
                img_cropped = img

            # JPEG cannot store every Pillow mode (e.g. palette or alpha channel),
            # so anything that is not greyscale or RGB is converted to RGB first
            if img_cropped.mode not in ('L', 'RGB'):
                img_cropped = img_cropped.convert('RGB')

            # create the output directory next to the original pictures
            crop_dir = os.path.join(folder_path, 'crop')
            if not os.path.isdir(crop_dir):
                os.makedirs(crop_dir)

            # resize and save with Pillow: the previous code called
            # scipy.misc.imsave without `scipy` being imported (NameError), and
            # scipy.misc.imresize/imsave no longer exist in modern SciPy releases.
            # Bilinear resampling matches the default of scipy.misc.imresize.
            for size in target_sizes:
                img_resized = img_cropped.resize((size, size), Image.BILINEAR)
                img_resized.save(os.path.join(crop_dir, '{}_{}.jpg'.format(image_id, size)))

    # report progress on every 1000th row, whether or not that row's file existed
    if (j > 0) and (j % 1000) == 0:
        print(j, 'images have been processed')