In [8]:
import pickle
import boto3
import os
import errno
from utils import *


def save_obj(obj, path, name):
    """ Save an object into a pickle file. """
    with open(path + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_obj(path, name):
    """ Load a pickle file into an object. """
    with open(path + name + '.pkl', 'rb') as f:
        return pickle.load(f)


def create_directory(directory):
    """ Create a directory if it does not exist and return the directory. """
    if not os.path.exists(directory):
        try:
            os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    return directory


def write_image_into_file(key, file_name, file_directory):
    """ Write images into directory. """
    file_directory = create_directory(file_directory)
    session = boto3.session.Session()
    try:
        obj = session.resource('s3').Bucket('root-image-uploads').Object(key=key).get()['Body'].read()
        with open(file_directory + file_name + '.jpg', 'wb') as f:
            f.write(obj)
    except session.resource('s3').meta.client.exceptions.NoSuchKey:
        print("no such key in bucket")
    return None


def download_images_from_s3_bucket(df_image, file_directory, total_images=100):
    """ download images from S3 bucket. """
    image_count = 0
    for key, image_id in zip(df_image['s3_file_path'], df_image['img_id']):
        if image_count >= total_images:
            break
        else:
            image_count += 1
        write_image_into_file(key, image_id, file_directory)
    return None

In [36]:
!mkdir root_image
!mkdir root_image/damage_angle
!mkdir root_image/damage_in_context
!mkdir root_image/damage_close_up
!mkdir root_image/back_passenger_side
!mkdir root_image/front_passenger_side
!mkdir root_image/back_driver_side
!mkdir root_image/front_driver_side

mkdir: root_image: File exists
mkdir: root_image/damage_angle: File exists
mkdir: root_image/damage_in_context: File exists


In [4]:
con = db_connection()
con.autocommit = True

In [23]:
sql_query = '''
with claim_payment as (
select
    dc.claim_id,
    sum(paid_loss_dollar_amount) as paid,
    sum(reserved_dollar_amount) as reserved
from edw.dim_claim dc
join edw.fact_financials_accumulating ffa on ffa.claim_k = dc.claim_k
join edw.dim_coverage cov on ffa.coverage_k = cov.coverage_k
where development_age_in_months = 4 and cov.symbol in ('pd', 'col', 'comp')
group by 1
)
select 
    c.id as claim_id,
    img.id as img_id,
    img.s3_file_path,
    fnol.label,
    fnol.source,
    pay.paid
from server_public.first_notice_of_loss_photos fnol
join server_public.image_uploads img on img.id = fnol.image_upload_id
join server_public.claims c on c.first_notice_of_loss_id = fnol.first_notice_of_loss_id
join claim_payment pay on c.id = pay.claim_id
'''

df = pd.read_sql(sql_query, con)
df.to_pickle("paid_claim_fnol_images.pkl.gz")

In [38]:
for label in ('damage_angle', 'back_passenger_side',
       'front_passenger_side', 'mileage', 'damage_in_context',
       'damage_close_up', 'back_driver_side', 'front_driver_side'):

    data = df[(df['label']==label)]
    print(label, data.shape)
    download_images_from_s3_bucket(data, 'root_image/'+label+'/', total_images=100)

damage_angle (5511, 6)
back_passenger_side (5583, 6)
front_passenger_side (6181, 6)
mileage (5186, 6)
damage_in_context (5536, 6)
damage_close_up (6288, 6)
back_driver_side (5505, 6)
front_driver_side (6381, 6)


In [39]:
import os
import urllib.request

def download(url):
    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urllib.request.urlretrieve(url, filename)

# Tool for creating lst file
download('https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py')