## Description

This notebook downloads and crops the data used in the final dataset. To use it, simply run all the cells.

In [None]:
import os
import sys
import shutil
import PIL
import requests
import PIL.Image as Image
from tqdm import tqdm
import numpy as np
import pandas as pd

def load_image(filename):
    '''Open an image file and return it as an RGB PIL image.

    The RGB conversion is crucial: PIL opens png files with either 3 or 4
    channels depending on their encoding, and the rest of the pipeline
    expects exactly 3.

    The source file is opened in a ``with`` block so its handle is released
    as soon as the pixels are converted. ``convert`` returns a new image, so
    the result stays valid after the file is closed.

    Args:
        filename: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A ``PIL.Image.Image`` in mode 'RGB'.

    Raises:
        Whatever ``PIL.Image.open`` / ``convert`` raise for missing or
        undecodable files; callers decide how to handle them.
    '''
    with Image.open(filename) as source:
        return source.convert('RGB')

def center_crop(img):
    '''Cut the largest centered square out of an image array.

    Algorithm derived from nvidia preprocessor.

    Args:
        img: array indexed as [row, column, ...]; the square's side is the
            smaller of the first two dimensions.

    Returns:
        The cropped square wrapped as a PIL image via
        ``Image.fromarray(..., 'RGB')``.
    '''
    n_rows = img.shape[0]
    n_cols = img.shape[1]
    side = min(n_rows, n_cols)

    # Bounds are computed so the window is exactly `side` long on each axis,
    # including when the margin to split is odd.
    top, bottom = (n_rows - side)//2, (n_rows + side)//2
    left, right = (n_cols - side)//2, (n_cols + side)//2

    square = img[top:bottom, left:right]
    return Image.fromarray(square,'RGB')

def resize_and_crop(fp,width,height):
    '''Load an image, center-crop it to a square, resize it and pad it.

    The image at ``fp`` is loaded as RGB, cropped to its largest centered
    square, resized to ``(width, height)`` with Lanczos resampling and then
    pasted, vertically centered, onto a black ``width`` x ``width`` canvas.

    Args:
        fp: path of the image file to process.
        width: output width in pixels; also the side of the square canvas.
        height: height the cropped image is resized to. Expected to be
            <= ``width`` so the resized image fits on the canvas.

    Returns:
        A ``PIL.Image.Image`` holding the canvas, or ``None`` when ``fp``
        does not exist or cannot be decoded as an image.
    '''
    # Nothing to process if the file is missing. (The check must be "not
    # exists": returning when the file *is* present would make this function
    # unable to ever produce an image.)
    if not os.path.exists(fp):
        return None
    try:
        img = np.asarray(load_image(fp), dtype=np.uint8)
    except Exception:
        # Best effort: a corrupt or non-image download is skipped, not fatal.
        # `Exception` (rather than a bare except) lets Ctrl-C through.
        return None
    img = center_crop(img)

    img = img.resize((width, height), Image.LANCZOS)
    img = np.array(img)  # shape (height, width, 3)

    # Black square canvas; the resized image fills all columns and the
    # `height` rows centered vertically.
    canvas = np.zeros([width, width, 3], dtype=np.uint8)
    canvas[(width-height)//2:(width+height)//2, :] = img
    return Image.fromarray(canvas)


def _fetch_to_file(url, dest, timeout=30):
    '''Stream ``url`` into the file ``dest``.

    Returns True only when the server answered 200 and the body was written
    completely; any network or file error yields False. ``dest`` may be left
    partially written on failure, so the caller is responsible for removing it.
    '''
    try:
        # The context manager releases the streamed connection; the timeout
        # keeps one unresponsive host from stalling the whole run.
        with requests.get(url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                return False
            with open(dest, 'wb') as f:
                # iter_content decodes gzip/deflate transfer encodings and
                # reports read errors as requests exceptions.
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
    except (requests.RequestException, OSError):
        return False
    return True


def download_images(reddit_csv):
    '''Download every image listed in a CSV and store a cropped copy by label.

    The CSV is read with pandas and must provide a ``url`` and a ``label``
    column. Each image is downloaded into ``Data/temp``, passed through
    ``resize_and_crop(..., 256, 256)`` and, if that yields an image, saved
    under ``Data/People`` or ``Data/Weapon`` according to its label, using
    the last path component of the URL as file name. Temporary files and the
    temp directory are removed afterwards.

    A progress bar shows running 'Success' / 'Failed' counts. 'Success'
    counts completed downloads; 'Failed' counts rows whose download failed
    or that could not be attempted (unknown label, unusable URL).

    Args:
        reddit_csv: path of the CSV file to read.

    Returns:
        None. Results are written to disk.
    '''
    print('Downloading and Cropping Images')
    fp = 'Data'
    fpt = os.path.join(fp, 'temp')
    # Destination directory per supported label.
    label_dirs = {
        'Weapon': os.path.join(fp, 'Weapon'),
        'People': os.path.join(fp, 'People'),
    }
    for directory in [fpt] + list(label_dirs.values()):
        os.makedirs(directory, exist_ok=True)

    df = pd.read_csv(reddit_csv)

    post_fix = {'Success': 0, 'Failed': 0}
    pbar = tqdm(df.iterrows(), total=len(df))

    for _, row in pbar:
        image_url = row.url
        out_dir = label_dirs.get(row.label)
        # Missing URLs come back from pandas as NaN (a float), hence the type check.
        name = image_url.split('/')[-1] if isinstance(image_url, str) else ''

        # Skip rows that cannot be stored: an unknown label has no target
        # directory, and an empty name (URL ending in '/') is not a file.
        if out_dir is None or not name:
            post_fix['Failed'] += 1
            pbar.set_postfix(post_fix)
            continue

        filename = os.path.join(fpt, name)
        try:
            if _fetch_to_file(image_url, filename):
                post_fix['Success'] += 1
                ##Crop
                canvas = resize_and_crop(filename, 256, 256)
                if canvas is not None:
                    canvas.save(os.path.join(out_dir, name))
            else:
                post_fix['Failed'] += 1
        finally:
            # Always drop the temp file, including partial downloads.
            try:
                os.remove(filename)
            except OSError:
                pass
        pbar.set_postfix(post_fix)
    pbar.close()
    # rmtree rather than rmdir so a stray leftover file cannot make the
    # final cleanup raise after all the work is done.
    shutil.rmtree(fpt, ignore_errors=True)


In [None]:
# Run the pipeline on 'data_download.csv'. download_images reads this file
# with pd.read_csv and uses each row's `url` and `label` fields.
download_images('data_download.csv')