## Imports

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pylab as plt
import os 
from glob import glob
import cv2
from PIL import Image
import random
from io import BytesIO
from tqdm import tqdm_notebook as tqdm
# Instantiates a tqdm progress bar and calls its `.pandas()` method.
# NOTE(review): presumably this registers tqdm's pandas integration
# (e.g. `progress_apply`); nothing in the visible cells relies on it — confirm
# whether it is still needed.
tqdm().pandas()

## Download carvana dataset from kaggle

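___\* Download the Carvana dataset from Kaggle and place it in the `./input` folder. Note: the download command below only works once the Kaggle API token (`kaggle.json`) has been installed as described in the remaining steps of this section. \*___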

<br>

! pip install kaggle

<br>

! kaggle competitions download -c carvana-image-masking-challenge

<br>

%cd /home/jupyter/.kaggle _\* The hidden kaggle folder in your linux/ubuntu instance\*_

<br>

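! wget https://a.uguu.se/XvFMekYiSDVC_kaggle.json _\* download the kaggle.json authentication token generated from your own Kaggle account. The token is a secret: the URL here is only an example, so avoid leaving your token on a public file host and prefer copying it to the instance directly. \*_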

<br>

! mv XvFMekYiSDVC_kaggle.json kaggle.json _\*rename it\*_

<br>

! chmod 600 ~/.kaggle/kaggle.json _\* with kaggle.json in the ~/.kaggle folder, restrict it to owner-only read/write permissions \*_


## Data I/O

In [None]:
# Dataset layout: every path is derived from INPUT_PATH so the dataset can be
# relocated by editing a single constant.
INPUT_PATH = './input'
DATA_PATH = INPUT_PATH
TRAIN_DATA = os.path.join(DATA_PATH, "train")
TRAIN_MASKS_DATA = os.path.join(DATA_PATH, "train_masks")
TEST_DATA = os.path.join(DATA_PATH, "test")
TRAIN_MASKS_CSV_FILEPATH = os.path.join(DATA_PATH, "train_masks.csv")
METADATA_CSV_FILEPATH = os.path.join(DATA_PATH, "metadata.csv")

# Tabular side files, loaded once as DataFrames.
TRAIN_MASKS_CSV = pd.read_csv(TRAIN_MASKS_CSV_FILEPATH)
METADATA_CSV = pd.read_csv(METADATA_CSV_FILEPATH)

# glob() returns matches in arbitrary (filesystem-dependent) order. The cells
# below pair images with backgrounds by list position, so sort the listings to
# make a Restart & Run All reproducible.
train_files = sorted(glob(os.path.join(TRAIN_DATA, "*.jpg")))
test_files = sorted(glob(os.path.join(TEST_DATA, "*.jpg")))

# An image id is the file name without directory and extension. Deriving it
# with basename/splitext avoids depending on the exact prefix length and on a
# three-character extension.
train_ids = [os.path.splitext(os.path.basename(s))[0] for s in train_files]
test_ids = [os.path.splitext(os.path.basename(s))[0] for s in test_files]

### Helper functions

In [None]:
def get_filename(image_id, image_type):
    """Return the path of the file that stores `image_id` for `image_type`.

    The type selects a directory, a file-name suffix and an extension:
    "Train" must match exactly; "Train_mask", "Test", "Foreground" and
    "Background" are matched as substrings of `image_type`, in that order.

    Raises:
        Exception: if `image_type` matches none of the known types.
    """
    # Directory creation is switched off: the flag is never set to True here.
    create_missing_dir = False

    if image_type == "Train":
        data_path, suffix, ext = TRAIN_DATA, '', 'jpg'
    elif "Train_mask" in image_type:
        data_path, suffix, ext = TRAIN_MASKS_DATA, '_mask', 'gif'
    elif "Test" in image_type:
        data_path, suffix, ext = TEST_DATA, '', 'jpg'
    elif "Foreground" in image_type:
        # NOTE(review): `Foreground` is not defined in the visible cells; this
        # branch raises NameError unless it is defined elsewhere — confirm.
        data_path, suffix, ext = Foreground, '', 'jpg'
    elif "Background" in image_type:
        # NOTE(review): `Background` is not defined in the visible cells either.
        data_path, suffix, ext = Background, '', 'jpg'
    else:
        raise Exception("Image type '%s' is not recognized" % image_type)

    if create_missing_dir and not os.path.exists(data_path):
        os.makedirs(data_path)

    file_name = "{}{}.{}".format(image_id, suffix, ext)
    return os.path.join(data_path, file_name)
def get_image_data(image_id, image_type, **kwargs):
    """Load an image as an array, choosing the reader from `image_type`.

    Types containing the substring 'mask' are read with the PIL-based loader;
    everything else goes through the OpenCV-based loader. Extra keyword
    arguments are forwarded unchanged to the selected loader.
    """
    reader = _get_image_data_pil if 'mask' in image_type else _get_image_data_opencv
    return reader(image_id, image_type, **kwargs)

def _get_image_data_opencv(image_id, image_type, **kwargs):
    """Read an image with OpenCV and return it in RGB channel order.

    `kwargs` is accepted for signature parity with the PIL loader but is not
    used. Fails with an AssertionError when OpenCV cannot read the file.
    """
    bgr = cv2.imread(get_filename(image_id, image_type))
    assert bgr is not None, "Failed to read image : %s, %s" % (image_id, image_type)
    # cv2.imread yields BGR; convert so callers receive RGB.
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _get_image_data_pil(image_id, image_type, return_exif_md=False, return_shape_only=False):
    """Read an image with PIL and return it as a numpy array.

    Args:
        image_id: identifier used to build the file name.
        image_type: type string understood by `get_filename`.
        return_exif_md: when True, return `(array, exif)` where `exif` comes
            from the PIL image's `_getexif()`.
        return_shape_only: when True, skip decoding to an array and return
            `(height, width, number_of_bands)` instead.

    Any failure to open the file is re-raised as an AssertionError carrying
    the original error message.
    """
    path = get_filename(image_id, image_type)
    try:
        pil_image = Image.open(path)
    except Exception as e:
        assert False, "Failed to read image : %s, %s. Error message: %s" % (image_id, image_type, e)

    if return_shape_only:
        # PIL reports (width, height); reverse it and append the band count.
        height_width = pil_image.size[::-1]
        band_count = len(pil_image.getbands())
        return height_width + (band_count,)

    pixels = np.asarray(pil_image)
    assert isinstance(pixels, np.ndarray), "Open image is not an ndarray. Image id/type : %s, %s" % (image_id, image_type)
    if return_exif_md:
        return pixels, pil_image._getexif()
    return pixels

### Create background images

***Scraping images from google***

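___\* On the Google image-search results page, press Ctrl-Shift-J to open the console in Google Chrome and run the script below; it collects the image URLs and opens them as a CSV in a new window. \*___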

urls = Array.from(document.querySelectorAll('.rg_di .rg_meta')).map(el=>JSON.parse(el.textContent).ou);
window.open('data:text/csv;charset=utf-8,' + escape(urls.join('\n')));

### Read the scraped images as background images 

In [None]:
# Scraped background pictures, listed in ascending path order so that the
# position of each background in the list is stable between runs.
background_images = sorted(glob('./images/*.jpg'))

## Create foreground images (cars+masks)

In [None]:
# Cut every training car out of its photo (pixels outside the mask become
# black) and save the result as a JPEG named after the image id.
#
# The files are written to ./input/Foreground/ because that is the directory
# the compositing cell reads its foreground images from.
path = './input/Foreground/'
# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(path, exist_ok=True)

for image_id in tqdm(train_ids):
    img = get_image_data(image_id, "Train")        # RGB (converted on load)
    mask = get_image_data(image_id, "Train_mask")  # non-zero where the car is
    img_masked = cv2.bitwise_and(img, img, mask=mask)
    # cv2.imwrite expects BGR channel order; convert back from RGB so the
    # colours are not swapped in the saved file.
    cv2.imwrite(
        os.path.join(path, str(image_id) + ".jpg"),
        cv2.cvtColor(img_masked, cv2.COLOR_RGB2BGR),
    )

## Create composite images (processed cars pasted onto the scraped backgrounds)

In [None]:
# Build the synthetic dataset: each background picture receives a batch of
# consecutive cars. For every car, the background is resized to the mask size,
# the car region is blacked out, and a randomly chosen rendering of the car
# (binary threshold or Canny edges) is added on top.
path = './input/Background_function_mix/'
FOREGROUND_DIR = './input/Foreground/'
N_BACKGROUNDS = 403        # upper bound on the number of backgrounds used
CARS_PER_BACKGROUND = 12   # cars composited onto each background


def _threshold_overlay(gray, low, high):
    """Binary-threshold a grayscale image and return it as a 3-channel image."""
    _, thresh = cv2.threshold(gray, low, high, cv2.THRESH_BINARY)
    return cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)


def _edge_overlay(image):
    """Run Canny edge detection and replicate the edge map over 3 channels."""
    canny = cv2.Canny(image, 0, 300)
    return np.stack((canny,) * 3, axis=-1)


# cv2.imwrite does not create missing directories.
os.makedirs(path, exist_ok=True)

z = 0            # index of the next unused entry of train_ids
image_ids = []   # ids selected for compositing, in processing order

# Slicing (rather than range(403)) tolerates having fewer backgrounds.
for background_file in tqdm(background_images[:N_BACKGROUNDS]):
    img = cv2.imread(background_file)
    if img is None:
        # Unreadable background: skip it explicitly and keep its cars for the
        # next background instead of hiding the failure behind a bare except.
        continue

    # Take the next batch of cars. The index advances by exactly one per car;
    # indexing with z + i while also incrementing z would skip every other id
    # and reuse ids across backgrounds.
    batch = train_ids[z:z + CARS_PER_BACKGROUND]
    if not batch:
        break  # ran out of cars
    z += len(batch)

    for image_id in batch:
        image_ids.append(image_id)

        # pre-processing
        mask = get_image_data(image_id, "Train_mask")
        foreground_file = os.path.join(FOREGROUND_DIR, str(image_id) + ".jpg")
        foregroundimg_grayscale = cv2.imread(foreground_file, cv2.IMREAD_GRAYSCALE)
        foregroundimg = cv2.imread(foreground_file)
        if foregroundimg is None or foregroundimg_grayscale is None:
            raise FileNotFoundError("Failed to read foreground image: %s" % foreground_file)

        h, w = mask.shape[:2]
        img_resized = cv2.resize(img, (int(w), int(h)), interpolation=cv2.INTER_AREA)
        # Black out the car region; any non-zero mask value counts as "car",
        # matching how the mask is interpreted by cv2.bitwise_and and np.where
        # in the other cells.
        img_resized[mask > 0] = [0, 0, 0]

        # Threshold settings: (low=20 -> high=192) or (low=40 -> high=40).
        low = random.choice([20, 40])
        high = 192 if low == 20 else 40

        # Randomly choose one rendering and compute only that one.
        if random.choice(["threshold", "edges"]) == "threshold":
            overlay = _threshold_overlay(foregroundimg_grayscale, low, high)
        else:
            overlay = _edge_overlay(foregroundimg)

        # final image: cv2.add saturates at 255, whereas `+` on uint8 arrays
        # wraps around and produces dark speckles where both inputs are bright.
        final = cv2.add(overlay, img_resized)

        cv2.imwrite(os.path.join(path, str(image_id) + ".jpg"), final)
  

## Create annotations and save

In [None]:
# Derive one bounding box per composite image from the matching car mask and
# write the annotations as a header-less CSV with the columns
# image, x1, y1, x2, y2, class.
final = sorted(glob('./input/Background_function_mix/*.jpg'))  # deterministic row order

# Collect plain records and build the DataFrame once: DataFrame.append was
# removed in pandas 2.0 and growing a frame row by row is quadratic.
records = []
for composite_file in tqdm(final):
    image_id = os.path.splitext(os.path.basename(composite_file))[0]
    mask = get_image_data(image_id, "Train_mask")

    # np.where on a 2-D mask returns (row indices, column indices) of the
    # non-zero pixels; x runs along columns and y along rows.
    rows, cols = np.where(mask)
    if rows.size == 0:
        raise ValueError("Mask for image '%s' is empty; cannot compute a bounding box" % image_id)

    records.append({
        'image': "./datasets/carvana/" + str(image_id) + ".jpg",
        'x1': int(cols.min()),
        'y1': int(rows.min()),
        'x2': int(cols.max()),
        'y2': int(rows.max()),
        'class': 'car',
    })

bboxes_df = pd.DataFrame(records, columns=['image', 'x1', 'y1', 'x2', 'y2', 'class'])
bboxes_df = bboxes_df.astype({"x1": int, "x2": int, "y1": int, "y2": int})
bboxes_df.to_csv('bbox_d.csv', index=False, header=False)