# Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from glob import glob
from PIL import Image
from tqdm import tqdm
from shutil import copy
from transformers import pipeline
import torch
import os

# Parameters

In [2]:
# Directory that holds all the raw images (one sub-folder per class)
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere
RAW_PATH = Path(r'D:\ML Datasets\kagglecatsanddogs_5340\PetImages')
# Destination directory for the cropped/resized colour images
OUTPUT_PATH = Path(r'D:\ML Datasets\kagglecatsanddogs_5340\Color')
# Destination directory for the generated depth maps
OUTPUT_PATH_DEPTH_MAP = Path(r'D:\ML Datasets\kagglecatsanddogs_5340\Depth Map')
# File extension used when saving the processed images
IMG_FORMAT = 'jpg'
# Default side length, in pixels, of the square output images
SIZE = 256

# Functions

In [11]:
def img_crop_resize(raw_img_path, img_name,nominal_size = SIZE, output_path = OUTPUT_PATH, format=IMG_FORMAT, verbose=False):
    """Turn one image file into a square RGB image and save it.

    Processing steps:
      1. Load the image and make sure it is in RGB mode.
      2. If the shortest side is smaller than ``nominal_size``, upscale the
         image (aspect ratio preserved) until the shortest side reaches it.
      3. If both original sides lie strictly between ``nominal_size`` and
         ``1.25 * nominal_size``, squash the whole image to the target size.
         Otherwise keep a window centred on the image, spanning
         ``nominal_size // 2`` pixels on each side of the centre.
      4. Resize to ``nominal_size`` x ``nominal_size`` and save the result as
         ``<output_path>/<img_name>.<format>``.

    Args:
        raw_img_path: Path of the image to read.
        img_name: Base name (without extension) of the output file.
        nominal_size: Side length in pixels of the square output.
        output_path: Folder that receives the output file; created if missing.
        format: File extension of the output file (the name shadows the
            built-in on purpose, to keep the keyword callers already use).
        verbose: When True, print the input size, the branch taken and the
            final size.

    Returns:
        None. The image is written to disk.

    Raises:
        Errors raised while opening, decoding or saving the image are not
        caught here and propagate to the caller.
    """
    # Load inside a context manager so the file handle is always released.
    # convert() and copy() both return a fully loaded, independent image.
    with Image.open(raw_img_path) as source:
        if source.mode != 'RGB':
            image = source.convert('RGB')
        else:
            image = source.copy()

    # shape
    width, height = image.size
    if verbose:
        print(f'width: {width}, height: {height}')

    # Upscale whenever the shortest side is too small. Using min() also covers
    # the boundary inputs (one side equal to nominal_size, the other smaller)
    # that previously fell through unscaled and were padded by the crop below.
    shortest_side = min(width, height)
    if shortest_side < nominal_size:
        if verbose:
            if width < nominal_size and height < nominal_size:
                print('Resize Case 3')
            elif width < nominal_size:
                print('Resize Case 1')
            else:
                print('Resize Case 2')
        scale = nominal_size / shortest_side
        # max() guards against int() truncating the short side to
        # nominal_size - 1 because of floating point rounding.
        upscaled_size = (
            max(nominal_size, int(width * scale)),
            max(nominal_size, int(height * scale)),
        )
        new_image = image.resize(upscaled_size, Image.LANCZOS)
    else:
        if verbose:
            print('Resize Case 4')
        new_image = image

    # size after the optional upscale, used to locate the centre
    width2, height2 = new_image.size

    # last transformation
    slightly_larger = (
        nominal_size < width < nominal_size * 1.25
        and nominal_size < height < nominal_size * 1.25
    )
    if slightly_larger:
        # Image is only a bit bigger than the target: keep all of it
        new_image = new_image.resize((nominal_size, nominal_size), Image.LANCZOS)
        if verbose:
            print('Resize Case 5')
    else:
        # Keep the centred window, then normalise it to the exact target size
        half = nominal_size // 2
        center_x, center_y = width2 // 2, height2 // 2
        crop = new_image.crop(
            (center_x - half, center_y - half, center_x + half, center_y + half)
        )
        new_image = crop.resize((nominal_size, nominal_size), Image.LANCZOS)
        if verbose:
            print('Resize Case 6')

    # final shape
    if verbose:
        print(f'Final shape: {new_image.size}')

    # name
    file = f'{img_name}.{format}'

    # Save img (create the destination folder first so a missing folder does
    # not turn every save into an error)
    destination_dir = Path(output_path)
    destination_dir.mkdir(parents=True, exist_ok=True)
    new_image.save(str(destination_dir / file))



# List of raw images

In [12]:
# Collect the .jpg files found in each class folder below RAW_PATH
list_of_raw_imgs_dogs = glob(str(RAW_PATH / 'Dog' / '*.jpg'))
list_of_raw_imgs_cats = glob(str(RAW_PATH / 'Cat' / '*.jpg'))

# Report how many files were found per class
print(
    f'Total imgs dogs: {len(list_of_raw_imgs_dogs)}',
    f'Total imgs cats: {len(list_of_raw_imgs_cats)}',
    sep='\n',
)

Total imgs dogs: 12500
Total imgs cats: 12500


# Execution

In [13]:
# Crop/resize every raw image, class by class. Output files are numbered
# 1, 2, 3, ... per class; the counter only advances on success, so the
# numbering has no gaps when an input has to be skipped.
failed_imgs = []  # (path, error) of every input that could not be processed
for class_name, raw_paths in (('Dog', list_of_raw_imgs_dogs), ('Cat', list_of_raw_imgs_cats)):
    count = 0
    for path in tqdm(raw_paths, bar_format='{l_bar}{bar:20}{r_bar}', desc = "Processing"):
        try:
            img_crop_resize(
                raw_img_path=path,
                output_path= str(Path(OUTPUT_PATH, class_name)),
                img_name=count+1,
                verbose=False
            )
        except Exception as error:
            # Best effort: keep going when one file fails, but remember it
            # instead of hiding it. Catching Exception (not a bare except)
            # lets KeyboardInterrupt stop the run.
            failed_imgs.append((path, repr(error)))
        else:
            count+=1

# Make skipped inputs visible
print(f'Skipped imgs: {len(failed_imgs)}')
for failed_path, failed_error in failed_imgs:
    print(f'  {failed_path}: {failed_error}')

Processing: 100%|████████████████████| 12500/12500 [00:51<00:00, 241.52it/s]
Processing: 100%|████████████████████| 12500/12500 [00:50<00:00, 248.40it/s]


# Training dataset

In [3]:
# Smoke test: create a one-element tensor and move it to the CUDA device
# NOTE(review): presumably this raises on a machine without a usable CUDA GPU — confirm
torch.zeros(1).cuda()

tensor([0.], device='cuda:0')

In [4]:
# Ask PyTorch whether CUDA is available
torch.cuda.is_available()

True

In [5]:
# Pick the device name by priority: CUDA first, then MPS, otherwise CPU
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'

In [8]:
# Show which device was selected
DEVICE

'cuda'

In [7]:
# Build the depth-estimation pipeline on the device selected in DEVICE.
# The previous hard-coded device=0 always targeted the first CUDA GPU and
# ignored the 'mps' / 'cpu' fallback computed for DEVICE.
pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf", device=DEVICE)

# List of cropped images

In [25]:
# List the cropped images under OUTPUT_PATH. The extension is taken from
# IMG_FORMAT instead of a hard-coded '*.jpg', so the listing stays in sync
# with the files if the output format is ever changed.
# NOTE: the variable names still say "raw" because the following cells read
# these exact names; from here on they hold the cropped images.
list_of_raw_imgs_dogs = glob(
    str(
        Path(
            OUTPUT_PATH,
            'Dog',
            f'*.{IMG_FORMAT}'
        )
    )
)

list_of_raw_imgs_cats = glob(
    str(
        Path(
            OUTPUT_PATH,
            'Cat',
            f'*.{IMG_FORMAT}'
        )
    )
)

print(f'Total imgs dogs: {len(list_of_raw_imgs_dogs)}')
print(f'Total imgs cats: {len(list_of_raw_imgs_cats)}')

Total imgs dogs: 12499
Total imgs cats: 12499


In [26]:
# Generate one depth map per cropped dog image, saved under the same file name
dog_depth_dir = Path(OUTPUT_PATH_DEPTH_MAP, 'Dog')
# Create the destination folder up front so the first save cannot fail on it
dog_depth_dir.mkdir(parents=True, exist_ok=True)

for path in tqdm(list_of_raw_imgs_dogs, bar_format='{l_bar}{bar:20}{r_bar}', desc = "Processing"):
    # Path.name extracts the file name without prefix-length arithmetic
    file_name = Path(path).name
    # Context manager closes the input file once the depth map is computed
    with Image.open(path) as image:
        depth = pipe(image)["depth"]
    # Save img
    depth.save(str(dog_depth_dir / file_name))
  

Processing: 100%|████████████████████| 12499/12499 [12:01<00:00, 17.32it/s]


In [28]:
# Generate one depth map per cropped cat image, saved under the same file name
cat_depth_dir = Path(OUTPUT_PATH_DEPTH_MAP, 'Cat')
# Create the destination folder up front so the first save cannot fail on it
cat_depth_dir.mkdir(parents=True, exist_ok=True)

for path in tqdm(list_of_raw_imgs_cats, bar_format='{l_bar}{bar:20}{r_bar}', desc = "Processing"):
    # Path.name extracts the file name without prefix-length arithmetic
    file_name = Path(path).name
    # Context manager closes the input file once the depth map is computed
    with Image.open(path) as image:
        depth = pipe(image)["depth"]
    # Save img
    depth.save(str(cat_depth_dir / file_name))

Processing: 100%|████████████████████| 12499/12499 [11:53<00:00, 17.51it/s]
