In [57]:
import glob
import os
import random
from PIL import Image, ImageFile
import tqdm

import albumentations
import numpy as np
import pandas as pd
import torch

In [9]:
spots_df = pd.read_csv("../data/classification_dataset.csv")
spots_df['class'] = spots_df[spots_df['class'] == 1]

In [10]:
spots_df

Unnamed: 0,path,class
0,..\data\images\spots\Capbreton_Santosha\2022-0...,
1,..\data\images\spots\Biarritz\2022-03-23_08-00...,
2,..\data\images\spots\Biarritz\2022-03-22_14-15...,..\data\images\spots\Biarritz\2022-03-22_14-15...
3,..\data\images\spots\Anglet_GrandePlage\2022-0...,..\data\images\spots\Anglet_GrandePlage\2022-0...
4,..\data\images\spots\Capbreton_Santosha\2022-0...,..\data\images\spots\Capbreton_Santosha\2022-0...
...,...,...
1451,..\data\images\spots\Capbreton_Santosha\2022-0...,
1452,..\data\images\spots\Biarritz\2022-03-19_09-15...,..\data\images\spots\Biarritz\2022-03-19_09-15...
1453,..\data\images\spots\Anglet_GrandePlage\2022-0...,
1454,..\data\images\spots\Lacanau\2022-03-19_18-45.jpg,


In [15]:
aug = albumentations.Compose(
    [
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)


to_drop = []
paths = spots_df.path.apply(lambda x : "..\\" + x).values
for i in range(len(paths)):
    image = Image.open(paths[i])
    try:
        image = np.array(image)
        augmented = aug(image=image)
        image = augmented["image"]    
    except Exception:
        to_drop.append(i)
        print("to drop : ", i)
    else:
        pass
spots_df = spots_df.drop(to_drop)

to drop :  196
to drop :  320
to drop :  633


In [62]:
from itertools import product
import matplotlib.pyplot as plt

def tile(image_path, output_path, d, show_images=False):
    spot_name, file_name = image_path.split("\\")[-2:]
    file_name, ext = file_name.split(".")
    
    img = Image.open(os.path.join(image_path))
    w, h = img.size
    
    grid = product(range(0, h-h%d, d), range(0, w-w%d, d))
    image_data_list = []
    for i, j in grid:
        box = (j, i, j+d, i+d)  
        image_name = f'{spot_name}_{file_name}_y-{i}_x-{j}.{ext}'
        out = os.path.join(output_path, image_name)
        im = img.crop(box)
        im.save(out)
        if show_images:
            plt.imshow(im)     
            plt.show()
        image_data_list.append({"spot": spot_name,
                                "initial_image_file": file_name + ext,
                                "x":j,
                                "y":i,
                                "image_file": image_name})
    return image_data_list

In [63]:
#plt.rcParams["figure.figsize"] = (20,16)
image_data_list = []
for image_path in tqdm.tqdm(spots_df.path.values):
    image_data_list += tile("..\\" + image_path, "..\\data\\images_512x512\\", 512)
new_image_df = pd.DataFrame(image_data_list)

100%|██████████| 1453/1453 [01:17<00:00, 18.85it/s]


In [64]:
new_image_df

Unnamed: 0,spot,initial_image_file,x,y,image_file
0,Capbreton_Santosha,2022-03-29_15-00jpg,0,0,Capbreton_Santosha_2022-03-29_15-00_y-0_x-0.jpg
1,Capbreton_Santosha,2022-03-29_15-00jpg,512,0,Capbreton_Santosha_2022-03-29_15-00_y-0_x-512.jpg
2,Capbreton_Santosha,2022-03-29_15-00jpg,1024,0,Capbreton_Santosha_2022-03-29_15-00_y-0_x-1024...
3,Capbreton_Santosha,2022-03-29_15-00jpg,0,512,Capbreton_Santosha_2022-03-29_15-00_y-512_x-0.jpg
4,Capbreton_Santosha,2022-03-29_15-00jpg,512,512,Capbreton_Santosha_2022-03-29_15-00_y-512_x-51...
...,...,...,...,...,...
7213,Anglet_GrandePlage,2022-03-23_12-30jpg,512,0,Anglet_GrandePlage_2022-03-23_12-30_y-0_x-512.jpg
7214,Anglet_GrandePlage,2022-03-23_12-30jpg,1024,0,Anglet_GrandePlage_2022-03-23_12-30_y-0_x-1024...
7215,Anglet_GrandePlage,2022-03-23_12-30jpg,0,512,Anglet_GrandePlage_2022-03-23_12-30_y-512_x-0.jpg
7216,Anglet_GrandePlage,2022-03-23_12-30jpg,512,512,Anglet_GrandePlage_2022-03-23_12-30_y-512_x-51...


In [66]:
new_image_df.to_csv("..\\data\\smaller_crops_512x512.csv", index=False)