In [1]:
import json
import sys
from os import listdir, path
from typing import List, Optional

import cv2
import numpy as np
import pandas as pd

from src.contour import Contour

In [2]:
# Folder that holds the annotation CSV files for both datasets.
csv_dir = 'data/csv/'

# "Stella" dataset: source images, annotation CSV file name, crop output folder.
stella_image_dir = 'data/stella/'
csv_stella = 'stella.csv'
stella_crop_images_dir = 'data/crop_images_stella/'

# "Pricetag" dataset: source images, annotation CSV file name, crop output folder.
pricetag_image_dir = 'data/pricetag/'
csv_pricetag = 'pricetag.csv'
pricetag_crop_images_dir = 'data/crop_images_pricetag/'

In [3]:
# NOTE(review): every path below is resolved against sys.path[1], which is
# presumably the project root in the author's environment -- confirm, since the
# contents of sys.path depend on how the kernel was started.

# Attribute keys used to label regions in each dataset's annotations.
stella_tags = ['petrol_name', 'price']
pricetag_tags = ['text', 'rubles', 'kopeks']

# Stella: annotation table and the names of the files in the image folder.
df_stella = pd.read_csv(path.join(sys.path[1], csv_dir, csv_stella))
file_names_stella = listdir(path.join(sys.path[1], stella_image_dir))

# Pricetag: annotation table and the names of the files in the image folder.
df_pricetag = pd.read_csv(path.join(sys.path[1], csv_dir, csv_pricetag))
file_names_pricetag = listdir(path.join(sys.path[1], pricetag_image_dir))

In [4]:
# Preview the first rows of the stella annotation table.
df_stella.head()

Unnamed: 0,#filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,21.jpg,120983,{},8,0,"{""name"":""rect"",""x"":121,""y"":217,""width"":137,""he...","{""petrol_name"":""G 95""}"
1,21.jpg,120983,{},8,1,"{""name"":""rect"",""x"":281,""y"":218,""width"":345,""he...","{""price"":""39.40""}"
2,21.jpg,120983,{},8,2,"{""name"":""rect"",""x"":111,""y"":367,""width"":133,""he...","{""petrol_name"":""95""}"
3,21.jpg,120983,{},8,3,"{""name"":""rect"",""x"":281,""y"":376,""width"":354,""he...","{""price"":""38.50""}"
4,21.jpg,120983,{},8,4,"{""name"":""rect"",""x"":99,""y"":536,""width"":147,""hei...","{""petrol_name"":""92""}"


In [14]:
# Preview the first rows of the pricetag annotation table.
df_pricetag.head()

Unnamed: 0,#filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,30.jpg,1182815,{},4,0,"{""name"":""rect"",""x"":393,""y"":303,""width"":2821,""h...","{""text"":""ЙОГУРТ НАТУРЕЛЬ 2,5% НА""}"
1,30.jpg,1182815,{},4,1,"{""name"":""rect"",""x"":370,""y"":610,""width"":2927,""h...","{""text"":""ТОП.МОЛОКЕ Б/САХ.СТ.125Г""}"
2,30.jpg,1182815,{},4,2,"{""name"":""rect"",""x"":1782,""y"":913,""width"":1349,""...","{""rubles"":""21""}"
3,30.jpg,1182815,{},4,3,"{""name"":""rect"",""x"":3300,""y"":952,""width"":527,""h...","{""kopeks"":""50""}"
4,29.jpg,1121896,{},5,0,"{""name"":""rect"",""x"":80,""y"":202,""width"":2612,""he...","{""text"":""Десерт Чудо молочный""}"


In [6]:
def get_contours_by_metadata(image_name: str,
                             tag: str,
                             metadata: pd.DataFrame) -> "Optional[List[Contour]]":
    """Build contours for every region of one image that is labelled with ``tag``.

    A region's label is the first key of the JSON object stored in its
    ``region_attributes`` cell; its rectangle is read from the JSON object in
    ``region_shape_attributes`` (keys ``x``, ``y``, ``width``, ``height``).

    Args:
        image_name: Value to match against the ``#filename`` column.
        tag: Attribute key a region must carry to be selected.
        metadata: Annotation table with ``#filename``, ``region_attributes``
            and ``region_shape_attributes`` columns (the latter two holding
            JSON strings).

    Returns:
        A list with one ``Contour`` per matching region, in table order, or
        ``None`` when the image has no region labelled with ``tag`` (including
        the case where the image does not appear in ``metadata`` at all).

    Raises:
        IndexError: If any region of the image has an empty
            ``region_attributes`` object. This is kept on purpose: callers
            catch it to skip such images.
    """
    # Select the image's rows once; the tag mask below is built on this same
    # subset, so the boolean indexing never relies on index alignment.
    image_rows = metadata[metadata['#filename'] == image_name]

    # First attribute key of every region; ``[0]`` raises IndexError on ``{}``.
    region_tags = image_rows.region_attributes.apply(
        lambda attr: list(json.loads(attr).keys())[0])

    tagged_rows = image_rows[region_tags == tag]
    if tagged_rows.empty:
        return None

    return [
        Contour(bounding_rect=(shape['x'], shape['y'],
                               shape['width'], shape['height']))
        for shape in tagged_rows.region_shape_attributes.apply(json.loads)
    ]


def crop_image(image, contour):
    """Return the part of ``image`` covered by ``contour.bounding_rect``.

    Args:
        image: Array indexed as ``image[row, column, ...]``.
        contour: Object whose ``bounding_rect`` is ``(x, y, width, height)``.

    Returns:
        The slice ``image[y:y + height, x:x + width]`` with the rectangle
        clipped to the image. The result is empty when the rectangle lies
        entirely outside the image.
    """
    x_min, y_min, width, height = contour.bounding_rect
    x_max, y_max = x_min + width, y_min + height

    # Negative bounds would be read by numpy as "count from the end" and
    # select the wrong pixels, so clamp them to the image origin. Bounds past
    # the far edges are already clipped by slicing itself.
    x_min, y_min = max(x_min, 0), max(y_min, 0)
    x_max, y_max = max(x_max, 0), max(y_max, 0)

    return image[y_min:y_max, x_min:x_max]


def save_crop(tags, file_names, metadata, input_dir, output_dir):
    """Crop every tagged region out of every image and write the crops to disk.

    For each ``(tag, file_name)`` pair the matching contours are looked up with
    ``get_contours_by_metadata``; each one is cut out with ``crop_image`` and
    saved as ``<index><tag><file_name>`` inside ``output_dir``, where ``index``
    is the contour's position in the returned list.

    Args:
        tags: Iterable of attribute keys to extract.
        file_names: Iterable of image file names located in ``input_dir``.
        metadata: Annotation table passed through to ``get_contours_by_metadata``.
        input_dir: Folder with the source images, relative to ``sys.path[1]``.
        output_dir: Folder for the crops, relative to ``sys.path[1]``.

    Images that cannot be processed are reported with ``print`` and skipped;
    nothing is returned.
    """
    # NOTE(review): sys.path[1] is presumably the project root in the author's
    # environment -- confirm; it is kept for consistency with the other cells.
    root_dir = sys.path[1]

    for tag in tags:
        for file_name in file_names:
            try:
                contours = get_contours_by_metadata(image_name=file_name, tag=tag, metadata=metadata)
            except IndexError:
                # Raised by the lookup when a region has an empty attribute object.
                print(file_name)
                continue

            # None means the image has no region with this tag; iterating it
            # would raise TypeError and abort the whole run.
            if not contours:
                continue

            # Read the image once per (tag, file) rather than once per contour.
            image = cv2.imread(path.join(root_dir, input_dir, file_name))
            if image is None:
                print(f'Could not read image: {file_name}')
                continue

            for k, contour in enumerate(contours):
                crop = crop_image(image=image, contour=contour)
                if crop.size == 0:
                    # cv2.imwrite raises on an empty array; skip this region only.
                    print(f'Empty crop for {tag!r} region {k} of {file_name}')
                    continue

                crop_path = path.join(root_dir, output_dir, str(k) + tag + file_name)
                if not cv2.imwrite(crop_path, crop):
                    # imwrite signals failure through its return value.
                    print(f'Could not write crop: {crop_path}')

In [7]:
# contours = get_contours_by_metadata(file_name, tag, df_pricetag)

NameError: name 'file_name' is not defined

In [11]:
# Cut every 'petrol_name' / 'price' region out of the stella images and
# write the crops to the stella crop folder.
save_crop(
    tags=stella_tags,
    file_names=file_names_stella,
    metadata=df_stella,
    input_dir=stella_image_dir,
    output_dir=stella_crop_images_dir,
)

In [13]:
# Cut every 'text' / 'rubles' / 'kopeks' region out of the pricetag images and
# write the crops to the pricetag crop folder.
save_crop(
    tags=pricetag_tags,
    file_names=file_names_pricetag,
    metadata=df_pricetag,
    input_dir=pricetag_image_dir,
    output_dir=pricetag_crop_images_dir,
)