In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import seaborn as sns

import cv2
import matplotlib.pyplot as plt

In [2]:
# Ground-truth CSV of each split, relative to the notebook's working directory.
csv_train_path, csv_test_path = (
    '../data/train/gt.csv',
    '../data/test/gt.csv',
)

In [3]:
# Read both ground-truth tables and tag every row with the split it was loaded from.
df_train = pd.read_csv(csv_train_path).assign(split='train')
df_test = pd.read_csv(csv_test_path).assign(split='test')

In [4]:
# Integer target id -> label name.
label_map = dict(enumerate(("positive", "negative", "empty", "invalid")))

# Integer split code -> split name (-1 stands for the test split).
split_map = {0: "train", 1: "val", -1: "test"}

In [5]:
# Stack both splits into one table and derive helper columns from the image file name.
df_cls = pd.concat([df_train, df_test])

# Brand: everything before the first underscore of the image name.
df_cls['brand'] = df_cls['image'].str.extract(r'([^_]+)')

# Stem: the image name up to the `_jpg` / `_jpeg` / `_png` extension marker.
# The extensions are spelled out as an alternation on purpose: a character class such as
# `[jpg]` matches a single letter (j, p or g), so any `_p...` / `_g...` / `_j...` segment
# could end the stem. Names without the marker now yield NaN instead of a truncated stem.
df_cls['stem'] = df_cls['image'].str.extract(r'(.*)_(?:jpe?g|png)')

# Rows that carry no fold value get fold -1.
df_cls['fold'] = df_cls['fold'].fillna(-1).astype('int')
df_cls.head()

Unnamed: 0,image,target,fold,split,brand,stem
0,safecare_negative_030_jpg.rf.7c617838cb5f06ef9...,1,0,train,safecare,safecare_negative_030
1,safecare_invalid_swap_038_jpg.rf.89441e6b07934...,3,0,train,safecare,safecare_invalid_swap_038
2,hygisun_negative_005_jpg.rf.ff1b59ca7eb97855c3...,1,0,train,hygisun,hygisun_negative_005
3,medicovid_negative_real_005_png.rf.8502b610e6f...,1,0,train,medicovid,medicovid_negative_real_005
4,safecare_invalid_swap_012_jpg.rf.9a46f6916090f...,3,0,train,safecare,safecare_invalid_swap_012


In [6]:
# Index every raw upload (.jpg / .png) found below `raw_path` by its file stem.
raw_path = "../../../../covisionapp_image_upload/"
raw_images = (p.resolve() for p in Path(raw_path).glob("**/*") if p.suffix in {".jpg", ".png"})
stems = [(p, p.stem) for p in raw_images]

# Naming the columns in the constructor keeps the frame well-formed (two empty columns)
# when the glob finds nothing; assigning `.columns` afterwards raises a length-mismatch
# ValueError on an empty frame and hides the real problem (no images found).
df_raw = pd.DataFrame(stems, columns=['raw_image', 'stem'])

In [7]:
# Index every label file (`**/labels/*.txt`) found below `txt_path`.
txt_path = '../../../covision-training/data/raw/'
label_files = (p.resolve() for p in Path(txt_path).glob("**/labels/*") if p.suffix in {".txt"})
txts = [(p, p.stem) for p in label_files]

# Naming the columns in the constructor keeps the frame well-formed when the glob finds
# nothing; assigning `.columns` afterwards raises a length-mismatch ValueError on an empty frame.
df_txt = pd.DataFrame(txts, columns=['raw_txt', 'stem'])

# Reduce the label file stem to the part before the `_jpg` / `_jpeg` / `_png` marker.
# The extensions are spelled out as an alternation: a character class such as `[jpg]`
# matches one letter (j, p or g) and could cut the stem at any `_p...` / `_g...` / `_j...` segment.
df_txt['stem'] = df_txt['stem'].str.extract(r'(.*)_(?:jpe?g|png)', expand=False)

In [8]:
# list(filter(lambda x: x=='safecare_negative_030', stems))

In [9]:
# Every ground-truth stem must have a raw image; on failure, report which stems are missing.
missing_raw = df_cls.loc[~df_cls['stem'].isin(df_raw['stem']), 'stem']
assert len(missing_raw) == 0, (
    f"{len(missing_raw)} ground-truth stems have no raw image, e.g. {missing_raw.head().tolist()}"
)

In [10]:
# Every ground-truth stem must have a label file; on failure, report which stems are missing.
missing_txt = df_cls.loc[~df_cls['stem'].isin(df_txt['stem']), 'stem']
assert len(missing_txt) == 0, (
    f"{len(missing_txt)} ground-truth stems have no label file, e.g. {missing_txt.head().tolist()}"
)

In [11]:
# Attach the raw-image path and the label-file path to each ground-truth row via `stem`.
# NOTE(review): both joins use pandas' default inner join, so a stem that occurs more than
# once in `df_raw` or `df_txt` would multiply rows — confirm stems are unique.
df = pd.merge(pd.merge(df_cls, df_raw, on='stem'), df_txt, on='stem')

In [12]:
# Dataset root; the `train/` and `test/` outputs of the following cells are written beneath it.
root: Path = Path('../data/')

In [13]:
def _jpg_name(label_path):
    """Return the label file's stem with `.jpg` appended."""
    return Path(label_path).stem + '.jpg'


# Output image name for each row, derived from its label file name.
df['new_image'] = df['raw_txt'].map(_jpg_name)

In [14]:
# Write the train split's ground truth; `new_image` is stored under the column name `image`.
train_rows = df.loc[df['split'] == 'train', ['new_image', 'target', 'fold']]
train_rows.to_csv(root / 'train' / 'gt.csv', header=['image', 'target', 'fold'], index=False)

In [15]:
# Write the test split's ground truth (no fold column); `new_image` is stored as `image`.
test_rows = df.loc[df['split'] == 'test', ['new_image', 'target']]
test_rows.to_csv(root / 'test' / 'gt.csv', header=['image', 'target'], index=False)

In [16]:
# Largest width / height (in pixels) of the saved crops.
maxwidth, maxheight = 300, 300


def _read_first_box(label_path):
    """Return the four box values from the first line of a label file.

    The first line must hold five whitespace-separated numbers; the first one is
    dropped and the remaining four are returned as `(x, y, w, h)` in float32.
    They are used as fractions of the image size (box centre and box extent).

    Raises:
        ValueError: if the file is empty or its first line does not hold five values.
    """
    with open(label_path) as label_file:
        lines = label_file.read().strip().splitlines()
    if not lines:
        raise ValueError(f"label file is empty: {label_path}")
    fields = np.array(lines[0].split(), dtype=np.float32)
    if fields.size != 5:
        raise ValueError(f"expected 5 values on the first line of {label_path}, got {fields.size}")
    return fields[1:]


def _crop_box(img, box):
    """Crop `img` to the centre/extent box `(x, y, w, h)` given as fractions of the image size."""
    dh, dw = img.shape[:2]
    x, y, w, h = box
    top = int((y - h / 2.) * dh)
    left = int((x - w / 2.) * dw)
    bottom = top + int(dh * h)
    right = left + int(dw * w)
    # A negative start index wraps around in NumPy slicing and would give a wrong or
    # empty crop, so boxes that stick out past the top/left edge are clamped to 0.
    # (End indices past the image are already clipped by slicing.)
    top, left = max(top, 0), max(left, 0)
    return img[top:bottom, left:right]


def _fit_within(img, max_w, max_h):
    """Resize `img`, keeping its aspect ratio, so that it fits inside `max_w` x `max_h`."""
    scale = min(max_w / img.shape[1], max_h / img.shape[0])
    # Very thin crops could round down to 0 pixels, which cv2.resize rejects.
    dim = (max(int(img.shape[1] * scale), 1), max(int(img.shape[0] * scale), 1))
    return cv2.resize(img, dim)


for row in df.itertuples():
    img = cv2.imread(str(row.raw_image))
    if img is None:  # cv2.imread signals an unreadable file by returning None
        raise OSError(f"could not read image: {row.raw_image}")

    cropped_image = _crop_box(img, _read_first_box(row.raw_txt))
    if cropped_image.size == 0:
        raise ValueError(f"box in {row.raw_txt} selects no pixels of {row.raw_image}")
    cropped_image = _fit_within(cropped_image, maxwidth, maxheight)

    # 'val' rows are stored together with the train images.
    ds = 'train' if row.split in ['train', 'val'] else 'test'
    fname = root / ds / 'images' / (row.raw_txt.stem + '.jpg')
    fname.parent.mkdir(parents=True, exist_ok=True)
    if not cv2.imwrite(str(fname), cropped_image):  # imwrite reports failure via its return value
        raise OSError(f"could not write image: {fname}")