In [1]:
import PIL
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from shapely.geometry import Polygon, LinearRing

In [2]:
def rle_to_pixels(rle_code):
#     rle_code = [int(i) for i in rle_code.split()]
    pixels = [(pixel_position % 768, pixel_position // 768)
              for start, length in list(zip(rle_code[0:-1:2], rle_code[1::2]))
              for pixel_position in range(start, start + length)]
    return pixels

In [3]:
def apply_mask(image, mask):
    for x, y in mask:
        image[x, y, [0, 1]] = 255
    return image

In [4]:
def show_img(title):
    path = "D:\\diplom\\train_v2\\"
    img = cv2.imread(path + title)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return 0

In [5]:
def str_to_int(array):
    for i in range(len(array)):
        array[i] = np.fromstring(array[i], dtype=int, sep=' ')
    return array

In [6]:
def angle_coor(ship_mask):
    coor = np.zeros((4,2))
    
    coor[0] = ship_mask[np.argmin(ship_mask[:,0])] # точка с y_min
    coor[1] = ship_mask[np.argmin(ship_mask[:,1])] # точка с x_min
    coor[2] = ship_mask[np.argmax(ship_mask[:,0])] # точка с y_max
    coor[3] = ship_mask[np.argmax(ship_mask[:,1])] # точка с x_max
    
    return coor

In [7]:
def in_square(point, val, i, j):
    if ((val[i] + 10) < point[0] < (val[i+1] - 10)) and \
    ((val[j] + 10) < point[1] < (val[j+1] - 10)):
        return True
    else:
        return False

In [8]:
def check_min_max(angles):
    if (angles[0][0] == angles[3][0]) & (angles[0][1] > angles[3][1]):
        angles[[0,3]] = angles[[3,0]] 
    
    if (angles[1][0] == angles[2][0]) & (angles[1][1] > angles[2][1]):
        angles[[1,2]] = angles[[2,1]]
        
    return angles

In [9]:
def crop_ext_samples_positives(positive_path, path, title, val, df):
    img_df = df.loc[df['ImageId'] == title]
    img_df = img_df['EncodedPixels']
    img_df = img_df.values
    img_df = str_to_int(img_df)
    
    img = cv2.imread(path + title)

    for i in range(len(val) - 1):
        for j in range(len(val) - 1):
            for k in range(len(img_df)):
                angles = angle_coor(np.asarray(rle_to_pixels(img_df[k]))) #получение координат углов у судна 
                if k == 0:
                    print(angles)
                if (in_square(angles[0], val, i, j)) or (in_square(angles[1], val, i, j)) \
                or (in_square(angles[2], val, i, j)) or (in_square(angles[3], val, i, j)):
                    crop_img = img[val[i]:val[i+1], val[j]:val[j+1]].copy()
                    cv2.imwrite((os.path.join(positive_path, f'{i}_{j}_{title}')), crop_img)
                     

In [10]:
def crop_in_samples_positives(positive_path, path, title, val, df):
    img_df = df.loc[df['ImageId'] == title]
    img_df = img_df['EncodedPixels']
    img_df = img_df.values
    img_df = str_to_int(img_df)
    
    img = cv2.imread(path + title)
    
    for i in range(len(val) - 1):
        for j in range(len(val) - 1):
            for k in range(len(img_df)):
                angles = angle_coor(np.asarray(rle_to_pixels(img_df[k])))
                if restrict(angles) == True:
                    continue
                angles = check_min_max(angles)
                if check_overlaps(angles, val, i, j) == True:
                    crop_img = img[val[i]:val[i+1], val[j]:val[j+1]].copy()
                    cv2.imwrite((os.path.join(positive_path, f'{i}_{j}_{title}')), crop_img)
                    

In [220]:
def crop_samples_negatives(negative_path, path, neg_title, val):
    img = cv2.imread(path + neg_title)
    
    for i in range(len(val) - 1):
        for j in range(len(val) - 1):
            crop_img = img[val[i]:val[i+1], val[j]:val[j+1]].copy()
            cv2.imwrite((os.path.join(negative_path, f'{i}_{j}_{neg_title}')), crop_img)         
            

In [234]:
def crop_rand_samples_negatives(negative_path, path, neg_title, val, step_count_i, step_count_j):
    # берутся случаайные части изображения 
    # количество взятых сегментов будет равно step_count**2 
    
    img = cv2.imread(path + neg_title)
    
    i_steps = np.random.choice(len(val)-1, step_count_i, replace = False)
    j_steps = np.random.choice(len(val)-1, step_count_j, replace = False)
    
    for i in i_steps:
        for j in j_steps:
            crop_img = img[val[i]:val[i+1], val[j]:val[j+1]].copy()
            cv2.imwrite((os.path.join(negative_path, f'{i}_{j}_{neg_title}')), crop_img)
            

In [12]:
def check_overlaps(angles, vl, i, j, show = False):
    
    flip_angles = np.zeros((4,2))
    
    for k in range(len(angles)):
        flip_angles[k] = np.flip(angles[k])
    
    p1 = Polygon([flip_angles[0], flip_angles[1], flip_angles[2], flip_angles[3]])
    p2 = Polygon([(val[j],val[i]), (val[j+1],val[i]), (val[j+1],val[i+1]), (val[j],val[i+1])])
    
    if show == True:
        x, y = p1.exterior.xy
        x1, y1 = p2.exterior.xy
        fig = plt.figure(1, figsize=(5,5))
        ax = fig.add_subplot(111)
        plt.gca().invert_yaxis()
        ax.plot(x, y, 'r-', x1, y1, 'b-')
        ax.set_title('Polygon')
    
    return p1.intersects(p2)

In [13]:
def restrict(angles):
    # если какая-либо диагональ баундинг бокса больше данного значения, то это
    # явно ошибка в разметке
    if ((angles[2][0] - angles[0][0]) > 480)|((angles[1][1] - angles[3][1]) > 480):
        return True
    else :
        return False

In [14]:
file = "un"
df_set = pd.read_csv(f"D:\\diplom\\{file}_ship_segmentations.csv")
df = pd.read_csv("D:\\diplom\\train_ship_segmentations_v2.csv")
df['HasShip'] = df['EncodedPixels'].notnull()

# sdf = df_set.loc[lambda df_set : df_set['HasShip'] == True].reset_index(drop=True)
df = df.loc[lambda df: df['HasShip'] == False].reset_index(drop=True)
path = "D:\\diplom\\train_v2\\"
val = np.array([0, 256, 512, 768])# вектор, который содержит координаты внешних отсечений

# n_sdf = sdf.values
# n_sdf = np.hstack((n_sdf, np.zeros((n_sdf.shape[0],1))))


2.770932252460915

##### Выбор и нарезка положительных семплов для окна 128х128

In [25]:
title = df_set['ImageId'].drop_duplicates(keep = 'first').values

8635

In [27]:
val_in = np.linspace(0, 768, 7).astype(int)
in_pos_path = "D:\\diplom\\positive_in\\"

for i in title : crop_in_samples_positives(in_pos_path, path, i, val_in, df_set)

In [15]:
len(os.listdir(path="D:\\diplom\\positive_in\\"))

23927

In [33]:
# for i in range(len(n_sdf)):
#     n_sdf[i,3] = np.fromstring(n_sdf[i][1], dtype=int, sep=' ')[1::2].sum()

In [12]:
# n_sdf = n_sdf[n_sdf[:,3].argsort()][::-1]
# n_sdf = pd.DataFrame({'ImageId':n_sdf[:,0], 'EncodedPixels':n_sdf[:,1], 'HasShip':n_sdf[:,2], 'Area':n_sdf[:,3]})

#### Выбор и нарезка положительных семплов для окна 256х256

In [59]:
positive_path = 'D:\diplom\positive_ext'

for i in title : crop_ext_samples_positives(positive_path, path, i, val, df_set)

In [27]:
# print('number of images:', len(os.listdir(path="D\diplom\positive_ext\")))
len(os.listdir(path="D:\\diplom\\positive_ext\\"))

15710

#### Выбор и нарезка отрицательных семплов для окна 128х128


In [248]:
neg_titles = df.sample(n=4000, random_state = 512)["ImageId"].values

In [249]:
negative_path = "D:\\diplom\\negative_in"
# neg_title = np.array(['fffe646cf.jpg'])
val_in = np.linspace(0, 768, 7).astype(int)
step_count_i = 3
step_count_j = 3

In [250]:
for title in neg_titles : crop_rand_samples_negatives(negative_path, path, title, val_in, step_count_i, step_count_j)

#### Выбор и нарезка отрицательных семплов для окна 256х256

In [202]:
neg_titles = df.sample(frac = 174/15000, random_state = 256)["ImageId"].values


In [75]:
negative_path = "D:\\diplom\\negative_ext"
for i in neg_titles : crop_samples_negatives(negative_path, path, i, val)

In [76]:
len(os.listdir(path="D:\\diplom\\negative_ext\\"))

15660