In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively print the full path of every file under the Kaggle input directory.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import cv2
import PIL
import copy
import random
import matplotlib.pyplot as plt
from math import sqrt
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.preprocessing import label_binarize
from sklearn.metrics import RocCurveDisplay, precision_score, recall_score
from sklearn.metrics import f1_score, accuracy_score, roc_curve, auc, roc_auc_score
from tensorflow.keras.initializers import RandomNormal, Constant
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras import initializers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation, GlobalMaxPooling2D

In [None]:
# Load the training set from JSON into a DataFrame; later cells read its
# 'band_1', 'band_2' and 'is_iceberg' columns.
df_train = pd.read_json('../input/iceberg-sanella-dataset/train.json') # this is a dataframe

In [None]:
def get_scaled_imgs(df):
    """Turn every row of ``df`` into a scaled 75x75x3 image.

    Each row must provide ``band_1`` and ``band_2`` as flat sequences of
    75 * 75 values. The three output channels are band_1, band_2 and their
    element-wise sum; each channel is mean-centred and divided by its own
    value range (max - min).

    A channel with zero range (all pixels equal) is divided by 1 instead, so
    it becomes all zeros rather than NaN from 0/0. This is the same fallback
    the real-scene preprocessing cells in this notebook use.

    Returns:
        numpy array of shape (len(df), 75, 75, 3).
    """
    def _center_and_scale(band):
        # Mean-centre, then divide by the value range (1 for a constant band).
        spread = band.max() - band.min()
        if spread == 0:
            spread = 1
        return (band - band.mean()) / spread

    imgs = []

    for _, row in df.iterrows():
        # Make 75x75 images out of the flat band vectors.
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        band_sum = band_1 + band_2  # plus since log(x*y) = log(x) + log(y)

        channels = [_center_and_scale(band) for band in (band_1, band_2, band_sum)]
        imgs.append(np.dstack(channels))

    return np.array(imgs)


In [None]:
# Scaled image tensor and the matching labels taken from the 'is_iceberg' column.
Xtrain = get_scaled_imgs(df_train) 
Ytrain = np.array(df_train['is_iceberg'])

In [None]:
# Report the dimensions of the image tensor and of the label vector.
for _array in (Xtrain, Ytrain):
    print(np.shape(_array))

In [None]:
def rotate_image(image, angle):
    """Rotate ``image`` by ``angle`` about its centre, keeping the original size.

    The rotation matrix comes from ``cv2.getRotationMatrix2D`` with scale 1.0
    and the image is resampled with linear interpolation.
    """
    # shape[1::-1] is (width, height), the order OpenCV expects for sizes.
    width_height = image.shape[1::-1]
    center = tuple(np.array(width_height) / 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, matrix, width_height, flags=cv2.INTER_LINEAR)

In [None]:
def get_more_images(imgs):
    """Triple a batch of images by appending two mirrored copies of each.

    Args:
        imgs: array of shape (n, height, width, channels).

    Returns:
        Array of shape (3 * n, height, width, channels) laid out as
        [originals, copies with the column order reversed, copies with the
        row order reversed]. Every image keeps its position inside each third,
        so labels can simply be repeated three times.

    The mirrored copies are what ``cv2.flip(channel, 1)`` and
    ``cv2.flip(channel, 0)`` produce per channel; NumPy slicing does the whole
    batch at once and also handles an empty batch.
    """
    imgs = np.asarray(imgs)
    column_reversed = imgs[:, :, ::-1, :]  # same as cv2.flip(..., 1)
    row_reversed = imgs[:, ::-1, :, :]     # same as cv2.flip(..., 0)
    return np.concatenate((imgs, column_reversed, row_reversed))


In [None]:
# Augment the training images; the label vector is repeated once per image
# copy (original plus two flips) so labels stay aligned with the images.
Xtr_more = get_more_images(Xtrain)
Ytr_more = np.concatenate([Ytrain] * 3)

In [None]:
# Report the dimensions of the augmented images and labels.
for _array in (Xtr_more, Ytr_more):
    print(np.shape(_array))

In [None]:
# Hold out 25% of the augmented samples as a test set (fixed random_state).
# NOTE(review): Xtr_more holds each original image together with its flipped
# copies, so a random split can put an image in train and its flip in test.
X_train, X_test, y_train, y_test = train_test_split(Xtr_more, Ytr_more, test_size=0.25, random_state=42)

In [None]:
# Print the name and dimensions of every split.
for _label, _split in (('X_train', X_train), ('X_test', X_test),
                       ('y_train', y_train), ('y_test', y_test)):
    print(_label, np.shape(_split))

In [None]:
# Baseline CNN for 75x75x3 inputs, trained on the augmented Kaggle split.
model = Sequential()

# Feature extractor: four conv -> max-pool -> dropout stages.
model.add(Conv2D(64, 3, activation='relu', input_shape=(75, 75, 3)))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
model.add(Dropout(0.2))

for n_filters in (128, 256, 256):
    model.add(Conv2D(n_filters, 3, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

model.add(Flatten())

# Fully connected head: 512 -> 256 -> 128 units, each with ReLU and dropout.
for n_units in (512, 256, 128):
    model.add(Dense(n_units))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))

# Single sigmoid unit; the metrics cell thresholds its output at 0.5.
model.add(Dense(1, activation="sigmoid"))

# `decay=0.0` was dropped: it equals the default and newer Keras optimizers
# reject the argument.
opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["binary_accuracy"])
model.summary()


checkpoint_path = '/kaggle/working/training_1/'
# All three callbacks watch the validation loss.
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, save_best_only=True, monitor='val_loss', mode='min')
# `min_delta` is the current name of the deprecated `epsilon` argument.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')

# To resume from a saved checkpoint: model.load_weights(checkpoint_path)
model.fit(X_train, y_train, validation_split = 0.2, batch_size=32, epochs=50, verbose=1, callbacks=[earlyStopping, reduce_lr_loss, mcp_save])

In [None]:
# Sigmoid scores for the held-out test images; the metrics cell thresholds them at 0.5.
prediction = model.predict(X_test)


In [None]:
# Save the final weights to an .h5 file inside the checkpoint directory.
model.save_weights('/kaggle/working/training_1/my_model_weights.h5')

In [None]:
# Confusion-matrix counts at a 0.5 threshold on the sigmoid output.
_scores = np.asarray(prediction)[:, 0]
_truth = np.asarray(y_test)[:len(_scores)]
_pred_pos = _scores >= 0.5
_pred_neg = _scores < 0.5  # separate mask so a NaN score is counted nowhere, as before

TP = int(np.sum(_pred_pos & (_truth == 1)))
FP = int(np.sum(_pred_pos & (_truth == 0)))
FN = int(np.sum(_pred_neg & (_truth == 1)))
TN = int(np.sum(_pred_neg & (_truth == 0)))

# Each ratio falls back to 0.0 when its denominator is zero (for example, no
# positive predictions) instead of raising ZeroDivisionError.
pr = TP / (TP + FP) if (TP + FP) else 0.0
rec = TP / (TP + FN) if (TP + FN) else 0.0
acc = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) else 0.0
f_measure = 2 * pr * rec / (pr + rec) if (pr + rec) else 0.0
print('accuracy', acc)
print('precision', pr)
print('recall', rec)
print('F-mera', f_measure)


# реальный датасет 1


In [None]:
def linear_to_decibel(band):
    """Convert a linear-scale band to decibels: 10 * log10(value).

    Pixels equal to 0, whose logarithm is undefined, are replaced by the
    negative of the band's maximum (NaNs ignored), taken before conversion.

    Unlike a per-pixel loop over a fixed 75x75 grid, this works on an array of
    any shape, leaves the input untouched and returns a new array. Integer
    input is promoted to float64 so the results are not truncated; floating
    input keeps its dtype.

    Args:
        band: array-like of linear values.

    Returns:
        numpy array with the same shape as ``band``.
    """
    values = np.asarray(band)
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    maxi = np.nanmax(values)
    zero_mask = values == 0

    # log10(0) is -inf; silence that warning because those pixels are
    # overwritten on the next line.
    with np.errstate(divide='ignore'):
        decibels = 10 * np.log10(values)
    decibels[zero_mask] = -maxi

    return decibels

In [None]:
# Source folder name ends with DB97f (labeled SAR dataset, db97labels).
# Setting MAX_IMAGE_PIXELS to None turns off Pillow's image-size limit,
# presumably because the scenes are larger than the default limit.
# NOTE(review): the imports cell only has `import PIL`; `PIL.Image` resolves
# here only if another import has already loaded that submodule — consider
# importing `PIL.Image` explicitly.
PIL.Image.MAX_IMAGE_PIXELS = None
# Read each band inside a `with` block so the TIFF file is closed as soon as
# its pixels have been copied into a numpy array.
with PIL.Image.open('../input/labeledsardataset/s1a-ew-grd-hh-20220403t051038-20220403t051143-042604-051524-001.tiff') as im_hh:
    img_hh = np.array(im_hh)
with PIL.Image.open('../input/labeledsardataset/s1a-ew-grd-hv-20220403t051038-20220403t051143-042604-051524-002.tiff') as im_hv:
    img_hv = np.array(im_hv)

In [None]:
# # второй вариант
# img_hh = cv2.imread('../input/labeledsardataset/s1a-ew-grd-hh-20220403t051038-20220403t051143-042604-051524-001.tiff', cv2.IMREAD_GRAYSCALE)
# img_hv = cv2.imread('../input/labeledsardataset/s1a-ew-grd-hv-20220403t051038-20220403t051143-042604-051524-002.tiff', cv2.IMREAD_GRAYSCALE)

In [None]:
# Quick look at the HV band's value range and mean, then the HH scene's dimensions.
for _stat in (img_hv.max(), img_hv.min(), img_hv.mean(), img_hh.shape):
    print(_stat)

In [None]:
# Read the labelled patch centres: one comma-separated line per label whose
# first two fields are x and y. They are stored as [y, x] because the
# cropping cell uses element 0 for the first array axis.
# Blank lines (including a trailing newline) and an empty file are skipped
# instead of failing on int('').
centers = []
with open('../input/db97labels/Background-2.txt') as f:
    for raw_line in f:
        if not raw_line.strip():
            continue
        fields = raw_line.strip().split(',')
        x = int(fields[0])
        y = int(fields[1])
        centers.append([y, x])

In [None]:
# Cut a patch around every labelled centre of scene 1 and turn it into a
# 3-channel sample (HH, HV, HH + HV in decibels).
imgs = []
for i, center in enumerate(centers):
    # center is [first-axis index, second-axis index]; the window spans
    # 38 pixels before the centre and 37 after it (75 in total).
    lo_0, hi_0 = center[0] - 38, center[0] + 37
    lo_1, hi_1 = center[1] - 38, center[1] + 37

    tmp_hh = img_hh[lo_0:hi_0, lo_1:hi_1]
    tmp_hv = img_hv[lo_0:hi_0, lo_1:hi_1]

    # Replace NaN by the patch mean, +inf by the patch max and -inf by 0,
    # then convert each band to decibels.
    channels = []
    for raw_patch in (tmp_hh, tmp_hv):
        filled = np.nan_to_num(raw_patch, nan=np.nanmean(raw_patch), posinf=np.nanmax(raw_patch), neginf=0)
        channels.append(linear_to_decibel(filled))
    channels.append(channels[0] + channels[1])  # plus since log(x*y) = log(x) + log(y)

    # Rescale: mean-centre each channel and divide by its range (or by 1 when
    # the range is zero), mirroring get_scaled_imgs.
    # Author's notes, translated: this variant was labelled the "incorrect
    # standardization"; the alternatives that had been tried were dividing by
    # the standard deviation ("correct standardization") and min-max scaling
    # ("correct normalization").
    scaled = []
    for channel in channels:
        spread = np.nanmax(channel) - np.nanmin(channel)
        denom = spread if spread != 0 else 1
        scaled.append((channel - np.nanmean(channel)) / denom)

    if i == 1:
        # Show the raw HH / HV crops of one sample as a visual check.
        fig = plt.figure(0, figsize=(10,10))
        for position, title, crop in ((1, 'iceberg band hh', tmp_hh), (2, 'iceberg band hv', tmp_hv)):
            ax = fig.add_subplot(1, 2, position)
            ax.set_title(title)
            ax.imshow(crop, cmap='jet')
        plt.show()

    imgs.append(np.dstack(scaled))

dataset1 = np.array(imgs)
for _stat in (dataset1.max(), dataset1.min(), dataset1.mean(), dataset1.std(), dataset1.shape):
    print(_stat)

# реальный датасет 2

In [None]:
# Source folder name ends with 174F.
# Read each band inside a `with` block so the TIFF file is closed as soon as
# its pixels have been copied into a numpy array.
with PIL.Image.open('../input/sentinel-photo-1/s1a-ew-grd-hh-20211128t191435-20211128t191522-040775-04d6ff-001.tiff') as im_hh2:
    img_hh2 = np.array(im_hh2)
with PIL.Image.open('../input/sentinel-photo-1/s1a-ew-grd-hv-20211128t191435-20211128t191522-040775-04d6ff-002.tiff') as im_hv2:
    img_hv2 = np.array(im_hv2)

print('====stats=====')
print(img_hv2.max())
print(img_hv2.min())
print(img_hv2.mean())
print(img_hh2.shape)
print('==============')
# Read the labelled patch centres for scene 2: one comma-separated line per
# label whose first two fields are x and y, stored as [y, x].
# Blank lines (including a trailing newline) and an empty file are skipped
# instead of failing on int('').
centers2 = []
with open('../input/174flabeled/hh.txt') as f2:
    for raw_line in f2:
        if not raw_line.strip():
            continue
        fields = raw_line.strip().split(',')
        x = int(fields[0])
        y = int(fields[1])
        centers2.append([y, x])

# Cut a patch around every labelled centre of scene 2 and turn it into a
# 3-channel sample (HH, HV, HH + HV in decibels).
imgs2 = []
for i, center in enumerate(centers2):
    if i == 123:
        # Label 123 is deliberately left out (the reason is not recorded here).
        continue

    # center is [first-axis index, second-axis index]; the window spans
    # 38 pixels before the centre and 37 after it (75 in total).
    lo_0, hi_0 = center[0] - 38, center[0] + 37
    lo_1, hi_1 = center[1] - 38, center[1] + 37

    tmp_hh = img_hh2[lo_0:hi_0, lo_1:hi_1]
    tmp_hv = img_hv2[lo_0:hi_0, lo_1:hi_1]

    # Replace NaN by the patch mean, +inf by the patch max and -inf by 0,
    # then convert each band to decibels.
    channels = []
    for raw_patch in (tmp_hh, tmp_hv):
        filled = np.nan_to_num(raw_patch, nan=np.nanmean(raw_patch), posinf=np.nanmax(raw_patch), neginf=0)
        channels.append(linear_to_decibel(filled))
    channels.append(channels[0] + channels[1])  # plus since log(x*y) = log(x) + log(y)

    if i == 4:
        # Show the raw HH / HV crops of one sample as a visual check.
        fig = plt.figure(0, figsize=(10,10))
        for position, title, crop in ((1, 'iceberg band hh', tmp_hh), (2, 'iceberg band hv', tmp_hv)):
            ax = fig.add_subplot(1, 2, position)
            ax.set_title(title)
            ax.imshow(crop, cmap='jet')
        plt.show()

    # Rescale: mean-centre each channel and divide by its range (or by 1 when
    # the range is zero). Dividing by the standard deviation and min-max
    # scaling were the alternatives that had been tried.
    scaled = []
    for channel in channels:
        spread = np.nanmax(channel) - np.nanmin(channel)
        denom = spread if spread != 0 else 1
        scaled.append((channel - np.nanmean(channel)) / denom)

    imgs2.append(np.dstack(scaled))

dataset2 = np.array(imgs2)
for _stat in (dataset2.max(), dataset2.min(), dataset2.mean(), dataset2.std(), dataset2.shape):
    print(_stat)

### реальный датасет 3

In [None]:
# Scene 3 is read from the 0522b6-dataset folder (the previous comment here
# named the 174F folder, which does not match the paths below).
# Read each band inside a `with` block so the TIFF file is closed as soon as
# its pixels have been copied into a numpy array.
with PIL.Image.open('../input/0522b6-dataset/s1a-ew-grd-hh-20220501t043837-20220501t043936-043012-0522b6-001.tiff') as im_hh3:
    img_hh3 = np.array(im_hh3)
with PIL.Image.open('../input/0522b6-dataset/s1a-ew-grd-hv-20220501t043837-20220501t043936-043012-0522b6-002.tiff') as im_hv3:
    img_hv3 = np.array(im_hv3)

print('====stats=====')
print(img_hv3.max())
print(img_hv3.min())
print(img_hv3.mean())
print(img_hh3.shape)
print('==============')
# Read the labelled patch centres for scene 3: one comma-separated line per
# label whose first two fields are x and y, stored as [y, x].
# Blank lines (including a trailing newline) and an empty file are skipped
# instead of failing on int('').
centers3 = []
with open('../input/0522b6-dataset/labels522b6.txt') as f3:
    for raw_line in f3:
        if not raw_line.strip():
            continue
        fields = raw_line.strip().split(',')
        x = int(fields[0])
        y = int(fields[1])
        centers3.append([y, x])

# Cut a patch around every labelled centre of scene 3 and turn it into a
# 3-channel sample (HH, HV, HH + HV in decibels).
imgs3 = []
for i, center in enumerate(centers3):
    # center is [first-axis index, second-axis index]; the window spans
    # 38 pixels before the centre and 37 after it (75 in total).
    lo_0, hi_0 = center[0] - 38, center[0] + 37
    lo_1, hi_1 = center[1] - 38, center[1] + 37

    tmp_hh = img_hh3[lo_0:hi_0, lo_1:hi_1]
    tmp_hv = img_hv3[lo_0:hi_0, lo_1:hi_1]

    # Replace NaN by the patch mean, +inf by the patch max and -inf by 0,
    # then convert each band to decibels.
    channels = []
    for raw_patch in (tmp_hh, tmp_hv):
        filled = np.nan_to_num(raw_patch, nan=np.nanmean(raw_patch), posinf=np.nanmax(raw_patch), neginf=0)
        channels.append(linear_to_decibel(filled))
    channels.append(channels[0] + channels[1])  # plus since log(x*y) = log(x) + log(y)

    if i == 15:
        # Show the raw HH / HV crops of one sample as a visual check.
        fig = plt.figure(0, figsize=(10,10))
        for position, title, crop in ((1, 'iceberg band hh', tmp_hh), (2, 'iceberg band hv', tmp_hv)):
            ax = fig.add_subplot(1, 2, position)
            ax.set_title(title)
            ax.imshow(crop, cmap='jet')
        plt.show()

    # Rescale: mean-centre each channel and divide by its range (or by 1 when
    # the range is zero). Dividing by the standard deviation and min-max
    # scaling were the alternatives that had been tried.
    scaled = []
    for channel in channels:
        spread = np.nanmax(channel) - np.nanmin(channel)
        denom = spread if spread != 0 else 1
        scaled.append((channel - np.nanmean(channel)) / denom)

    imgs3.append(np.dstack(scaled))

dataset3 = np.array(imgs3)
for _stat in (dataset3.max(), dataset3.min(), dataset3.mean(), dataset3.shape, dataset3.std()):
    print(_stat)

# обработка пустого океана

In [None]:
# Read the empty-ocean label file: one comma-separated line per label, with
# x in the third field and y in the fourth; stored as [y, x].
# Blank lines (including a trailing newline) and an empty file are skipped
# instead of failing on int('').
centers_ocean = []
with open('../input/empty-ocean/Background-3-pustoy.txt') as file:
    for raw_line in file:
        if not raw_line.strip():
            continue
        fields = raw_line.strip().split(',')
        x = int(fields[2])
        y = int(fields[3])
        centers_ocean.append([y, x])

# Build empty-ocean samples from scene 3 (img_hh3 / img_hv3). Every label
# yields several patches taken at fixed offsets along the first array axis.
imgs_ocean = []
for i, center in enumerate(centers_ocean):
    # Labels 9, 10 and 11 are sampled at nine offsets (up to +/-300); all
    # other labels at five offsets (up to +/-150). The order of the offsets
    # fixes the order of the samples in the dataset.
    if i < 9 or i > 11:
        offsets = (-150, -75, 0, 75, 150)
    else:
        offsets = (-300, -225, -150, -75, 0, 300, 225, 150, 75)

    for offset in offsets:
        new_center = center[0] + offset
        # The window spans 38 pixels before the centre and 37 after it.
        lo_0, hi_0 = new_center - 38, new_center + 37
        lo_1, hi_1 = center[1] - 38, center[1] + 37

        tmp_hh = img_hh3[lo_0:hi_0, lo_1:hi_1]
        tmp_hv = img_hv3[lo_0:hi_0, lo_1:hi_1]

        # Replace NaN by the patch mean, +inf by the patch max and -inf by 0,
        # then convert each band to decibels.
        channels = []
        for raw_patch in (tmp_hh, tmp_hv):
            filled = np.nan_to_num(raw_patch, nan=np.nanmean(raw_patch), posinf=np.nanmax(raw_patch), neginf=0)
            channels.append(linear_to_decibel(filled))
        channels.append(channels[0] + channels[1])  # plus since log(x*y) = log(x) + log(y)

        # Rescale: mean-centre each channel and divide by its range (or by 1
        # when the range is zero). The patches of labels 9-11 used to be
        # min-shifted instead of mean-centred, unlike every other sample in
        # this notebook; all ocean patches now share the same scaling.
        scaled = []
        for channel in channels:
            spread = np.nanmax(channel) - np.nanmin(channel)
            denom = spread if spread != 0 else 1
            scaled.append((channel - np.nanmean(channel)) / denom)

        imgs_ocean.append(np.dstack(scaled))


dataset_ocean = np.array(imgs_ocean)
print(dataset_ocean.max())
print(dataset_ocean.min())
print(dataset_ocean.mean())
print(dataset_ocean.shape)
print(dataset_ocean.std())

# реальный датасет 4


In [None]:
# Source folder name ends with ed2a.
# Read each band inside a `with` block so the TIFF file is closed as soon as
# its pixels have been copied into a numpy array.
with PIL.Image.open('../input/ed2adataset/s1a-ew-grd-hh-20220419t043836-20220419t043936-042837-051ced-001.tiff') as im_hh4:
    img_hh4 = np.array(im_hh4)
with PIL.Image.open('../input/ed2adataset/s1a-ew-grd-hv-20220419t043836-20220419t043936-042837-051ced-002.tiff') as im_hv4:
    img_hv4 = np.array(im_hv4)

print('====stats=====')
print(img_hv4.max())
print(img_hv4.min())
print(img_hv4.mean())
print(img_hh4.shape)
print('==============')
# Read the labelled patch centres for scene 4: one comma-separated line per
# label whose first two fields are x and y, stored as [y, x].
# Blank lines (including a trailing newline) and an empty file are skipped
# instead of failing on int('').
centers4 = []
with open('../input/ed2adataset/Background-4.txt') as f4:
    for raw_line in f4:
        if not raw_line.strip():
            continue
        fields = raw_line.strip().split(',')
        x = int(fields[0])
        y = int(fields[1])
        centers4.append([y, x])

# Cut a patch around every labelled centre of scene 4 and turn it into a
# 3-channel sample (HH, HV, HH + HV in decibels).
imgs4 = []
for i, center in enumerate(centers4):
    # center is [first-axis index, second-axis index]; the window spans
    # 38 pixels before the centre and 37 after it (75 in total).
    lo_0, hi_0 = center[0] - 38, center[0] + 37
    lo_1, hi_1 = center[1] - 38, center[1] + 37

    tmp_hh = img_hh4[lo_0:hi_0, lo_1:hi_1]
    tmp_hv = img_hv4[lo_0:hi_0, lo_1:hi_1]

    # Replace NaN by the patch mean, +inf by the patch max and -inf by 0,
    # then convert each band to decibels.
    channels = []
    for raw_patch in (tmp_hh, tmp_hv):
        filled = np.nan_to_num(raw_patch, nan=np.nanmean(raw_patch), posinf=np.nanmax(raw_patch), neginf=0)
        channels.append(linear_to_decibel(filled))
    channels.append(channels[0] + channels[1])  # plus since log(x*y) = log(x) + log(y)

    if i == 8:
        # Show the raw HH / HV crops of one sample as a visual check.
        fig = plt.figure(0, figsize=(10,10))
        for position, title, crop in ((1, 'iceberg band hh', tmp_hh), (2, 'iceberg band hv', tmp_hv)):
            ax = fig.add_subplot(1, 2, position)
            ax.set_title(title)
            ax.imshow(crop, cmap='jet')
        plt.show()

    # Rescale: mean-centre each channel and divide by its range (or by 1 when
    # the range is zero). Dividing by the standard deviation and min-max
    # scaling were the alternatives that had been tried.
    scaled = []
    for channel in channels:
        spread = np.nanmax(channel) - np.nanmin(channel)
        denom = spread if spread != 0 else 1
        scaled.append((channel - np.nanmean(channel)) / denom)

    imgs4.append(np.dstack(scaled))

dataset4 = np.array(imgs4)
for _stat in (dataset4.max(), dataset4.min(), dataset4.mean(), dataset4.shape, dataset4.std()):
    print(_stat)

In [None]:
# Show how many samples each real-scene dataset contributed.
for _dataset in (dataset1, dataset2, dataset3, dataset4, dataset_ocean):
    print(_dataset.shape)


# All labelled patches from the four scenes form the positive class.
dataset_X_ice = np.concatenate((dataset1, dataset2, dataset3, dataset4), axis=0)

# Augment both classes with flipped copies.
dataset_X_ice_more = get_more_images(dataset_X_ice)
dataset_X_ocean_more = get_more_images(dataset_ocean)

In [None]:
# Hold-out set for the real scenes: the first 100 ocean samples (label 0)
# followed by the first 100 iceberg samples (label 1).
# NOTE(review): get_more_images puts the unflipped images first, and the
# training set is built from everything after index 100 of the same arrays,
# so flipped copies of these hold-out images presumably end up in training.
testik_X = np.concatenate((dataset_X_ocean_more[:100], dataset_X_ice_more[:100]), axis=0)
testik_y = np.concatenate((np.array([0] * 100), np.array([1] * 100)))

In [None]:
# Dimensions of the augmented ocean set and of the un-augmented iceberg set.
for _dataset in (dataset_X_ocean_more, dataset_X_ice):
    print(_dataset.shape)

In [None]:
# Combined training data: real-scene iceberg patches (label 1) and ocean
# patches (label 0) after the first 100 of each, which are used as the
# hold-out set, followed by the augmented Kaggle training set.
_ice_part = dataset_X_ice_more[100:]
_ocean_part = dataset_X_ocean_more[100:]
dataset_X_full = np.vstack((_ice_part, _ocean_part, Xtr_more))
# Label counts are taken from the arrays themselves rather than hard-coded,
# so images and labels stay aligned if the input data changes.
dataset_y_full = np.concatenate((np.ones(len(_ice_part), dtype=int),
                                 np.zeros(len(_ocean_part), dtype=int),
                                 Ytr_more))

In [None]:
# Display the label vector's shape; its length should equal the number of stacked images.
dataset_y_full.shape

In [None]:

# Shuffle images and labels with one shared permutation. Its length comes
# from the data instead of a hard-coded sample count: a count that is too
# small silently drops samples, one that is too large raises IndexError.
# NOTE(review): the permutation is unseeded, so the order differs between runs.
assert len(dataset_X_full) == len(dataset_y_full), "images and labels are misaligned"
shuffle_index = np.random.permutation(len(dataset_X_full))
dataset_X_full, dataset_y_full = dataset_X_full[shuffle_index], dataset_y_full[shuffle_index]

In [None]:
# 70/30 split of the shuffled combined dataset; this rebinds the X_train /
# X_test / y_train / y_test names from the earlier split.
# NOTE(review): the fit call for model_real trains on dataset_X_full rather
# than on X_train — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(dataset_X_full, dataset_y_full, test_size=0.3, random_state=42)


In [None]:
# CNN with the same architecture as the baseline, trained on the combined
# Kaggle + real-scene dataset.
model_real = Sequential()

# Feature extractor: four conv -> max-pool -> dropout stages.
model_real.add(Conv2D(64, 3, activation='relu', input_shape=(75, 75, 3)))
model_real.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
model_real.add(Dropout(0.2))

for n_filters in (128, 256, 256):
    model_real.add(Conv2D(n_filters, 3, activation='relu'))
    model_real.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model_real.add(Dropout(0.2))

model_real.add(Flatten())

# Fully connected head: 512 -> 256 -> 128 units, each with ReLU and dropout.
for n_units in (512, 256, 128):
    model_real.add(Dense(n_units))
    model_real.add(Activation('relu'))
    model_real.add(Dropout(0.2))

# Single sigmoid unit; the metrics cell thresholds its output at 0.5.
model_real.add(Dense(1, activation="sigmoid"))

# `decay=0.0` was dropped: it equals the default and newer Keras optimizers
# reject the argument.
opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model_real.compile(loss="binary_crossentropy", optimizer=opt, metrics=["binary_accuracy"])
model_real.summary()


checkpoint_path = '/kaggle/working/training_2/'
# All three callbacks watch the validation loss.
earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min')
mcp_save = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, save_best_only=True, monitor='val_loss', mode='min')
# `min_delta` is the current name of the deprecated `epsilon` argument.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')

# To resume from a saved checkpoint: model_real.load_weights(checkpoint_path)
model_real.fit(dataset_X_full, dataset_y_full, validation_split = 0.2, batch_size=32, epochs=50, verbose=1, callbacks=[earlyStopping, reduce_lr_loss, mcp_save])

In [None]:
# Save the weights of the combined-data model under /kaggle/working/answer.
model_real.save_weights('/kaggle/working/answer')

In [None]:
# Score the real-scene hold-out patches (ocean samples first, then iceberg samples).
prediction_real = model_real.predict(testik_X)

In [None]:
# Confusion-matrix counts at a 0.5 threshold on the sigmoid output.
_scores = np.asarray(prediction_real)[:, 0]
_truth = np.asarray(testik_y)[:len(_scores)]
_pred_pos = _scores >= 0.5
_pred_neg = _scores < 0.5  # separate mask so a NaN score is counted nowhere, as before

TP = int(np.sum(_pred_pos & (_truth == 1)))
FP = int(np.sum(_pred_pos & (_truth == 0)))
FN = int(np.sum(_pred_neg & (_truth == 1)))
TN = int(np.sum(_pred_neg & (_truth == 0)))

# Each ratio falls back to 0.0 when its denominator is zero (for example, no
# positive predictions) instead of raising ZeroDivisionError.
pr = TP / (TP + FP) if (TP + FP) else 0.0
rec = TP / (TP + FN) if (TP + FN) else 0.0
acc = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) else 0.0
f_measure = 2 * pr * rec / (pr + rec) if (pr + rec) else 0.0
print('accuracy', acc)
print('precision', pr)
print('recall', rec)
print('F-mera', f_measure)

In [None]:
import matplotlib.pyplot as plt

# Hard-coded per-epoch history as (loss, accuracy) pairs.
# NOTE(review): presumably copied from a training log; confirm whether these
# are training or validation values.
rrr = [
    (0.6459, 0.5813), (0.6349, 0.5625), (0.5686, 0.6723), (0.3437, 0.8473),
    (0.3741, 0.8277), (0.3166, 0.8634), (0.4727, 0.8116), (0.2804, 0.8696),
    (0.2486, 0.8830), (0.3271, 0.8634), (0.2484, 0.8821), (0.2616, 0.8973),
    (0.2526, 0.8929), (0.2515, 0.8973), (0.2408, 0.8920), (0.3315, 0.8696),
    (0.2397, 0.8786), (0.2873, 0.8938), (0.2330, 0.9036), (0.2500, 0.9107),
    (0.2567, 0.9054), (0.3132, 0.8848), (0.3222, 0.9018), (0.3413, 0.8848),
]

# Split the pairs into two parallel series: f1 = loss, f2 = accuracy.
f1 = [pair[0] for pair in rrr]
f2 = [pair[1] for pair in rrr]

# Loss curve over epochs.
plt.plot(f1)
plt.title("Функция потерь")
plt.xlabel("Эпохи")
plt.ylabel("log loss")
plt.show()


In [None]:
# Accuracy curve over epochs (second column of the hard-coded history).
plt.plot(f2)
plt.ylabel("accuracy")
plt.xlabel("Эпохи")
plt.title("Метрика точности")
plt.show()

In [None]:
# RocCurveDisplay.from_estimator expects a fitted scikit-learn classifier; a
# Keras Sequential model is not one, so the curve is built from its predicted
# scores instead.
# NOTE(review): this evaluates on the same data the network was fitted on, so
# the curve is optimistic. Prefer a held-out split if one is available.
cnn_scores = model_real.predict(dataset_X_full).ravel()
cnn_disp = RocCurveDisplay.from_predictions(dataset_y_full, cnn_scores, name="CNN")

In [None]:

# Load both polarisation bands of the Sentinel-1 scene into arrays. The context
# managers close the underlying TIFF files once the pixels have been copied out.
with PIL.Image.open('../input/sentinel-photo-1/s1a-ew-grd-hh-20211128t191435-20211128t191522-040775-04d6ff-001.tiff') as pp1:
    hhpp1 = np.array(pp1)
with PIL.Image.open('../input/sentinel-photo-1/s1a-ew-grd-hv-20211128t191435-20211128t191522-040775-04d6ff-002.tiff') as pp2:
    hvpp1 = np.array(pp2)

# Region of interest: 1275x1275 pixels whose top-left corner is at
# row 1561, column 6538 of the full scene.
roi_rows = slice(1561, 1561 + 1275)
roi_cols = slice(6538, 6538 + 1275)

# Copy the crops: later cells overwrite tiles of rhh/rhv in place, and a plain
# slice would be a view that silently modifies the full-scene arrays as well.
rhh = hhpp1[roi_rows, roi_cols].copy()
rhv = hvpp1[roi_rows, roi_cols].copy()

# Show the two cropped bands side by side.
fig = plt.figure(0, figsize=(10,10))
ax = fig.add_subplot(1,2,1)
ax.set_title('iceberg band hh')
ax.imshow(rhh, cmap='jet')
ax = fig.add_subplot(1,2,2)
ax.set_title('iceberg band hv')
ax.imshow(rhv,cmap='jet')
plt.show()

# # centers_roi_1 = [[7744, 3292],[7555, 3879],[8269, 4028],[7846, 3176],[8108, 3377],[8399, 3275]]
# centers_roi_2 = [[6852,1945],[6804, 2368],[6959, 2448],[7055, 2226],[7032, 2142],[7793, 2649]]
# # centers_roi_3 = [[5131, 3193],[5260, 3032],[5286, 3358],[5421, 3300],[5052, 3775],[5208, 3961],[5529, 3714]]
# # pic2 = corner 6538, 1561, 1275x1275
# right_x = center[0] + 37
# left_x = center[0] - 38
# up_y = center[1] + 37
# down_y = center[1] - 38

# hh1 = img_hh4[1561:1561+1275, 6538:6538+1275]
# hv2 = img_hv4[1561:1561+1275, 6538:6538+1275]
# window = [75x75]


        

In [None]:
# Scratch check: adding a leading axis to a (3, 2) matrix yields shape (1, 3, 2).
r = np.expand_dims(np.array([[1, 2], [2, 3], [3, 4]]), axis=0)
r.shape


In [None]:
def _center_and_range_scale(band):
    """Return ``band`` minus its NaN-aware mean, divided by its NaN-aware value range.

    A zero range (constant window) falls back to a divisor of 1, so the result
    is all zeros instead of a division by zero.
    """
    value_range = np.nanmax(band) - np.nanmin(band)
    if value_range == 0:
        value_range = 1
    return (band - np.nanmean(band)) / value_range


# Slide a non-overlapping 75x75 window over the cropped scene, classify each
# window with model_real and mark the windows whose score falls in (0.8, 0.9).
# NOTE(review): `count` is initialised but never updated, although a later cell
# displays it. Confirm what it was meant to count.
count = 0
for i in range(rhh.shape[0] // 75):
    for j in range(rhh.shape[1] // 75):
        # Window centre; center_wx indexes rows and center_wy indexes columns.
        center_wx = 37 + 75*i
        center_wy = 37 + 75*j
        win_rows = slice(center_wx - 37, center_wx + 38)
        win_cols = slice(center_wy - 37, center_wy + 38)
        tmp_hh = rhh[win_rows, win_cols]
        tmp_hv = rhv[win_rows, win_cols]

        # Replace NaN with the window mean, +inf with the window max, -inf with 0.
        cleared_hh = np.nan_to_num(tmp_hh, nan=np.nanmean(tmp_hh), posinf=np.nanmax(tmp_hh), neginf=0)
        cleared_hv = np.nan_to_num(tmp_hv, nan=np.nanmean(tmp_hv), posinf=np.nanmax(tmp_hv), neginf=0)
        cut_hh = linear_to_decibel(cleared_hh)
        cut_hv = linear_to_decibel(cleared_hv)
        cut_sum = cut_hh + cut_hv

        # Three input channels: HH, HV and their sum, each mean-centred and
        # divided by its own range. The previous code took the maximum of the
        # third channel from `band_sum`, a name not defined in this cell.
        a = _center_and_range_scale(cut_hh)
        b = _center_and_range_scale(cut_hv)
        c = _center_and_range_scale(cut_sum)

        # Batch of one (75, 75, 3) sample.
        t = np.array([np.dstack((a, b, c))])

        pred = model_real.predict(t)
        score = float(np.ravel(pred)[0])

        # NOTE(review): only scores strictly between 0.8 and 0.9 are flagged;
        # more confident predictions (>= 0.9) are ignored. Confirm this band is intended.
        if 0.8 < score < 0.9:
            print(center_wx, center_wy)
            # Paint the window with a constant so it stands out in the later plots.
            # This edits rhh/rhv in place, so re-running the cell sees the painted tiles.
            rhh[win_rows, win_cols] = np.ones((75,75))*1000
            rhv[win_rows, win_cols] = np.ones((75,75))*1000

        
        
        

In [None]:
# Side-by-side view of the two cropped bands.
fig = plt.figure(0, figsize=(10, 10))
panels = ((rhh, 'iceberg band hh'), (rhv, 'iceberg band hv'))
for panel_index, (band_image, panel_title) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 2, panel_index)
    ax.set_title(panel_title)
    ax.imshow(band_image, cmap='jet')
plt.show()

In [None]:
# Overwrite the 75x75 window centred at row 1087, column 187 with -5 in both bands.
# The first assignment used to be followed by the stray tokens "1237 1087", which
# made the whole cell a SyntaxError. They are kept here only as a note.
# NOTE(review): presumably the coordinates of another window; confirm.
# NOTE(review): if rhh/rhv have an unsigned integer dtype, -5 cannot be stored as such.
rhh[1087-37: 1087+ 38, 187 - 37:187+38] = np.ones((75,75))*-5
rhv[1087-37: 1087+ 38, 187 - 37:187+38] = np.ones((75,75))*-5

In [None]:
# Redraw both cropped bands next to each other.
fig = plt.figure(0, figsize=(10, 10))
band_titles = ('iceberg band hh', 'iceberg band hv')
for slot, (image, title) in enumerate(zip((rhh, rhv), band_titles), start=1):
    ax = fig.add_subplot(1, 2, slot)
    ax.set_title(title)
    ax.imshow(image, cmap='jet')
plt.show()

In [None]:
    # NOTE(review): this cell is an indented fragment. It reads left_x, right_x,
    # down_y, up_y, img_hh4, img_hv4 and i, none of which are defined in the cell,
    # and its enclosing loop/function header is missing, so it cannot run as a
    # stand-alone cell. Presumably a leftover copy of a loop body; confirm and remove.

    # Cut the same window out of both bands.
    tmp_hh = img_hh4[left_x:right_x, down_y:up_y]
    tmp_hv = img_hv4[left_x:right_x, down_y:up_y]
    
    # Replace NaN with the window mean, +inf with the window max, -inf with 0.
    cleared_hh = np.nan_to_num(tmp_hh, nan=np.nanmean(tmp_hh), posinf=np.nanmax(tmp_hh), neginf=0)
    cleared_hv = np.nan_to_num(tmp_hv, nan=np.nanmean(tmp_hv), posinf=np.nanmax(tmp_hv), neginf=0)

    # linear_to_decibel is defined elsewhere; presumably a linear -> dB conversion.
    cut_hh = linear_to_decibel(cleared_hh)
    cut_hv = linear_to_decibel(cleared_hv)
    
    band_sum = cut_hh + cut_hv # plus since log(x*y) = log(x) + log(y)
    
    # Debug view: show the raw window for iteration 8 only.
    if i == 8:
        fig = plt.figure(0, figsize=(10,10))
        ax = fig.add_subplot(1,2,1)
        ax.set_title('iceberg band hh')
        ax.imshow(tmp_hh, cmap='jet')
        ax = fig.add_subplot(1,2,2)
        ax.set_title('iceberg band hv')
        ax.imshow(tmp_hv,cmap='jet')
        plt.show()
        
    # Rescale
    # Divisors default to 1 and are replaced by the value range when it is non-zero,
    # which avoids dividing by zero on a constant window.
    denom_hh = denom_hv = denom_mean = 1
    
    if np.nanmax(cut_hh) - np.nanmin(cut_hh) != 0:
        denom_hh = np.nanmax(cut_hh) - np.nanmin(cut_hh)
        
    if np.nanmax(cut_hv) - np.nanmin(cut_hv) != 0:
        denom_hv = np.nanmax(cut_hv) - np.nanmin(cut_hv)
        
    if np.nanmax(band_sum) - np.nanmin(band_sum) != 0:
        denom_mean = np.nanmax(band_sum) - np.nanmin(band_sum)
    # Disabled alternative: divide by the standard deviation instead of the range.
#     if np.std(cut_hh) != 0:
#         denom_hh = np.std(cut_hh)
        
#     if np.std(cut_hv) != 0:
#         denom_hv = np.std(cut_hv)
        
#     if np.std(band_sum) != 0:
#         denom_mean = np.std(band_sum)
        
    # Disabled alternative: shift by the minimum instead of the mean.
#     a = (cut_hh - np.nanmin(cut_hh)) / denom_hh
#     b = (cut_hv - np.nanmin(cut_hv)) / denom_hv
#     c = (band_sum - np.nanmin(band_sum)) / denom_mean
    # Active variant: mean-centre each channel, then divide by its range.
    a = (cut_hh - np.nanmean(cut_hh)) / denom_hh
    b = (cut_hv - np.nanmean(cut_hv)) / denom_hv
    c = (band_sum - np.nanmean(band_sum)) / denom_mean

In [None]:
# Plot hh1 and hv1 side by side.
# NOTE(review): hh1 / hv1 are not assigned anywhere in this part of the notebook;
# confirm they are defined by an earlier cell.
fig = plt.figure(0, figsize=(10, 10))
for column, (picture, heading) in enumerate(
        ((hh1, 'iceberg band hh'), (hv1, 'iceberg band hv')), start=1):
    ax = fig.add_subplot(1, 2, column)
    ax.set_title(heading)
    ax.imshow(picture, cmap='jet')
plt.show()

In [None]:
# Count how many window centres 37, 112, 187, ... lie below 1275.
x = 37
counter = 0
for i in range(1000):
    counter += 1 if x + 75*i < 1275 else 0

In [None]:
# Display `count`. The sliding-window cell sets it to 0 and never changes it,
# so this shows 0 unless another cell updates it.
count

In [None]:
# Same side-by-side view of hh1 and hv1 as an earlier cell.
# NOTE(review): hh1 / hv1 must come from an earlier cell; they are not assigned nearby.
fig = plt.figure(0, figsize=(10, 10))
views = {1: (hh1, 'iceberg band hh'), 2: (hv1, 'iceberg band hv')}
for position in (1, 2):
    data, caption = views[position]
    ax = fig.add_subplot(1, 2, position)
    ax.set_title(caption)
    ax.imshow(data, cmap='jet')
plt.show()

In [None]:
# img_cutted_hh = []
# img_cutted_hv = []
# for i in range(7875):
#     row_hh = im_hh_array[10+i][1000:8875]
#     row_hv = im_hv_array[10+i][1000:8875]
#     img_cutted_hh.append(row_hh)
#     img_cutted_hv.append(row_hv)
    
# img_cut_hh = np.array(img_cutted_hh)
# img_cut_hv = np.array(img_cutted_hv)
# np.nan_to_num(img_cut_hh, nan=np.nanmean(img_cut_hh), posinf=img_cut_hh.max(), neginf=0)
# np.nan_to_num(img_cut_hv, nan=np.nanmean(img_cut_hv), posinf=img_cut_hv.max(), neginf=0)

In [None]:
# imgs_hh = []
# imgs_hv = []
# imgs_mean = []
# for i in range(7875):
#     row_hh = img_cut_hh[i]
#     row_hv = img_cut_hv[i]
#     row_mean = img_cut_hh[i]+img_cut_hv[i]
#     # Rescale
#     a_hh = (row_hh - row_hh.mean()) / (row_hh.max() - row_hh.min())
#     a_hv = (row_hv - row_hv.mean()) / (row_hv.max() - row_hv.min())
#     a_mean = (row_mean - row_mean.mean()) / (row_mean.max() - row_mean.min())
#     imgs_hh.append(a_hh)
#     imgs_hv.append(a_hv)
#     imgs_mean.append(a_mean)
    
# imagies_hh = np.array(imgs_hh)
# imagies_hv = np.array(imgs_hv)
# imagies_mean = np.array(imgs_mean)

In [None]:
# photos = []
# original_width, original_height = 7875, 7875
# photo_width, photo_height = 75, 75

# for i in range(original_width // photo_width):
#     for j in range(original_height // photo_height):
#         photo = np.dstack((imagies_hh[i*75 : (i+1)*75, j*75 : (j+1)*75],
#                            imagies_hv[i*75 : (i+1)*75, j*75 : (j+1)*75],
#                            imagies_mean[i*75 : (i+1)*75, j*75 : (j+1)*75]))
#         photos.append(photo)

In [None]:
# photos = np.array(photos)

In [None]:
# mask = []
# for i in range(len(prediction_real)):
#     if prediction_real[i] >= 0.5:
#         mask.append(1)
#     else:
#         mask.append(0)

In [None]:
# sp = np.array([[1, 1, 2, 2, 5, 5],
#                [1, 1, 2, 2, 5, 5],
#                [3, 3, 4, 4, 6, 6],
#                [3, 3, 4, 4, 6, 6],
#                [8, 8, 9, 9, 0, 0],
#                [8, 8, 9, 9, 0, 0]])
# mask1 = [0,0,1,0,0,0,0,0,0]
# 1 1 2 2 5 5   # 1 1 0 0 0 0
# 1 1 2 2 5 5   # 1 1 0 0 0 0
# 3 3 4 4 6 6   # 1 1 1 1 0 0
# 3 3 4 4 6 6   # 1 1 1 1 0 0
# 8 8 9 9 0 0   # 0 0 1 1 1 1
# 8 8 9 9 0 0   # 0 0 1 1 1 1

# matrix_m = np.zeros((7875, 7875))
# # matrix_m = np.zeros((6, 6))
# # original_width, original_height = 6, 6
# # photo_width, photo_height = 2, 2
# for i in range(original_width // photo_width): 
#     for j in range(original_height // photo_height): 
#         if mask[(original_width // photo_width)* i + j] == 1:
#             matrix_m[i*75 : (i+1)*75 , j*75 : (j+1)*75] = np.ones((75,75))*150

In [None]:
# res_mask = np.zeros([7875,7875,3])

# res_mask[:,:,0] = matrix_m/255.0
# res_mask[:,:,1] = np.zeros((7875, 7875))
# res_mask[:,:,2] = np.zeros((7875, 7875))

In [None]:
# plt.imshow(res_mask)  # runs out of memory, but works

## SVM


In [None]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible.
X_train5, X_test5, y_train5, y_test5 = train_test_split(
    dataset_X_full,
    dataset_y_full,
    random_state=42,
    test_size=0.25,
)


In [None]:
# Show the shape of the training split. Later cells flatten each sample to
# 75*75*3 features before handing it to scikit-learn.
X_train5.shape

In [None]:
# Fit an SVM with default settings on flattened samples (one row per sample).
# The row count comes from the data itself: it used to be hard-coded as 4199
# here while the random-forest cell hard-codes 9600 for the same array, so at
# most one of the two reshapes could ever succeed.
svc = svm.SVC()
svc.fit(np.reshape(X_train5, (len(X_train5), -1)), y_train5)


In [None]:
# Predict with the SVM fitted in this section (`svc`; the previous code called
# `clf`, which this section never defines). The test samples are flattened the
# same way as the training samples, because scikit-learn expects 2-D input.
res = svc.predict(np.reshape(X_test5, (len(X_test5), -1)))

In [None]:
# ROC curve for the SVM. Features and labels must come from the same split:
# the previous call paired X_train5 with `dataset_y`, which is not its label vector.
# NOTE(review): this is the training split, so the curve is optimistic. Pass
# X_test5 / y_test5 instead for a held-out estimate.
svc_disp = RocCurveDisplay.from_estimator(
    svc,
    np.reshape(X_train5, (len(X_train5), -1)),
    y_train5,
)

plt.show()



In [None]:
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed,
# accuracy is unaffected but precision and recall swap places.
print(accuracy_score(y_test5, res))
print(precision_score(y_test5, res))
print(recall_score(y_test5, res))

### random forest

In [None]:
# Create the model with 200 trees
plt.figure(figsize = (8,8))
rfc = RandomForestClassifier(n_estimators=200, 
                               bootstrap = True)

rfc.fit(np.reshape(X_train5, (9600, 75*75*3)), y_train5)
ax = plt.gca()



plt.show()

### New code


In [None]:
!pip3 install selective-search

In [None]:
from sklearn.model_selection import GridSearchCV  
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt