In [None]:
import os
import tifffile
import numpy as np 
import pandas as pd
import lightgbm as lgb 

In [None]:
# List the training image files and the training mask files.
# os.listdir returns entries in arbitrary, filesystem-dependent order, while the
# loading loop pairs images[i] with masks[i] by position, so both listings are
# sorted to make that pairing (and the i % 5 hold-out split) deterministic.
# NOTE(review): assumes image and mask file names sort into the same order — confirm.
images = sorted(os.listdir('../data/train/s2_image/'))
masks  = sorted(os.listdir('../data/train/mask/'))

In [None]:
# Read one training mask (the second entry of `masks`) for a quick inspection.
img = tifffile.imread(os.path.join('../data/train/mask/', masks[1]))

In [None]:
# Display the dimensions of the array currently bound to `img`.
img.shape 

In [None]:
# Build one row per pixel: the band values, the mask label, and a split flag.
N_BANDS       = 12  # values per pixel; every image is flattened to rows of this width
HOLDOUT_EVERY = 5   # images whose position is a multiple of this are held out (is_train = 0)

# Images and masks are paired by position, so the two listings must have the same
# length. Fail early rather than silently ignoring surplus masks or hitting an
# IndexError halfway through the loop.
# NOTE(review): positional pairing also relies on both listings being in matching order.
if len(images) != len(masks):
    raise ValueError(
        f'{len(images)} images but {len(masks)} masks; cannot pair them by position'
    )

data_list = []

for i, (image_name, mask_name) in enumerate(zip(images, masks)):
    img = tifffile.imread(os.path.join('../data/train/s2_image', image_name)).astype(float)
    msk = tifffile.imread(os.path.join('../data/train/mask', mask_name)).astype(float)

    reshaped_img = img.reshape(-1, N_BANDS)  # e.g. (23, 23, 12) -> (23*23, 12)
    flatten_mask = msk.reshape(-1, 1)        # e.g. (23, 23)     -> (23*23, 1)

    # Every pixel needs exactly one label; name the offending files instead of
    # letting np.hstack fail with an anonymous shape error.
    if reshaped_img.shape[0] != flatten_mask.shape[0]:
        raise ValueError(
            f'{image_name} has {reshaped_img.shape[0]} pixels but '
            f'{mask_name} has {flatten_mask.shape[0]} labels'
        )

    # All pixels of one image share the same split flag, so the split is per image.
    is_train       = 0 if i % HOLDOUT_EVERY == 0 else 1
    is_train_image = np.full((reshaped_img.shape[0], 1), is_train)

    data_list.append(np.hstack((reshaped_img, flatten_mask, is_train_image)))

data_list = np.vstack(data_list)
columns   = [f'B{band}' for band in range(1, N_BANDS + 1)] + ['mask', 'is_train']
df        = pd.DataFrame(data_list, columns=columns)

In [None]:
# Count pixel rows per split flag (1 = training rows, 0 = held-out rows).
df['is_train'].value_counts()

In [None]:
# Feature columns (the twelve band values B1..B12) and the label column.
f = [f'B{band}' for band in range(1, 13)]
t = 'mask'

# Row selectors for the two splits flagged by the `is_train` column.
train_rows   = df['is_train'] == 1
holdout_rows = df['is_train'] == 0

# Feature matrices.
X_train = df.loc[train_rows, f].to_numpy()
X_test  = df.loc[holdout_rows, f].to_numpy()

# Label vectors.
y_train = df.loc[train_rows, t].to_numpy()
y_test  = df.loc[holdout_rows, t].to_numpy()

In [None]:
# Display the shapes of the feature matrices and label vectors of both splits.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:

# Boosting settings: at most 1000 trees with a small learning rate; early stopping
# on the held-out split decides how many boosting rounds are actually used.
params = {
    'n_estimators': 1000,
    'learning_rate': 0.01,
}
model = lgb.LGBMClassifier(**params)

# The `early_stopping_rounds=` and `verbose=` keyword arguments of fit() were
# removed in LightGBM 4.0 (passing them raises TypeError there). The callbacks
# below are the supported equivalent and are also available in LightGBM >= 3.3:
#   - early_stopping: stop when the eval metric has not improved for 15 rounds
#   - log_evaluation: print the eval metric every 300 rounds
model.fit(X_train, y_train,
          eval_set  = [(X_test, y_test)],
          callbacks = [
              lgb.early_stopping(stopping_rounds=15),
              lgb.log_evaluation(period=300),
          ],
          )

In [None]:
# List the evaluation images and the sample masks, each in sorted order.
# Note: this rebinds `images` and `masks`, replacing the training listings.
images = sorted(os.listdir('../data/evaluation/'))
masks  = sorted(os.listdir('../data/sample/'))

In [None]:
# Make sure the output directory exists. makedirs(exist_ok=True) replaces the
# check-then-mkdir pair: it is a no-op when the directory is already there and
# also creates missing parent directories instead of raising FileNotFoundError.
os.makedirs('../data/output', exist_ok=True)

In [None]:
# Build one row per evaluation pixel: the twelve band values, the name of the
# output file the pixel belongs to, and the 2-D shape its mask must be restored to.
#
# Each image gets its own DataFrame so every column keeps its own dtype. Stacking
# the float bands together with the file name in a single NumPy array would coerce
# the whole table (features included) to strings.
band_columns = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12']
columns      = band_columns + ['name', 'w', 'h']

data_list = []

for i, image_name in enumerate(images):
    mask_name = masks[i]  # images and sample masks are paired by position

    img = tifffile.imread('../data/evaluation/' + image_name).astype(float)
    # Only the sample mask's shape is used: it defines the shape of the output file.
    n_rows, n_cols = tifffile.imread('../data/sample/' + mask_name).shape[:2]

    reshaped_img = img.reshape(-1, 12)  # e.g. (23, 23, 12) -> (23*23, 12)

    # The predictions for this image are later reshaped to (h, w); catch a pixel
    # count that cannot fit that shape here, where the file names are known.
    if reshaped_img.shape[0] != n_rows * n_cols:
        raise ValueError(
            f'{image_name} has {reshaped_img.shape[0]} pixels but '
            f'{mask_name} is {n_rows}x{n_cols}'
        )

    image_frame         = pd.DataFrame(reshaped_img, columns=band_columns)
    image_frame['name'] = mask_name
    # shape[0] is the number of rows (height) and shape[1] the number of columns
    # (width). Storing them under the matching names keeps the downstream
    # reshape(h, w) correct for non-square masks.
    image_frame['w']    = n_cols
    image_frame['h']    = n_rows

    data_list.append(image_frame)

df = pd.concat(data_list, ignore_index=True)[columns]

In [None]:
# Sanity check: display the shape of one specific sample mask file.
tifffile.imread('../data/sample/evaluation_mask_13.tif').astype(float).shape

In [None]:
# Sanity check: display the pixel rows recorded for that same sample mask file.
df[df['name'] == 'evaluation_mask_13.tif']

In [None]:
# Score every evaluation pixel with the trained model using the feature columns `f`.
X     = df.loc[:, f].to_numpy()
preds = model.predict_proba(X)

In [None]:
# argmax gives the *column index* of the most probable class, not the class label.
# predict_proba columns follow model.classes_, so index into it to recover the
# label values seen in training (identical to the index when labels are 0..k-1).
# Note: this overwrites the probability matrix in `preds` with a 1-D label array.
preds = model.classes_[np.argmax(preds, axis=1)]

In [None]:
# Store the per-pixel prediction next to the pixel's features and file name.
df['mask'] = preds

In [None]:
# Cast the stored shape columns to integers so they can be used as reshape dimensions.
df = df.astype({'h': 'int', 'w': 'int'})

In [None]:
# Write one predicted mask file per sample mask name.
for mask_name in masks:
    # All pixel rows of this file, re-indexed from 0 so label 0 is its first row.
    rows_for_mask = df.loc[df['name'] == mask_name].reset_index(drop=True)

    # Restore the flat predictions to the ('h', 'w') values stored for this file.
    first_dim  = rows_for_mask.at[0, 'h']
    second_dim = rows_for_mask.at[0, 'w']
    pixels     = rows_for_mask['mask'].to_numpy().astype(np.uint8).reshape(first_dim, second_dim)

    tifffile.imwrite('../data/output/' + rows_for_mask.at[0, 'name'], pixels)