In [None]:

# general libraries

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# model architecture import lib.
from sklearn.metrics import confusion_matrix,classification_report, accuracy_score
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras


# preprocessing libraries
from keras.preprocessing.image import ImageDataGenerator

# warning disable lib. 
import warnings
warnings.filterwarnings("ignore")

# model preparation lib. and tracking
from keras.models import Sequential
from keras.models import Model
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras import optimizers, losses, activations, models
from keras.layers import Conv2D, Dense, Input, Flatten, Dropout, MaxPool2D, BatchNormalization, GlobalAveragePooling2D, Concatenate
%matplotlib inline
np.random.seed(123)



## Loading and Transforming the CSV Data

In [None]:
# Read the training table and pull its 'Powerline' column out as a NumPy array;
# later cells pass that array to the label plot and to calc() as `y`.
train = pd.read_csv('../input/recognizance-2/Data/train.csv')
y_train = train['Powerline'].values


In [None]:
def _add_class_dir(file_name):
    """Return `file_name` prefixed with the class sub-folder chosen from its name.

    A file whose name starts with the word "Powerline" goes under
    "Powerline/", everything else under "No_powerline/". Any directory part
    already present is stripped first, so re-running this cell leaves the
    column unchanged instead of stacking a second prefix onto it.
    """
    base_name = os.path.basename(file_name)
    class_dir = "Powerline" if base_name.split(" ")[0] == "Powerline" else "No_powerline"
    return os.path.join(class_dir, base_name)


train['Image file name'] = train['Image file name'].apply(_add_class_dir)

In [None]:
# Display the table to inspect the 'Image file name' column
train

In [None]:
# distribution of labels
# The data vector is passed as `x=`: handing it over positionally is deprecated
# in seaborn 0.11 and is interpreted as `data` from 0.12 onwards.
# The trailing semicolon hides the Axes repr in the cell output.
sns.countplot(x=y_train);

## MODEL BUILDING 

In [None]:
# Convolutional Neural Network (Sequential Model)
from keras import layers
def Model(shape = (128,128,3)):
    """Build the (uncompiled) binary-classification CNN.

    NOTE: this function shares its name with `keras.models.Model`, which is
    imported at the top of the notebook; after this cell runs, `Model` refers
    to this factory.

    Parameters
    ----------
    shape : tuple
        Input shape handed to the first convolution as `input_shape`.

    Returns
    -------
    Sequential
        Conv stack -> Flatten -> Dense stack -> single sigmoid unit.
    """
    # (filters, dropout rate or None) for every conv stage after the stem.
    conv_stages = ((64, None), (128, 0.2), (64, None), (32, None))
    # (units, dropout rate or None) for every hidden dense stage.
    dense_stages = ((32, None), (64, 0.2), (32, None))

    net = Sequential()

    # Stem convolution: the only layer that declares the input shape.
    net.add(layers.Conv2D(32, (3, 3), input_shape=shape, activation='relu'))

    for filters, drop_rate in conv_stages:
        net.add(layers.Conv2D(filters, (3, 3), activation='relu'))
        net.add(layers.MaxPooling2D(pool_size=(2, 2)))
        net.add(layers.BatchNormalization())
        if drop_rate is not None:
            net.add(layers.Dropout(drop_rate))

    net.add(layers.Flatten())

    for units, drop_rate in dense_stages:
        net.add(layers.Dense(units, activation='relu'))
        net.add(layers.BatchNormalization())
        if drop_rate is not None:
            net.add(layers.Dropout(drop_rate))

    # One sigmoid output unit.
    net.add(layers.Dense(1, activation='sigmoid'))
    return net



## TEST DATA LOADING

In [None]:
size = 128        # images are resized to size x size
batch_size = 32
epochs = 30

import cv2

test_dir = '../input/recognizance-2/Data/test'
name = []
imgs = []
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# `name`/`imgs` (and of the submission files built from them) reproducible.
for i in sorted(os.listdir(test_dir)):
    img_path = os.path.join(test_dir, i)
    img = cv2.imread(img_path)
    # cv2.imread signals an unreadable / non-image file by returning None;
    # fail with the offending path instead of a cryptic cvtColor error.
    if img is None:
        raise ValueError("Could not read test image: {}".format(img_path))
    # OpenCV loads BGR; convert to RGB, resize, and scale to [0, 1] as float32.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (size,size)).astype('float32')/255
    name.append(i)
    imgs.append(img)

imgs = np.asarray(imgs)
name = np.asarray(name)
print(imgs.shape)
print(name.shape)


# Using 5-Fold Cross-Validation

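We used 5-fold stratified cross-validation **so that our model does not overfit to the public leaderboard**.
We also applied image data preprocessing: pixel rescaling for all images, plus random flips, shifts, rotations, shear, and zoom on the training images.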

In [None]:



# Stratified 5-way split, shuffled with a fixed seed.
stkfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)

# Training generator: rescales pixels by 1/255 and applies random augmentation.
train_idg = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
)

# Validation generator: rescaling only, no augmentation.
val_idg = ImageDataGenerator(rescale=1./255)
        

# defining some callbacks

def callbacks(file_path):
    """Return the list of Keras callbacks used for one training run.

    Parameters
    ----------
    file_path : str
        Destination handed to ModelCheckpoint as `filepath`.

    Returns
    -------
    list
        [ReduceLROnPlateau, ModelCheckpoint, EarlyStopping], every one of
        them monitoring 'val_loss'.
    """
    return [
        # Halve the learning rate after 4 epochs without improvement,
        # never going below 1e-5.
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=4,
            cooldown=1,
            min_lr=0.00001,
            verbose=1,
        ),
        # Save only when the monitored loss reaches a new minimum.
        ModelCheckpoint(
            filepath=file_path,
            monitor='val_loss',
            verbose=2,
            save_best_only=True,
            mode='min',
        ),
        # Stop after 7 epochs without improvement.
        EarlyStopping(monitor="val_loss", mode="min", patience=7),
    ]

def calc(df, y, cv):
    """Train one CNN per cross-validation fold and average their test predictions.

    For every (train, validation) split produced by `cv`, a fresh model is
    trained, the checkpoint saved by the callbacks is reloaded, validation
    accuracy is printed, and the model's predictions on the global `imgs`
    array are accumulated.

    Relies on notebook globals: `train_idg`, `val_idg`, `size`, `batch_size`,
    `epochs`, `imgs`, `Model` and `callbacks`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the 'Image file name' and 'Powerline' columns read by
        `flow_from_dataframe`.
    y : array-like
        Labels passed to `cv.split` (only their values and length are used).
    cv : cross-validation splitter
        Object exposing `split(X, y)`, e.g. a StratifiedKFold.

    Returns
    -------
    numpy.ndarray
        Mean over the folds of `model.predict(imgs)`.

    Raises
    ------
    ValueError
        If `cv` yields no folds.
    """
    res = []
    test_pred_sum = 0   # running sum of the per-fold predictions on `imgs`
    n_folds = 0         # counted while iterating so the mean never assumes 5

    for i, (tdx, vdx) in enumerate(cv.split(np.zeros(len(y)), y)):

        X_train, X_valid = df.iloc[tdx], df.iloc[vdx]

        train_gen = train_idg.flow_from_dataframe(
                 X_train,
                 directory = "../input/recognizance-2/Data/train",
                 x_col = 'Image file name',
                 y_col = 'Powerline',
                 class_mode="binary",
                 target_size = (size, size),
                 shuffle = True,
                 batch_size = batch_size
                )

        # shuffle=False keeps predictions aligned with val_gen.classes.
        val_gen = val_idg.flow_from_dataframe(
                 X_valid,
                 directory = "../input/recognizance-2/Data/train",
                 x_col = 'Image file name',
                 y_col = 'Powerline',
                 class_mode="binary",
                 target_size = (size, size),
                 shuffle = False,
                 batch_size = batch_size
                )

        weights_path = 'model-{}.hdf5'.format(i)
        model = Model(shape = (size,size,3))
        callbacks_list = callbacks(weights_path)

        # `learning_rate` is the supported spelling; `lr` is a deprecated alias.
        model.compile(optimizer = keras.optimizers.Adam(learning_rate = 1e-3),
                      metrics = ['accuracy'], loss = 'binary_crossentropy')

        # No `batch_size` here: the generators already yield batches.
        # validation_steps covers the whole validation generator so the
        # monitored val_loss is not computed on a truncated fold.
        model.fit(train_gen, validation_data = val_gen, epochs = epochs,
                  steps_per_epoch = len(X_train)//batch_size,
                  validation_steps = len(val_gen), callbacks = callbacks_list)

        # Reload the checkpoint written by ModelCheckpoint before predicting.
        model.load_weights(weights_path)
        preds = model.predict(val_gen)
        test_pred_sum += model.predict(imgs)
        # ravel(): compare 1-D predicted labels against the 1-D class vector.
        score = accuracy_score(val_gen.classes, np.where(preds>0.5, 1, 0).ravel())
        # may differ
        print (score)
        res.append(score)
        n_folds += 1

    if n_folds == 0:
        raise ValueError("cv.split produced no folds; nothing was trained")

    print('Accuracy:', round(np.mean(res), 6))
    return test_pred_sum/n_folds

In [None]:
%%time
# Run the full cross-validated training; `probs` receives calc()'s return value,
# i.e. the fold-averaged predictions for the test images.
probs = calc(train, y_train, stkfold)

The 5-fold cross-validation gave a mean **accuracy = 0.9582**

In [None]:
# Threshold the averaged predictions at 0.5 and map them to 'YES' / 'NO' labels
probs1 = np.where(probs>0.5, 'YES', 'NO')

In [None]:
# Flatten to 1-D so the labels can be assigned as a DataFrame column;
# the bare expression below displays the resulting shape.
probs1 = probs1.reshape(-1,)
probs1.shape

In [None]:
# Submission table: one row per test image, file name first, then the label.
sub = pd.DataFrame({
    'Image file name': name,
    'Powerline': probs1,
})

In [None]:
# Write the submission table to Model1.csv without the row index
sub.to_csv('Model1.csv', index = False)

#### Submitting this prediction gives a public leaderboard score of 0.9822

# POST PROCESSING ON TEST DATA

Now we will apply post-processing (test-time augmentation) with the 5 models obtained above to further increase our accuracy.

**When the augmentation loop is run 16 times per model, we get accuracy = 0.9895 (a significant boost)**

In [None]:
# Generator for test-time augmentation: random flips, shifts, rotations, shear
# and zoom. It has no `rescale` argument.
test_idg = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
)

# Running sum of predictions over every (fold model, augmented pass) pair.
preds = 0

for fold in range(5):
    # Rebuild the architecture and load the weights saved for this fold.
    model = Model(shape=(size, size, 3))
    model.load_weights('model-{}.hdf5'.format(fold))
    for tta_round in range(16):
        # shuffle=False keeps the prediction rows in the order of `imgs`.
        augmented_batches = test_idg.flow(imgs, shuffle=False)
        preds += model.predict(augmented_batches)
        print(fold, tta_round)


In [None]:
# Turn the accumulated sum into a mean: 80 = 5 fold models x 16 augmented passes each.
# NOTE: this cell rebinds `preds`, so running it more than once divides again.
preds = preds/80

In [None]:
# Threshold the averaged predictions at 0.5, flatten to 1-D, and label them.
probs2 = np.where(preds > 0.5, 'YES', 'NO').reshape(-1)

# Assemble the submission table (file name, predicted label) and save it
# without the row index.
sub = pd.DataFrame({
    'Image file name': name,
    'Powerline': probs2,
})
sub.to_csv('Final_one_with_25.csv', index=False)

**This is the final submission we made; it gave us our final private leaderboard score of 0.98989.**