In [None]:
!pip install -qqq ipyplot

Import Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
import pywt

from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn import svm

import sklearn.metrics as skm
from sklearn.metrics import roc_auc_score as ras
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from skimage.filters import roberts, prewitt, butterworth, difference_of_gaussians, farid, frangi, hessian
from catboost import CatBoostClassifier


import cv2
import torch
from glob import glob
from tqdm import tqdm
from ipyplot import plot_images
import os

from torch.utils.data import Dataset, DataLoader

In [None]:
# Load the training labels and turn every bare file name into a path that
# points inside the training image directory.
path = 'Dataset/train/'
df_train = pd.read_csv('train.csv')
print(len(df_train))
df_train['filename'] = df_train['filename'].map(lambda name: path + name)
df_train.head()

In [None]:
# Gather every JPEG in the test directory into a one-column frame.
test_imgs = [{'filename': file} for file in glob("Dataset/test/*.jpg")]
df_test = pd.DataFrame(test_imgs)
print("Test images: ", len(df_test))

In [None]:
# Partition the labelled frame by the value of the `blur` column.
train_blur = df_train.loc[df_train["blur"].eq(1)]
train_sharp = df_train.loc[df_train["blur"].eq(0)]

print("blur images:", len(train_blur))
print("sharp images:", len(train_sharp))

In [None]:
# Display six randomly drawn images from the sharp subset.
images = train_sharp["filename"].sample(n=6).tolist()
plot_images(images, max_images=10, img_width=220, force_b64=True)

In [None]:
# Display six randomly drawn images from the blurred subset.
images = train_blur["filename"].sample(n=6).tolist()
plot_images(images, max_images=10, img_width=220, force_b64=True)

In [None]:
def read_image_gray(path):
    """Read the image at ``path`` and return it converted to grayscale.

    The file is decoded with ``cv2.imread`` and converted with
    ``cv2.COLOR_BGR2GRAY``.

    Args:
        path: location of the image file.

    Returns:
        The grayscale image produced by ``cv2.cvtColor``.

    Raises:
        OSError: if ``cv2.imread`` returns ``None`` (file missing or not
            decodable). Without this check the failure would surface later as
            an opaque error inside ``cv2.cvtColor``.
    """
    image = cv2.imread(path)
    if image is None:
        raise OSError(f"cv2.imread could not read image: {path!r}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


class CustomDataset(Dataset):
    """Base dataset that wraps a frame-like object and reports its length."""

    def __init__(self, df):
        """Store ``df`` on the instance as ``self.df``."""
        self.df = df

    def __len__(self):
        """Return ``len`` of the wrapped object."""
        n_items = len(self.df)
        return n_items


class CustomDatasetTrain(CustomDataset):
    """Training dataset: yields a grayscale image together with its ``blur`` label."""

    def __init__(self, df, transform=None):
        """Wrap ``df`` and remember an optional ``transform`` applied to each image."""
        super().__init__(df)
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'label': ...}`` for the row at position ``idx``.

        The image is read from the row's ``filename`` and passed through
        ``self.transform`` when one is set; the label is the row's ``blur`` value.
        """
        frame = self.df
        pixels = read_image_gray(frame.filename.iloc[idx])
        if self.transform:
            pixels = self.transform(pixels)
        return {'image': pixels, 'label': frame.blur.iloc[idx]}


class CustomDatasetTest(CustomDataset):
    """Unlabelled dataset: yields only the grayscale image for each row."""

    def __getitem__(self, idx):
        """Return ``{'image': ...}`` for the row at position ``idx``."""
        file_path = self.df.filename.iloc[idx]
        return {'image': read_image_gray(file_path)}
    
# NOTE: `train_dataloader` and `test_dataloader` are each bound to a 1-tuple
# that contains the DataLoader, not to the loader itself. The feature
# extraction loops iterate `for dl in ...` over that tuple before iterating
# batches, so the tuple wrapping is kept and spelled out explicitly here.
train_dataset = CustomDatasetTrain(df_train)
train_dataloader = (DataLoader(dataset=train_dataset, batch_size=16),)

test_dataset = CustomDatasetTest(df_test)
test_dataloader = (DataLoader(dataset=test_dataset, batch_size=16),)

In [None]:
'''
Blur is a form of bandwidth reduction of an image.
It can be identified with many indicators:
- Fourier transform
- Edge detection filters (Laplacian, Sobel, Scharr, ...).
- Adding blur to the image itself and subtracting the result from the original
image (for blurred images the difference will be much smaller).
For this task I used edge detectors and several blur filters to collect the
necessary features of each image, as well as some transforms for determining
the high and low frequencies in the image (Discrete wavelet transform,
Butterworth filter (high and low)). After that, 3 classifiers were tested:
CatBoost, Random Forest and a neural-network classifier. All achieved excellent results.
CatBoost has the highest AUC and accuracy.
'''

def calc_fft_measure(gray_img, size=20):
    """Frequency-domain score for each image in a batch.

    For every image the centred 2-D spectrum has a ``2*size`` square around
    its centre set to zero, the image is rebuilt with the inverse FFT, and the
    reciprocal of the mean of ``20 * log(|rebuilt|)`` is returned.

    Args:
        gray_img: array-like whose shape unpacks as ``(n, h, w)``.
        size: half-width of the zeroed square around the spectrum centre.

    Returns:
        Array with one value per image (the FFTs run over axes 1 and 2).
    """
    _, height, width = gray_img.shape
    row_mid = int(height / 2.0)
    col_mid = int(width / 2.0)
    spatial_axes = (1, 2)

    # Centre the zero frequency so the low frequencies sit in the middle.
    spectrum = np.fft.fftshift(np.fft.fft2(gray_img, axes=spatial_axes), axes=spatial_axes)
    spectrum[:, row_mid - size:row_mid + size, col_mid - size:col_mid + size] = 0

    rebuilt = np.fft.ifft2(np.fft.ifftshift(spectrum, axes=spatial_axes), axes=spatial_axes)
    log_magnitude = 20 * np.log(np.abs(rebuilt))

    return 1 / np.average(log_magnitude, axis=spatial_axes)

def calc_tv_measure(gray_img):
    """Ratio of L1 to L2 total variation for each image in a batch.

    Neighbour differences are taken along dim 1 and dim 2 of the input; the
    result is ``sum(|d|) / sum(d**2)`` over both directions, per batch entry.

    Args:
        gray_img: 3-D torch tensor; dim 0 is treated as the batch dimension.
            The input is cloned and never modified.

    Returns:
        1-D tensor with one ratio per batch entry. An image without any
        variation yields ``nan`` (0 / 0).
    """
    gray_img = gray_img.detach().clone()

    # Integer tensors (e.g. uint8 pixels) wrap around on subtraction, which
    # turns every negative difference into a large positive value. Do the
    # arithmetic in floating point instead.
    if not (gray_img.is_floating_point() or gray_img.is_complex()):
        gray_img = gray_img.to(torch.float64)

    diff_dim2 = gray_img[:, :, 1:] - gray_img[:, :, :-1]
    diff_dim1 = gray_img[:, 1:, :] - gray_img[:, :-1, :]

    l2_score = torch.sum(torch.pow(diff_dim1, 2), dim=[1, 2]) + torch.sum(torch.pow(diff_dim2, 2), dim=[1, 2])
    l1_score = torch.sum(torch.abs(diff_dim1), dim=[1, 2]) + torch.sum(torch.abs(diff_dim2), dim=[1, 2])

    return l1_score / l2_score


def variance_of_laplacian(img):
    """Return the variance of the Laplacian of ``img`` (computed with ``cv2.CV_64F`` depth)."""
    laplacian = cv2.Laplacian(img, cv2.CV_64F)
    return laplacian.var()

def variance_of_laplacian2(img):
    """Return the mean of the Laplacian of ``img`` (computed with ``cv2.CV_64F`` depth).

    Despite the function name, this is the mean, not the variance.
    """
    laplacian = cv2.Laplacian(img, cv2.CV_64F)
    return laplacian.mean()


def sobel_measure(img, ksize=3):
    """Variance of the equally weighted blend of the x and y Sobel responses.

    ``img`` is cast to float32 and divided by 255 before filtering; ``ksize``
    is forwarded to ``cv2.Sobel``.
    """
    scaled = img.astype(np.float32) / 255.
    grad_x, grad_y = (
        cv2.Sobel(scaled, ddepth=cv2.CV_32F, dx=dx, dy=dy, ksize=ksize)
        for dx, dy in ((1, 0), (0, 1))
    )
    blended = cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)
    return blended.var()

def sobel_measure2(img, ksize=3):
    """Mean of the equally weighted blend of the x and y Sobel responses.

    ``img`` is cast to float32 and divided by 255 before filtering; ``ksize``
    is forwarded to ``cv2.Sobel``.
    """
    scaled = img.astype(np.float32) / 255.
    grad_x, grad_y = (
        cv2.Sobel(scaled, ddepth=cv2.CV_32F, dx=dx, dy=dy, ksize=ksize)
        for dx, dy in ((1, 0), (0, 1))
    )
    blended = cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)
    return blended.mean()


def diff_values(img, ksize=15):
    """Mean absolute difference between ``img`` and its Gaussian-blurred copy.

    The blur uses a ``ksize`` x ``ksize`` kernel with sigma 0 passed to
    ``cv2.GaussianBlur``; the image is not rescaled, only cast to float32.
    """
    as_float = img.astype(np.float32)
    smoothed = cv2.GaussianBlur(as_float, (ksize, ksize), 0).astype(np.float32)
    return np.mean(np.abs(as_float - smoothed))


def bilaterial_filt(img, d=13, sColor=150, sSpace=50):
    """Mean absolute difference between ``img`` and its bilateral-filtered copy.

    ``d``, ``sColor`` and ``sSpace`` are forwarded positionally to
    ``cv2.bilateralFilter``; the image is cast to float32 first.
    """
    as_float = img.astype(np.float32)
    smoothed = cv2.bilateralFilter(as_float, d, sColor, sSpace).astype(np.float32)
    return np.mean(np.abs(as_float - smoothed))


def canny_values(img, thresh1=10, thresh2=245, L2gradient=True):
    """Variance of the Canny edge map of ``img``.

    Args:
        img: image passed to ``cv2.Canny``.
        thresh1: first hysteresis threshold.
        thresh2: second hysteresis threshold.
        L2gradient: forwarded to ``cv2.Canny`` to select the gradient norm.
            Previously this argument was accepted but never passed on.

    Returns:
        ``var()`` of the edge map returned by ``cv2.Canny``.
    """
    edges = cv2.Canny(
        image=img,
        threshold1=thresh1,
        threshold2=thresh2,
        L2gradient=L2gradient,
    )
    return edges.var()

def canny_values2(img, thresh1=10, thresh2=245, L2gradient=True):
    """Mean of the Canny edge map of ``img``.

    Args:
        img: image passed to ``cv2.Canny``.
        thresh1: first hysteresis threshold.
        thresh2: second hysteresis threshold.
        L2gradient: forwarded to ``cv2.Canny`` to select the gradient norm.
            Previously this argument was accepted but never passed on.

    Returns:
        ``mean()`` of the edge map returned by ``cv2.Canny``.
    """
    edges = cv2.Canny(
        image=img,
        threshold1=thresh1,
        threshold2=thresh2,
        L2gradient=L2gradient,
    )
    return edges.mean()


def median_values(img, k=13):
    """Mean absolute difference between ``img`` and its median-blurred copy.

    The median filter (aperture ``k``) is applied to a uint8 cast of the
    float32 image; the difference itself is taken in float32.
    """
    as_float = img.astype(np.float32)
    smoothed = cv2.medianBlur(as_float.astype(np.uint8), k).astype(np.float32)
    return np.mean(np.abs(as_float - smoothed))

def blurring_values(img, k=13):
    """Mean absolute difference between ``img`` and its ``k`` x ``k`` ``cv2.blur`` copy."""
    as_float = img.astype(np.float32)
    smoothed = cv2.blur(as_float, (k, k)).astype(np.float32)
    return np.mean(np.abs(as_float - smoothed))


def scharr_measure(img, ksize=3):
    """Variance of the equally weighted blend of the two Scharr derivatives.

    ``img`` is cast to float32 and divided by 255 first. ``ksize`` is accepted
    but not used by this function.
    """
    scaled = img.astype(np.float32) / 255.
    deriv_y = cv2.Scharr(scaled, ddepth=cv2.CV_32F, dx=0, dy=1)
    deriv_x = cv2.Scharr(scaled, ddepth=cv2.CV_32F, dx=1, dy=0)
    blended = cv2.addWeighted(deriv_y, 0.5, deriv_x, 0.5, 0)
    return blended.var()

def scharr_measure2(img, ksize=3):
    """Mean of the equally weighted blend of the two Scharr derivatives.

    ``img`` is cast to float32 and divided by 255 first. ``ksize`` is accepted
    but not used by this function.
    """
    scaled = img.astype(np.float32) / 255.
    deriv_y = cv2.Scharr(scaled, ddepth=cv2.CV_32F, dx=0, dy=1)
    deriv_x = cv2.Scharr(scaled, ddepth=cv2.CV_32F, dx=1, dy=0)
    blended = cv2.addWeighted(deriv_y, 0.5, deriv_x, 0.5, 0)
    return blended.mean()


def roberts_measure(img):
    """Return the variance of the ``roberts`` filter response of ``img``."""
    response = roberts(img)
    return response.var()

def roberts_measure2(img):
    """Return the mean of the ``roberts`` filter response of ``img``."""
    response = roberts(img)
    return response.mean()

def prewitt_measure(img):
    """Return the variance of the ``prewitt`` filter response of ``img``."""
    response = prewitt(img)
    return response.var()

def prewitt_measure2(img):
    """Return the mean of the ``prewitt`` filter response of ``img``."""
    response = prewitt(img)
    return response.mean()


def dwt_values(img, lvl=3):
    """Return the ``pywt.wavedec`` coefficient list of ``img`` for wavelet 'db1' at level ``lvl``."""
    return pywt.wavedec(img, 'db1', level=lvl)

def high_pass_measure(img):
    """Return the variance of the high-pass Butterworth response of ``img``.

    Uses a cutoff frequency ratio of 0.07 and filter order 8.
    """
    response = butterworth(img, cutoff_frequency_ratio=0.07, high_pass=True, order=8)
    return response.var()

def high_pass_measure2(img):
    """Return the mean of the high-pass Butterworth response of ``img``.

    Uses a cutoff frequency ratio of 0.07 and filter order 8.
    """
    response = butterworth(img, cutoff_frequency_ratio=0.07, high_pass=True, order=8)
    return response.mean()

def low_pass_measure(img):
    """Variance of the difference between the low-pass filtered image and ``img``.

    The low-pass Butterworth filter uses a cutoff frequency ratio of 0.01 and
    order 4.

    Args:
        img: image array; 2-D input is filtered over both axes, 3-D input is
            treated as having its channels on the last axis.

    Returns:
        ``var()`` of ``low_pass - img``.
    """
    # Only declare a channel axis when one exists: passing channel_axis=-1 for
    # a 2-D grayscale image would make the last spatial axis count as channels.
    channel_axis = -1 if np.ndim(img) == 3 else None
    low_pass = butterworth(img, 0.01, False, 4, channel_axis=channel_axis)
    return (low_pass - img).var()

def low_pass_measure2(img):
    """Mean of the difference between the low-pass filtered image and ``img``.

    The low-pass Butterworth filter uses a cutoff frequency ratio of 0.01 and
    order 4.

    Args:
        img: image array; 2-D input is filtered over both axes, 3-D input is
            treated as having its channels on the last axis.

    Returns:
        ``mean()`` of ``low_pass - img``.
    """
    # Only declare a channel axis when one exists: passing channel_axis=-1 for
    # a 2-D grayscale image would make the last spatial axis count as channels.
    channel_axis = -1 if np.ndim(img) == 3 else None
    low_pass = butterworth(img, 0.01, False, 4, channel_axis=channel_axis)
    return (low_pass - img).mean()

def gaussian_diff_measure(img):
    """Variance of the Laplacian of the difference-of-Gaussians band-pass of ``img``.

    The band-pass uses sigmas 4 and 8.

    Args:
        img: image array; 2-D input is filtered over both axes, 3-D input is
            treated as having its channels on the last axis.

    Returns:
        The value of ``variance_of_laplacian`` on the filtered image.
    """
    # Only declare a channel axis when one exists: passing channel_axis=-1 for
    # a 2-D grayscale image would make the last spatial axis count as channels.
    channel_axis = -1 if np.ndim(img) == 3 else None
    filtered_image = difference_of_gaussians(img, 4, 8, channel_axis=channel_axis)
    return variance_of_laplacian(filtered_image)

In [None]:
# Visual check of the Butterworth responses on three training images: show the
# high-pass output and the residual left after subtracting the low-pass
# output, printing the variance of each.
# NOTE(review): the images are read as 2-D grayscale, yet `channel_axis=-1` is
# passed to the low-pass call — confirm that this is intended.
for sample_path in (
    'Dataset/train/cidbfvfqbfgyvxyfzvmj.jpg',
    'Dataset/train/aladjjhlunpjdwsxlwus.jpg',
    'Dataset/train/aiiujlpbdrhdeexrsrzf.jpg',
):
    img = cv2.imread(sample_path, 0)
    high_pass = butterworth(img, 0.07, True, 8)
    low_pass = butterworth(img, 0.01, False, 4, channel_axis=-1)

    plt.imshow(high_pass)
    plt.show()
    print(high_pass.var())

    residual = img - low_pass
    plt.imshow(residual)
    plt.show()
    print(residual.var())


In [None]:
# Same visual check on three further training images: high-pass output and
# residual after subtracting the low-pass output, with the variance of each.
# NOTE(review): the images are read as 2-D grayscale, yet `channel_axis=-1` is
# passed to the low-pass call — confirm that this is intended.
for sample_path in (
    'Dataset/train/bzrdseobabtzeoglrejr.jpg',
    'Dataset/train/cyougvqjhsmvbzqirrxc.jpg',
    'Dataset/train/casjinialxtcuzuxotyn.jpg',
):
    img = cv2.imread(sample_path, 0)
    high_pass = butterworth(img, 0.07, True, 8)
    low_pass = butterworth(img, 0.01, False, 4, channel_axis=-1)

    plt.imshow(high_pass)
    plt.show()
    print(high_pass.var())

    residual = img - low_pass
    plt.imshow(residual)
    plt.show()
    print(residual.var())


In [None]:
# Compute the hand-crafted blur features for every training image and attach
# them to `df_train` as new columns.

# Batched measures computed on the tensors yielded by the loader.
# `train_dataloader` is a 1-tuple holding the loader, hence the outer loop.
tv_measures = []
fft_measures = []

for dl in train_dataloader:
    for batch in tqdm(dl):
        tv_measures.extend(calc_tv_measure(batch['image']).tolist())
        fft_measures.extend(calc_fft_measure(batch['image']).tolist())

# Column name -> function that maps one grayscale image to a scalar feature.
# `low_pass_val2` and `high_pass_val2` are included because the training
# matrix built later reads those two columns from `df_train`.
image_features = {
    'laplacian_measure': variance_of_laplacian,
    'laplacian_measure2': variance_of_laplacian2,
    'sobel_measure': sobel_measure,
    'sobel_measure2': sobel_measure2,
    'differencial_values': diff_values,
    'bilateral_values': bilaterial_filt,
    'canny_values': lambda im: canny_values(im, 125, 160),
    'canny_vals2': lambda im: canny_values2(im, 125, 160),
    'variance_values': lambda im: im.var(),
    'median_values': median_values,
    'scharr_vals': scharr_measure,
    'scharr_vals2': scharr_measure2,
    'blur_vals': blurring_values,
    'roberts_vals': roberts_measure,
    'roberts_vals2': roberts_measure2,
    'prewitt_vals': prewitt_measure,
    'prewitt_vals2': prewitt_measure2,
    'high_freq_vals': high_pass_measure,
    'low_freq_vals': low_pass_measure,
    'low_pass_val2': low_pass_measure2,
    'high_pass_val2': high_pass_measure2,
}

# Wavelet coefficient arrays 1..4 each contribute a (variance, mean) pair:
# array k fills `dwt_vals{2k-1}` (variance) and `dwt_vals{2k}` (mean).
dwt_levels = range(1, 5)

feature_columns = {name: [] for name in image_features}
for level in dwt_levels:
    feature_columns[f'dwt_vals{2 * level - 1}'] = []
    feature_columns[f'dwt_vals{2 * level}'] = []

for img_path in df_train['filename']:
    img = cv2.imread(img_path, 0)
    for name, measure in image_features.items():
        feature_columns[name].append(measure(img))

    coeffs = dwt_values(img, 5)
    for level in dwt_levels:
        feature_columns[f'dwt_vals{2 * level - 1}'].append(coeffs[level].var())
        feature_columns[f'dwt_vals{2 * level}'].append(coeffs[level].mean())

df_train['tv_measure'] = tv_measures
df_train['fft_measure'] = fft_measures
for name, values in feature_columns.items():
    df_train[name] = values

df_train[:15]

In [None]:
#Construct the dataset to be fed to a Random Forest Classifier
# Feature columns in the order the classifiers see them. Left out on purpose:
# prewitt_vals2, gaussian_diff_vals, dwt_vals1 and dwt_vals2.
train_feature_names = [
    'tv_measure',
    'fft_measure',
    'laplacian_measure',
    'laplacian_measure2',
    'sobel_measure',
    'sobel_measure2',
    'differencial_values',
    'bilateral_values',
    'canny_values',
    'canny_vals2',
    'variance_values',
    'median_values',
    'scharr_vals',
    'scharr_vals2',
    'blur_vals',
    'roberts_vals',
    'roberts_vals2',
    'prewitt_vals',
    'high_freq_vals',
    'low_freq_vals',
    'low_pass_val2',
    'high_pass_val2',
    'dwt_vals3',
    'dwt_vals4',
    'dwt_vals5',
    'dwt_vals6',
    'dwt_vals7',
    'dwt_vals8',
]

# One list per feature, then transpose so rows are images and columns features.
train_RFC = [df_train[name].tolist() for name in train_feature_names]
train_RFC_np = np.array(train_RFC).T

# NOTE: despite the `test_` prefix, these two names hold the training labels
# (the `blur` column of `df_train`).
test_RFC = df_train['blur'].tolist()
test_RFC_np = np.array(test_RFC)



In [None]:
# Preview the first rows of the training frame, including the feature columns.
df_train.head()

In [None]:
#Build CatBoost Classifier

# Hold out 20% of the rows for validation; the split is seeded.
X, y = train_RFC_np, test_RFC_np
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

catboost_params = dict(
    iterations=1000,
    learning_rate=0.04,
    loss_function='CrossEntropy',
    verbose=False,
)
clf = CatBoostClassifier(**catboost_params)

# The validation split is also handed to CatBoost as its evaluation set.
clf.fit(X_train, y_train, eval_set=(X_val, y_val))

print('CatBoost model is fitted: ' + str(clf.is_fitted()))
print('CatBoost model parameters:')
print(clf.get_params())

# Score the held-out rows: AUC from column 1 of predict_proba, plus accuracy.
pred_test_prob = clf.predict_proba(X_val)
AUC = ras(y_val, pred_test_prob[:, 1])
print('AUC catBoost = ', AUC)
print('Acc = ', clf.score(X_val, y_val))

In [None]:
#Test CatBoost on the first 33% and the remaining 67% of the validation split

# `clf` is the CatBoost model, so the printed labels name it accordingly.
val_cut = len(X_val)//3

pred_test_prob33 = clf.predict_proba(X_val[:val_cut])
AUC33 = ras(y_val[:val_cut], pred_test_prob33[:, 1])
print('AUC catBoost 33% = ', AUC33)

pred_test_prob66 = clf.predict_proba(X_val[val_cut:])
AUC66 = ras(y_val[val_cut:], pred_test_prob66[:, 1])
print('AUC catBoost 67% = ', AUC66)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, Sequential
# Explicit layer names instead of `import *`, so the notebook namespace is not
# flooded and the origin of each layer class is visible.
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout

#Build a NN as a classifier

# Same seeded 80/20 split parameters as used for the CatBoost model.
X_train, X_val, y_train, y_val = train_test_split(train_RFC_np, test_RFC_np, test_size=0.2, random_state=0)

tf.random.set_seed(42)

# Three 900-unit ReLU blocks, each followed by batch normalisation, then
# dropout and a single sigmoid output.
model = Sequential()
for _ in range(3):
    model.add(Dense(900, activation='relu'))
    model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics= ['accuracy'])

model.fit(X_train, y_train, epochs = 200, verbose = 1)
loss, accuracy = model.evaluate(X_val, y_val)

In [None]:
# AUC of the neural network on the held-out validation rows.
preds = model.predict(X_val)
AUC = ras(y_true=y_val, y_score=preds)
print('AUC NN = ', AUC)

In [None]:
#Build the Random Forest classifier
X = train_RFC_np
Y = test_RFC_np

# Sequential 80/20 split: the first 80% of rows train, the rest evaluate.
trainCount = round(0.8*len(X))
Xtrain, Xtest = X[:trainCount], X[trainCount:]
Ytrain, Ytest = Y[:trainCount], Y[trainCount:]

#Find the optimal number of estimators to obtain the best accuracy
# Every candidate forest is scored by its AUC on the evaluation rows.
estimator_grid = range(1, 702, 10)
AUC = []
for i in estimator_grid:
    clfRF = RFC(random_state=0, n_estimators=i).fit(Xtrain, Ytrain)
    pred_test_prob = clfRF.predict_proba(Xtest)
    AUC.append(ras(Ytest, pred_test_prob[:, 1]))
n = estimator_grid[AUC.index(max(AUC))]
print(n)

# Refit with the best-scoring forest size.
clfRF = RFC(random_state=0, n_estimators=n).fit(Xtrain, Ytrain)

pred_test = clfRF.predict(Xtest)
pred_test_probRF = clfRF.predict_proba(Xtest)
acc_test = clfRF.score(Xtest, Ytest)

skm.RocCurveDisplay.from_estimator(clfRF, Xtest, Ytest)
plt.show()

AUC = ras(Ytest, pred_test_probRF[:, 1])
print('AUC random forest = ', AUC)
#0.9947449985911525

In [None]:
#Test on 33% and 67% of the test dataset

# Split the evaluation rows at the one-third mark and score each part.
third = len(Xtest)//3

pred_test_prob33 = clfRF.predict_proba(Xtest[:third])
AUC33 = ras(Ytest[:third], pred_test_prob33[:, 1])
print('AUC random forest 33% = ', AUC33)

pred_test_prob66 = clfRF.predict_proba(Xtest[third:])
AUC66 = ras(Ytest[third:], pred_test_prob66[:, 1])
print('AUC random forest 67% = ', AUC66)

In [None]:
#Calculate the measures for test images

# Batched measures computed on the tensors yielded by the loader.
# `test_dataloader` is a 1-tuple holding the loader, hence the outer loop.
tv_measures = []
fft_measures = []

for dl in test_dataloader:
    for batch in tqdm(dl):
        tv_measures.extend(calc_tv_measure(batch['image']).tolist())
        fft_measures.extend(calc_fft_measure(batch['image']).tolist())

# Column name -> function that maps one grayscale image to a scalar feature.
# Keeping the wiring in one table avoids maintaining a separate list, append
# call and column assignment for every feature.
image_features = {
    'laplacian_measure': variance_of_laplacian,
    'laplacian_measure2': variance_of_laplacian2,
    'sobel_measure': sobel_measure,
    'sobel_measure2': sobel_measure2,
    'differencial_values': diff_values,
    'bilateral_values': bilaterial_filt,
    'canny_values': lambda im: canny_values(im, 125, 160),
    'canny_vals2': lambda im: canny_values2(im, 125, 160),
    'variance_values': lambda im: im.var(),
    'median_values': median_values,
    'scharr_vals': scharr_measure,
    'scharr_vals2': scharr_measure2,
    'blur_vals': blurring_values,
    'roberts_vals': roberts_measure,
    'roberts_vals2': roberts_measure2,
    'prewitt_vals': prewitt_measure,
    'high_freq_vals': high_pass_measure,
    'low_freq_vals': low_pass_measure,
    'low_pass_val2': low_pass_measure2,
    'high_pass_val2': high_pass_measure2,
}

# Wavelet coefficient arrays 2..4 each contribute a (variance, mean) pair:
# array k fills `dwt_vals{2k-1}` (variance) and `dwt_vals{2k}` (mean),
# i.e. columns dwt_vals3 .. dwt_vals8.
dwt_levels = range(2, 5)

feature_columns = {name: [] for name in image_features}
for level in dwt_levels:
    feature_columns[f'dwt_vals{2 * level - 1}'] = []
    feature_columns[f'dwt_vals{2 * level}'] = []

for img_path in df_test['filename']:
    img = cv2.imread(img_path, 0)
    for name, measure in image_features.items():
        feature_columns[name].append(measure(img))

    coeffs = dwt_values(img, 5)
    for level in dwt_levels:
        feature_columns[f'dwt_vals{2 * level - 1}'].append(coeffs[level].var())
        feature_columns[f'dwt_vals{2 * level}'].append(coeffs[level].mean())

df_test['tv_measure'] = tv_measures
df_test['fft_measure'] = fft_measures
for name, values in feature_columns.items():
    df_test[name] = values

df_test.head()

In [None]:
#Form the input to the classifier

# Feature columns in the order the classifiers see them (prewitt_vals2 is
# left out on purpose).
test_feature_names = [
    'tv_measure',
    'fft_measure',
    'laplacian_measure',
    'laplacian_measure2',
    'sobel_measure',
    'sobel_measure2',
    'differencial_values',
    'bilateral_values',
    'canny_values',
    'canny_vals2',
    'variance_values',
    'median_values',
    'scharr_vals',
    'scharr_vals2',
    'blur_vals',
    'roberts_vals',
    'roberts_vals2',
    'prewitt_vals',
    'high_freq_vals',
    'low_freq_vals',
    'low_pass_val2',
    'high_pass_val2',
    'dwt_vals3',
    'dwt_vals4',
    'dwt_vals5',
    'dwt_vals6',
    'dwt_vals7',
    'dwt_vals8',
]

# One list per feature, then transpose so rows are images and columns features.
test_RFC = [df_test[name].tolist() for name in test_feature_names]
test_RFC_np = np.array(test_RFC).T


In [None]:
#Perform the prediction using RF, and save the results to a csv file

preds = clfRF.predict_proba(test_RFC_np)

# Column 1 of predict_proba is written as the blur score, keyed by the bare
# file name (directory part stripped).
submission = pd.DataFrame({
    'filename': df_test['filename'].map(os.path.basename),
    'blur': preds[:, 1],
})
submission.to_csv('RF_predictions.csv', index=False)

In [None]:
#Perform the prediction using CatBoost, and save the results to a csv file

preds = clf.predict_proba(test_RFC_np)

# Column 1 of predict_proba is written as the blur score, keyed by the bare
# file name (directory part stripped).
submission = pd.DataFrame({
    'filename': df_test['filename'].map(os.path.basename),
    'blur': preds[:, 1],
})
submission.to_csv('catBoost_predictions.csv', index=False)

In [None]:
#Perform the prediction using the neural network, and save the results to a csv file

# Predict on the test feature matrix (not the validation split), so there is
# one score per row of `df_test`.
preds = model.predict(test_RFC_np)

submission = pd.DataFrame()
submission['filename'] = df_test['filename'].apply(lambda x : os.path.split(x)[1])
# The network ends in a single sigmoid unit, so the score is in column 0.
submission['blur'] = preds[:, 0]
submission.head()
submission.to_csv('NN_predictions.csv', index=False)