In [1]:
import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import rasterio
import time

from scipy import ndimage as nd
from skimage import filters
from skimage.filters import roberts, sobel, scharr, prewitt
from skimage.morphology import disk
from skimage.segmentation import felzenszwalb, quickshift, slic, watershed
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from yellowbrick.classifier import ROCAUC

In [2]:
def image_to_df(snow_path, mask_path, print_gabor=False, labeled=True):
    #load in images
    img = cv2.imread(snow_path)
    if labeled:
        mask = rasterio.open(mask_path)
    
    #generate grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    #flatten image
    img2 = img.reshape((-1,3))

    #create dataframe
    df = pd.DataFrame(img2, columns=['Blue', 'Green', 'Red'])
    df['Gray'] = gray.reshape(-1)

    #gabor filter
    num = 1
    gabors = [5, 11, 23, 8, 6, 4]
    kernels = []
    for theta in range(2):
        theta = theta / 4. * np.pi
        for sigma in (1,3):
            for lamda in np.arange(0, np.pi, np.pi/4):
                for gamma in (.05, .5):
                    if num in gabors:
                        gabor_label = 'Gabor' + str(num)
                        ksize = 9
                        kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                        kernels.append(kernel)

                        fimg = cv2.filter2D(gray, cv2.CV_8UC3, kernel)
                        filtered_img = fimg.reshape(-1)
                        df[gabor_label] = filtered_img
                        if print_gabor:
                            print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1

    #Roberts Edge
    roberts_edge = roberts(gray).reshape(-1)
    df['Roberts'] = roberts_edge

    #Sobel Edge
    sobel_edge = sobel(gray).reshape(-1)
    df['Sobel'] = sobel_edge

    #Scharr Edge
    scharr_edge = scharr(gray).reshape(-1)
    df['Scharr'] = scharr_edge

    #Prewitt Edge
    prewitt_edge = prewitt(gray).reshape(-1)
    df['Prewitt'] = prewitt_edge

    gaussian_img = nd.gaussian_filter(gray, sigma=3).reshape(-1)
    df['Gaussian s3'] = gaussian_img

    gaussian_img2 = nd.gaussian_filter(gray, sigma=7).reshape(-1)
    df['Gaussian s7'] = gaussian_img2

    median_img =  nd.median_filter(gray, size=3).reshape(-1)
    df['Median s3'] = median_img

    #segmentation
    #felzenszwalb
    segments_fz = felzenszwalb(img, scale=100, sigma=0.5, min_size=50)
    df['Felzenszwalb'] = segments_fz.reshape(-1)

    #quickshift
    segments_quick = quickshift(img, kernel_size=3, max_dist=6, ratio=0.5)
    df['Quickshift'] = segments_quick.reshape(-1)

    #slic
    segments_slic = slic(img, n_segments=250, compactness=10, sigma=1)
    df['SLIC'] = segments_slic.reshape(-1)

    #watershed
    gradient = filters.rank.gradient(gray, disk(2))
    segments_ws = watershed(gradient, markers=250, compactness=0.001)
    df['Watershed'] = segments_ws.reshape(-1)

    #labels
    if labeled:
        df['labels'] = mask.read(1).reshape(-1)

    return df

def data_prep():
    csv_df = pd.read_csv('../data/640/640.csv')

    #separate 10%(round up) of photos for testing
    num_images = len(csv_df)
    num_test_images = int(np.ceil(num_images * 0.1))
    test_indices = np.random.choice(num_images, num_test_images, replace=False)

    #write test indices to file
    with open('test_indices.txt', 'w') as f:
        for item in test_indices:
            f.write("%s\n" % item)

    #preprocessing of all training images
    images_df = image_to_df(os.path.join("../",csv_df['snow_path'][0]), os.path.join("../",csv_df['snowbinary_path'][0]), print_gabor=True)

    for i in tqdm(range(1, len(csv_df)), unit='image'):
        if i in test_indices:
            continue
        images_df = pd.concat([images_df, image_to_df(os.path.join("../",csv_df['snow_path'][i]), os.path.join("../",csv_df['snowbinary_path'][i]))])

    #remove all black pixels
    trimmed_df = images_df[(images_df[['Blue', 'Green', 'Red']] != 0).all(axis=1)]

    #train test split
    X = trimmed_df.drop(columns=['labels'], axis=1)
    y = trimmed_df['labels'].values

    return (X,y)


In [3]:
data = data_prep()

#store train test data with pickle
with open('X_y.pkl', 'wb') as f:
    pickle.dump(data, f)

Gabor4 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor5 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor6 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor8 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.5
Gabor11 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor23 : theta= 0.7853981633974483 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.05


100%|██████████| 130/130 [12:00<00:00,  5.54s/image]


In [5]:
data[0].head()

Unnamed: 0,Blue,Green,Red,Gray,Gabor4,Gabor5,Gabor6,Gabor8,Gabor11,Gabor23,...,Sobel,Scharr,Prewitt,Gaussian s3,Gaussian s7,Median s3,Felzenszwalb,Quickshift,SLIC,Watershed
4118,33,46,61,49,184,136,112,98,62,129,...,0.27595,0.317628,0.22107,39,41,42,3,26,7,9
4119,31,67,92,70,121,0,0,0,30,255,...,0.22549,0.204687,0.258817,40,41,49,3,64,7,9
4120,62,94,117,97,202,211,128,110,210,54,...,0.24309,0.258036,0.231088,39,41,42,3,64,7,9
4756,23,49,66,51,173,102,91,77,38,4,...,0.388478,0.402723,0.369667,46,45,43,3,26,7,7
4757,34,39,54,43,189,114,40,0,232,115,...,0.387195,0.415826,0.351516,50,46,51,3,26,16,9
