In [3]:
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import rasterio
import joblib
import pickle

from tqdm import tqdm
from skimage.filters import roberts, sobel, scharr, prewitt
from skimage.segmentation import felzenszwalb, quickshift, slic, watershed
from skimage import filters
from skimage.morphology import disk
from scipy import ndimage as nd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from yellowbrick.classifier import ROCAUC
from sklearn.metrics import confusion_matrix

In [4]:
def image_to_df(snow_path, mask_path, print_gabor=False, labeled=True):
    #load in images
    img = cv2.imread(snow_path)
    if labeled:
        mask = rasterio.open(mask_path)
    
    #generate grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    #flatten image
    img2 = img.reshape((-1,3))

    #create dataframe
    df = pd.DataFrame(img2, columns=['Blue', 'Green', 'Red'])
    df['Gray'] = gray.reshape(-1)

    #gabor filter
    num = 1
    gabors = [5, 11, 23, 8, 6, 4]
    kernels = []
    for theta in range(2):
        theta = theta / 4. * np.pi
        for sigma in (1,3):
            for lamda in np.arange(0, np.pi, np.pi/4):
                for gamma in (.05, .5):
                    if num in gabors:
                        gabor_label = 'Gabor' + str(num)
                        ksize = 9
                        kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                        kernels.append(kernel)

                        fimg = cv2.filter2D(gray, cv2.CV_8UC3, kernel)
                        filtered_img = fimg.reshape(-1)
                        df[gabor_label] = filtered_img
                        if print_gabor:
                            print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1

    #Roberts Edge
    roberts_edge = roberts(gray).reshape(-1)
    df['Roberts'] = roberts_edge

    #Sobel Edge
    sobel_edge = sobel(gray).reshape(-1)
    df['Sobel'] = sobel_edge

    #Scharr Edge
    scharr_edge = scharr(gray).reshape(-1)
    df['Scharr'] = scharr_edge

    #Prewitt Edge
    prewitt_edge = prewitt(gray).reshape(-1)
    df['Prewitt'] = prewitt_edge

    gaussian_img = nd.gaussian_filter(gray, sigma=3).reshape(-1)
    df['Gaussian s3'] = gaussian_img

    gaussian_img2 = nd.gaussian_filter(gray, sigma=7).reshape(-1)
    df['Gaussian s7'] = gaussian_img2

    median_img =  nd.median_filter(gray, size=3).reshape(-1)
    df['Median s3'] = median_img

    #segmentation
    #felzenszwalb
    segments_fz = felzenszwalb(img, scale=100, sigma=0.5, min_size=50)
    df['Felzenszwalb'] = segments_fz.reshape(-1)

    #quickshift
    segments_quick = quickshift(img, kernel_size=3, max_dist=6, ratio=0.5)
    df['Quickshift'] = segments_quick.reshape(-1)

    #slic
    segments_slic = slic(img, n_segments=250, compactness=10, sigma=1)
    df['SLIC'] = segments_slic.reshape(-1)

    #watershed
    gradient = filters.rank.gradient(gray, disk(2))
    segments_ws = watershed(gradient, markers=250, compactness=0.001)
    df['Watershed'] = segments_ws.reshape(-1)

    # blurring of segmentations
    segments_fz_blur = nd.gaussian_filter(segments_fz, sigma=3).reshape(-1)
    df['Felzenszwalb Blur'] = segments_fz_blur

    segments_quick_blur = nd.gaussian_filter(segments_quick, sigma=3).reshape(-1)
    df['Quickshift Blur'] = segments_quick_blur

    segments_slic_blur = nd.gaussian_filter(segments_slic, sigma=3).reshape(-1)
    df['SLIC Blur'] = segments_slic_blur

    segments_ws_blur = nd.gaussian_filter(segments_ws, sigma=3).reshape(-1)
    df['Watershed Blur'] = segments_ws_blur

    #labels
    if labeled:
        df['labels'] = mask.read(1).reshape(-1)

    return df

In [5]:
#read in the data
csv_df = pd.read_csv('data/640/640.csv')

images_df = image_to_df(csv_df['snow_path'][0], csv_df['snowbinary_path'][0], print_gabor=True)

#pick 13 random indices to not include in the training set
random_indices = np.random.choice(len(csv_df), 13, replace=False)
print(random_indices)
for i in tqdm(range(1, len(csv_df)), unit='image'):
    if i in random_indices:
        continue
    images_df = pd.concat([images_df, image_to_df(csv_df['snow_path'][i], csv_df['snowbinary_path'][i])])
images_df.shape

Gabor4 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor5 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor6 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor8 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.5
Gabor11 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor23 : theta= 0.7853981633974483 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.05
[ 72  41  53  15  81  95  59 119  24 118 121  46   9]


100%|██████████| 130/130 [09:34<00:00,  4.42s/image]


(47160320, 26)

In [6]:
#remove all black pixels
trimmed_df = images_df[(images_df[['Blue', 'Green', 'Red']] != 0).all(axis=1)]
trimmed_df.shape

(22051598, 26)

In [7]:
X = trimmed_df.drop(columns=['labels'], axis=1)
y = trimmed_df['labels'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
#define the model
params =  {
        'criterion': 'entropy', 
        'max_depth': 20, 
        'max_features': 'log2', 
        'min_samples_leaf': 4, 
        'min_samples_split': 5, 
        'n_estimators': 50,
        'n_jobs': 16,
        'verbose': 1,
    }
rf = RandomForestClassifier(**params)

In [9]:
rf.fit(X_train, y_train)

[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:  6.2min
[Parallel(n_jobs=16)]: Done  50 out of  50 | elapsed: 10.9min finished


In [10]:
# get feature importance
feature_list = list(X.columns)
feature_imp = pd.Series(rf.feature_importances_, index=feature_list).sort_values(ascending=False)
feature_imp

Blue                 0.306930
Median s3            0.088704
Gaussian s3          0.082494
Roberts              0.081173
Scharr               0.052427
Gray                 0.049059
Green                0.046505
Prewitt              0.039507
Sobel                0.035139
Red                  0.031021
Felzenszwalb Blur    0.028073
Gaussian s7          0.027773
Felzenszwalb         0.025533
Gabor6               0.019736
Quickshift           0.013197
Watershed            0.012853
Quickshift Blur      0.011224
Watershed Blur       0.010085
SLIC                 0.007799
Gabor23              0.007334
SLIC Blur            0.006555
Gabor4               0.005657
Gabor5               0.005581
Gabor8               0.005170
Gabor11              0.000471
dtype: float64