# Simple Building Segmentation with PCA
Adapted from work by Brandon Anderson (Stanford RegLab).

In [1]:
import glob
import pickle

import matplotlib as mpl 
import matplotlib.pyplot as plt
import rasterio

import numpy as np
from sklearn import decomposition

### Convert images to arrays

In [3]:
# img_arrs holds one (n_pixels, n_channels) pixel table per image and
# images holds the matching (rows, cols, n_channels) arrays, in the same order.
img_arrs = []
images = []

# Sort the paths: glob.glob returns them in arbitrary, filesystem-dependent
# order, and the later cells address images by their position in these lists.
files = sorted(glob.glob('/datadrive/data/raw/planet_images_il-2020-07/*'))
for file in files:
    with rasterio.open(file) as src:
        # The unpacking requires exactly four bands. They are taken to be
        # blue, green, red and a fourth band that is not used afterwards.
        b, g, r, n = src.read()

    rgb = np.stack((r, g, b), axis=-1)

    # Scale by the image's own maximum. An all-zero image is divided by 1
    # instead, because 0/0 would fill it with NaN and those values would
    # end up in the array the PCA is fitted on.
    peak = rgb.max()
    rgb = rgb / (peak if peak != 0 else 1)
    images.append(rgb)

    # Flatten to one row per pixel and one column per channel.
    img_arr = rgb.reshape(-1, rgb.shape[-1])
    img_arrs.append(img_arr)

# Shape of the last image's pixel table (displayed as the cell output).
img_arr.shape

(115254, 3)

### Fit PCA

In [None]:
# Number of principal components kept by the decomposition.
N_COMPONENTS = 2

# Stack the pixel rows of every image into one table, fit the PCA on it,
# and keep the projection of those same rows in img_pca.
all_images = np.vstack(img_arrs)
pca = decomposition.PCA(n_components=N_COMPONENTS)
img_pca = pca.fit_transform(all_images)

### Perform Segmentation

In [None]:
# Percentile of the fitted pixels' first principal component above which a
# pixel is marked as 1 in the segmentation mask.
PERCENTILE_CUT = 97
# Take the percentile over the first component only: that is the only
# component perform_segmentation compares against CUT. Flattening img_pca
# would mix in the other components' scores and move the threshold away
# from the requested percentile of the values actually being cut.
CUT = np.percentile(img_pca[:, 0], PERCENTILE_CUT)

def perform_segmentation(pca_obj, arr, shape, cut=None):
    """Threshold the first principal component of an image into a 0/1 mask.

    Args:
        pca_obj: Fitted object whose ``transform`` maps the pixel rows to
            an array of component scores (one column per component).
        arr: Pixel table with one row per pixel, in the row-major order of
            the image it was flattened from.
        shape: Shape of that image; only the first two entries (rows,
            columns) are used.
        cut: Threshold applied to the first component. Defaults to the
            module-level ``CUT``.

    Returns:
        A ``(shape[0], shape[1])`` array with the dtype of the transformed
        scores: 1 where the first component is strictly greater than the
        threshold, 0 everywhere else (including NaN scores).
    """
    if cut is None:
        cut = CUT

    scores = pca_obj.transform(arr)
    # Use the number of components the transform actually produced rather
    # than a global constant, so the reshape always matches pca_obj.
    cube = scores.reshape((shape[0], shape[1], scores.shape[-1]))
    first_component = cube[:, :, 0]

    # Build the mask with a single comparison. Zeroing the low values in
    # place and then testing "> cut" on the result marks every pixel as 1
    # whenever cut is negative, because the freshly written zeros pass it.
    return (first_component > cut).astype(first_component.dtype)

In [None]:
# One 0/1 mask per image, in the same order as `images`.
pca_masks = []

# Walk the pixel tables and their source images together; `i` is kept for
# the optional numbered output file names below.
for i, (pixels, orig_img) in enumerate(zip(img_arrs, images)):
    mask = perform_segmentation(pca, pixels, orig_img.shape)
    # Keep every mask: the list used to be created but never filled.
    pca_masks.append(mask)

    height, width = orig_img.shape[:2]

    plt.imshow(orig_img)

    # Red RGBA layer whose alpha channel is the mask, so it is visible only
    # where the mask is 1 (named `cafo`; presumably the detected class --
    # confirm the intended meaning).
    cafo = np.zeros((height, width, 4))
    cafo[..., 0] = 1.
    cafo[..., 3] = mask

    # Green RGBA layer visible on the complementary pixels.
    background = np.zeros((height, width, 4))
    background[..., 1] = 1.
    background[..., 3] = mask != 1

    plt.imshow(cafo, alpha=.5, interpolation='none')
    plt.imshow(background, alpha=.2, interpolation='none')
#     plt.savefig('./segmented/' + str(i).zfill(3) + '.png', dpi=300)
#     print('./segmented/' + str(i).zfill(3) + '.png')
    plt.show()