## Core Workflow: Calculating mean band values from the rooftop imageries
Purpose: Calculate band values from rooftop imagery using pixels selected in several steps. The pixels of each image are first split into groups based on the albedo calculated from their band values. The group whose albedo range is closest to the expected albedo value, and which contains more than 20% of the total pixels in the image, is then picked. Finally, twenty random pixels are selected from that group for albedo prediction.
<br>
*Author: Taufiq Rashid*


### Import statements

In [10]:
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
from shapely.geometry import shape, Point
from shapely.geometry import mapping, Polygon
# import cartopy
import geojson
import fiona
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib as mpl
import matplotlib.pyplot as plt

import gdal
from glob import glob

import jenkspy

import rasterio as rio
from rasterio.plot import show

import pandas as pd

import collections
from numpy import mean

import random
import statistics

import descarteslabs as dl
print (sys.path)

['', '/opt/caffe/python', '/opt/caffe2/build', '/data/home/peter/notebooks/urban_heat/dev', '/anaconda/envs/py36/lib/python36.zip', '/anaconda/envs/py36/lib/python3.6', '/anaconda/envs/py36/lib/python3.6/lib-dynload', '/anaconda/envs/py36/lib/python3.6/site-packages', '/anaconda/envs/py36/lib/python3.6/site-packages/IPython/extensions', '/data/home/peter/.ipython']


### load the path to imagery

In [11]:
# CSV that lists the rooftop images to process (point this at your input file)
path_data = 'path_gg_img_4-6.csv'

# Load the listing into a pandas DataFrame and echo it for a quick visual check
path_df = pd.read_csv(path_data, encoding='utf8')
print(path_df)

# Turn every row into a tuple of the fields that the processing loop
# reads back by position, in exactly this order
img_info = path_df[
    [
        'img_path',
        'tile_id',
        'expected_albedo',
        'latitude',
        'longitude',
        'roof_address',
        'footprint_shapes',
    ]
].apply(tuple, axis=1)



    Unnamed: 0                                       roof_address  \
0            0  Juniper HallPlummer St, Northridge, Los Angele...   
1            1  Bayramian HallVincennes St, Northridge, Los An...   
2            2  John Wooden Center, 221 Westwood Plaza, Los An...   
3            3    603 Charles E Young Dr E, Los Angeles, CA 90024   
4            4    603 Charles E Young Dr E, Los Angeles, CA 90024   
5            5       849 S Oxford Ave, Los Angeles, CA 90005, USA   
6            6    1014 Grand Central Ave, Glendale, CA 91201, USA   
7            7  1445 N Montebello Blvd, Montebello, CA 90640, USA   
8            8           601 Circle 7 Dr, Glendale, CA 91201, USA   
9            9    264 Redondo AveLong Beach, Long Beach, CA 90803   
10          10             650 Capitol Mall, Sacramento, CA 95814   
11          11             650 Capitol Mall, Sacramento, CA 95814   
12          12        400 Mrak Hall Dr, Davis Sacramento CA 95616   
13          13        400 Mrak Hal

## Set the values of the coefficients

In [12]:
# Band weights used in the solar reflectance (albedo) equation:
# cb -> blue, cg -> green, cr -> red, ci -> near-infrared
cb, cg, cr, ci = 0.17, -0.13, 0.33, 0.54

In [13]:
# Jenks Natural Breaks works by optimizing the Goodness of Variance Fit (GVF), a value from 0 to 1
# where 0 = no fit and 1 = perfect fit. The function below computes the GVF for a given number of
# classes; it is used to find the optimum number of groups.

def goodness_of_variance_fit(array, classes):
    """Return the Goodness of Variance Fit (GVF) of a Jenks classification.

    The values are split into ``classes`` groups with Jenks natural breaks and
    the fit is computed as ``(SDAM - SDCM) / SDAM``, where SDAM is the sum of
    squared deviations from the overall mean and SDCM is the sum of squared
    deviations of every value from the mean of its own class.

    Args:
        array: values to classify; must offer ``.mean()`` and element-wise
            arithmetic (e.g. a numpy array).
        classes: number of classes to request from ``jenkspy.jenks_breaks``.

    Returns:
        The GVF of the classification.
    """
    # break points of the Jenks classification
    break_points = jenkspy.jenks_breaks(array, classes)

    # 1-based class label of every value
    labels = np.array([classify(value, break_points) for value in array])

    # SDAM: squared deviations from the mean of the whole array
    sdam = np.sum((array - array.mean()) ** 2)

    # SDCM: squared deviations from the class means, added up class by class
    sdcm = 0
    for label in range(1, max(labels) + 1):
        members = np.array(
            [array[idx] for idx, assigned in enumerate(labels) if assigned == label]
        )
        sdcm = sdcm + np.sum((members - members.mean()) ** 2)

    return (sdam - sdcm) / sdam

def classify(value, breaks):
    """Return the 1-based class index of ``value`` for the given break points.

    The result is the index of the first break (ignoring ``breaks[0]``) that is
    strictly greater than ``value``. When no such break exists, the last class
    index, ``len(breaks) - 1``, is returned.
    """
    last_class = len(breaks) - 1
    return next(
        (idx for idx in range(1, last_class + 1) if value < breaks[idx]),
        last_class,
    )

def frange(start, stop, step=1.0):
    """Yield evenly spaced values from ``start`` (inclusive) up to ``stop`` (exclusive).

    A ``range()``-like generator that accepts float arguments. Every value
    after the first is computed as ``start + k * step`` instead of by repeated
    addition, so rounding error does not build up and produce a spurious extra
    value (``frange(0, 1, 0.1)`` yields exactly ten values).

    Args:
        start: first value to yield.
        stop: exclusive upper bound.
        step: distance between consecutive values (default 1.0).

    Raises:
        ValueError: when iteration starts, if ``step`` is not positive while
            ``start < stop``; such a sequence would never terminate.
    """
    if step <= 0 and start < stop:
        raise ValueError('frange() step must be positive when start < stop')

    k = 0
    value = start
    while value < stop:
        yield value
        k += 1
        # derive each value from the start to avoid accumulating float error
        value = start + k * step

### Calculate band values from twenty random pixels from the optimum group of pixels

In [15]:
# Accumulators for the outputs; the loop below appends to them for every
# group of sampled pixels

# mean band value of the sampled pixels (red, green, blue, near-infrared)
r_m, g_m, b_m, n_m = [], [], [], []

# location of the roof
lat, lon = [], []

# roof identifier and the sampled pixels themselves
roofs, all_bands = [], []

# standard deviation of each band over the sampled pixels
red_std, green_std, blue_std, nir_std = [], [], [], []

# albedo values
mean_albedos, exp_values = [], []

# bookkeeping carried over from the input listing
img_path, tile_id, roof_add, footprint_shapes = [], [], [], []

### Calculate band values for each image listed in the input file
for Y, X in img_info.iteritems():
    imageries = X[0]
    print(imageries)
    tile = X[1]
    exp_val = X[2]
    latitude = X[3]
    longitude = X[4]
    rf_ad=X[5]
    ft_shp=X[6]

    with rio.open(imageries) as src:
        naip_data = src.read()
        naip_meta = src.profile
        
    ### Normalize the band values
    red = naip_data[0]
    red = red.astype(float)
    red_max = red.max()
    red = red/255
    red = np.clip(red,0.0,1.0)

    green = naip_data[1]
    green = green.astype(float)
    green_max = green.max()
    green = green/255
    green = np.clip(green,0.0,1.0)

    blue = naip_data[2]
    blue = blue.astype(float)
    blue_max = blue.max()
    blue = blue/255
    blue = np.clip(blue,0.0,1.0)

    nir = naip_data[3]
    nir = nir.astype(float)
    nir_max = nir.max()
    nir = nir/255
    nir = np.clip(nir,0.0,1.0)

    arr = [red,green,blue,nir]

    flat_arr = []
    # flattened array of tuples
    flat_list = zip(*map(lambda x:x.flatten(),arr))
    for i in flat_list:
        flat_arr.append(i)   

    albedos=[]
    selected_pixels=[]
    ### Calculate solar reflectance using equation from Ban-Weiss et al.
    for pixels in flat_arr:
        solar_reflectance = ((cb*pixels[2])+(cg*pixels[1])+(cr*pixels[0])+(ci*pixels[3]))
        if solar_reflectance != 0:
            albedos.append(solar_reflectance)
            selected_pixels.append(pixels)

    # create a numpy array of albedos for breaks calculation        
    albedos_arr = np.asarray(albedos)
    
    ### Use solar reflectance values to group the pixels
    # use goodness of variance to find optimum number of classes
    gvf = 0.0
    nclasses = 2
    while gvf < .90:
        gvf = goodness_of_variance_fit(albedos_arr, nclasses)
        nclasses += 1

    # Create cluster of pixels using jenks breaks
    breaks = jenkspy.jenks_breaks(albedos, nclasses)

    ### Select the group of pixels that best matches the expected albedo
    # `breaks` lists the class boundaries in ascending order, so group k
    # (0-based) spans breaks[k] .. breaks[k + 1]. Walking the groups in a loop
    # handles any number of classes with a single code path.
    n_groups = len(breaks) - 1
    min_share = 0.20  # a group must hold more than this share of the pixels

    # count the pixels per group; every pixel falls into exactly one group
    # (lower bound inclusive, upper bound exclusive, outer groups open-ended)
    group_idx = np.digitize(np.asarray(albedos), breaks[1:-1])
    group_cnt = np.bincount(group_idx, minlength=n_groups)
    total_pixels_brk = len(albedos)
    prcnt_brk = [cnt / total_pixels_brk for cnt in group_cnt]

    # keep the bounds of every group that is large enough
    top_breaks = {}
    t_grps = []
    for k in range(n_groups):
        if prcnt_brk[k] > min_share:
            top_breaks[breaks[k]] = breaks[k + 1]
            t_grps.append(breaks[k])
            t_grps.append(breaks[k + 1])

    # first choice: the group whose range contains the expected albedo,
    # provided that group is large enough; an expected albedo that sits
    # exactly on a break matches no group and goes to the fallback below
    f_group = []
    for k in range(n_groups):
        above_lower = k == 0 or breaks[k] < exp_val
        below_upper = k == n_groups - 1 or exp_val < breaks[k + 1]
        if above_lower and below_upper and prcnt_brk[k] > min_share:
            f_group = [breaks[k], breaks[k + 1]]
            break

    # fallback: the large-enough group with a bound closest to the expected albedo
    if not f_group:
        if not t_grps:
            raise ValueError(
                'no pixel group holds more than 20% of the pixels in {}'.format(imageries)
            )
        closest_grp = min(t_grps, key=lambda x: abs(x - exp_val))

        # only the first matching pair (f_group[0], f_group[1]) is used below
        for a, b in top_breaks.items():
            if closest_grp == a or closest_grp == b:
                f_group.append(a)
                f_group.append(b)

    # store the albedos of the chosen group and the pixel numbers they belong to;
    # both bounds are strict, so pixels exactly on a bound of the group are left out
    pixel_num = []
    top_albedos = []
    for pixel_no, albedo in enumerate(albedos):
        if f_group[0] < albedo < f_group[1]:
            top_albedos.append(albedo)
            pixel_num.append(pixel_no)



    # searching and storing the top pixels using the pixel numbers
    final_pixels = []
    for pix_num in pixel_num:
        final_pixels.append(selected_pixels[pix_num])

    # shuffle the remaining pixels randomly from the top group and pick twenty pixels
    rand_arr = random.sample(final_pixels, len(final_pixels))
    
    # for low albedo values, take 10 samples of 20 pixels from each roof to balance the final training data
    if exp_val < 0.70:
        px_cnt=-20
        t_px_cnt=0
        for i in range(10):
            px_cnt=px_cnt+20
            t_px_cnt=px_cnt+20            
            ten_pixels = rand_arr[px_cnt:t_px_cnt]

            # store the band values for all the bands in ten_pixels into separate list
            red_b = []
            green_b = []
            blue_b = []
            nir_b = []
            for pixels in ten_pixels:
                red_b.append(pixels[0]) 
                green_b.append(pixels[1])
                blue_b.append(pixels[2])
                nir_b.append(pixels[3])

            # calculate the mean values for all the bands from this list
            red_mean=mean(red_b)
            green_mean=mean(green_b)
            blue_mean=mean(blue_b)
            nir_mean=mean(nir_b)

            # calculate the standard deviation for all the bands from this list
            red_sd=statistics.stdev(red_b)
            green_sd=statistics.stdev(green_b)
            blue_sd=statistics.stdev(blue_b)
            nir_sd=statistics.stdev(nir_b)

            red_std.append(red_sd)
            green_std.append(green_sd)
            blue_std.append(blue_sd)
            nir_std.append(nir_sd)

            r_m.append(red_mean)
            g_m.append(green_mean)
            b_m.append(blue_mean)
            n_m.append(nir_mean)

            # store the ten_pixels from each image to a single list
            all_bands.append(ten_pixels)

            img_path.append(imageries)
            tile_id.append(tile)
            exp_values.append(exp_val)
            lat.append(latitude)
            lon.append(longitude)
            roofs.append(imageries[-31:-26])
            roof_add.append(rf_ad)
            footprint_shapes.append(ft_shp)
    else:            
        ten_pixels = rand_arr[0:20]

        # store the band values for all the bands in ten_pixels into separate list
        red_b = []
        green_b = []
        blue_b = []
        nir_b = []
        for pixels in ten_pixels:
            red_b.append(pixels[0]) 
            green_b.append(pixels[1])
            blue_b.append(pixels[2])
            nir_b.append(pixels[3])

        # calculate the mean values for all the bands from this list
        red_mean=mean(red_b)
        green_mean=mean(green_b)
        blue_mean=mean(blue_b)
        nir_mean=mean(nir_b)

        # calculate the standard deviation for all the bands from this list
        red_sd=statistics.stdev(red_b)
        green_sd=statistics.stdev(green_b)
        blue_sd=statistics.stdev(blue_b)
        nir_sd=statistics.stdev(nir_b)

        red_std.append(red_sd)
        green_std.append(green_sd)
        blue_std.append(blue_sd)
        nir_std.append(nir_sd)

        r_m.append(red_mean)
        g_m.append(green_mean)
        b_m.append(blue_mean)
        n_m.append(nir_mean)

        # store the ten_pixels from each image to a single list
        all_bands.append(ten_pixels)

        img_path.append(imageries)
        tile_id.append(tile)
        exp_values.append(exp_val)
        lat.append(latitude)
        lon.append(longitude)
        roofs.append(imageries[-31:-26])
        roof_add.append(rf_ad)
        footprint_shapes.append(ft_shp)

    # calculate albedos from the final selection of pixels
    calc_alb = []
    for pixels in ten_pixels:
        albedos = ((cb*pixels[2])+(cg*pixels[1])+(cr*pixels[0])+(ci*pixels[3]))
        calc_alb.append(albedos)
    mean_albedo = mean(calc_alb)
#     print(mean_albedo)
    mean_albedos.append(mean_albedo)

# Assemble the per-sample results into a pandas DataFrame, one column per
# accumulator list filled by the loop above.
band_columns = {
    'roof_address': roof_add,
    'img_path': img_path,
    'footprint_shapes': footprint_shapes,
    'roof_no': roofs,
    'latitude': lat,
    'longitude': lon,
    'tile_id': tile_id,
    'red_mean': r_m,
    'green_mean': g_m,
    'blue_mean': b_m,
    'nir_mean': n_m,
    'red_std': red_std,
    'green_std': green_std,
    'blue_std': blue_std,
    'nir_std': nir_std,
    'all_bands': all_bands,
    'expected_albedo': exp_values,
}
df = pd.DataFrame(band_columns)

# Write the full band-value table to CSV.
df.to_csv('band_values_gg_4-7.csv', encoding='utf8')

# Assemble the expected and calculated albedo values into a second DataFrame.
albedo_columns = {
    'path': img_path,
    'latitude': lat,
    'longitude': lon,
    'expected_albedo': exp_values,
    'calculated_albedo': mean_albedos,
}
df_albedos = pd.DataFrame(albedo_columns)

# Write the albedo comparison table to CSV.
df_albedos.to_csv('mean_albedos_gg_4-5.csv', encoding='utf8')

/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00023_img_00_lat_34.2420_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00024_img_00_lat_34.2403_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00025_img_00_lat_34.0715_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00026_img_00_lat_34.0689_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-26_rf_00026_img_01_lat_34.0689_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-22_rf_00028_img_00_lat_34.0562_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-22_rf_00029_img_00_lat_34.1588_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-22_rf_00030_img_00_lat_34.0343_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-22_rf_00031_img_00_lat_34.1589_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-22_rf_00032_img_00_lat_33.7668_1m.tif
/data/phase_i/roof_img/george/naip_v1/naipV1_2009-06-21_rf_00033_img_00_lat_38.5777_1m.tif

In [16]:
df

Unnamed: 0,roof_address,img_path,footprint_shapes,roof_no,latitude,longitude,tile_id,red_mean,green_mean,blue_mean,nir_mean,red_std,green_std,blue_std,nir_std,all_bands,expected_albedo
0,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.606078,0.604902,0.671765,0.491569,0.048607,0.053678,0.017357,0.032866,"[(0.4823529411764706, 0.4588235294117647, 0.69...",0.09
1,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.582353,0.590000,0.682353,0.494706,0.091780,0.092709,0.029263,0.027088,"[(0.615686274509804, 0.615686274509804, 0.6627...",0.09
2,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.614510,0.618235,0.675490,0.477451,0.046715,0.042964,0.029617,0.018256,"[(0.6313725490196078, 0.6313725490196078, 0.67...",0.09
3,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.618235,0.630980,0.666863,0.494902,0.052787,0.051555,0.051012,0.047650,"[(0.6196078431372549, 0.6352941176470588, 0.66...",0.09
4,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.626863,0.632941,0.671961,0.486667,0.071830,0.079705,0.057863,0.031370,"[(0.6431372549019608, 0.6470588235294118, 0.66...",0.09
5,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.597451,0.601176,0.679412,0.482745,0.065166,0.062267,0.029149,0.037634,"[(0.611764705882353, 0.611764705882353, 0.6588...",0.09
6,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.606078,0.611176,0.661176,0.493529,0.067475,0.058184,0.072950,0.056533,"[(0.596078431372549, 0.6039215686274509, 0.654...",0.09
7,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.612353,0.620000,0.683137,0.483922,0.073148,0.073739,0.027805,0.030146,"[(0.6235294117647059, 0.6274509803921569, 0.67...",0.09
8,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.572157,0.584902,0.656667,0.499608,0.067636,0.063950,0.064657,0.061050,"[(0.5019607843137255, 0.47843137254901963, 0.7...",0.09
9,"Juniper HallPlummer St, Northridge, Los Angele...",/data/phase_i/roof_img/george/naip_v1/naipV1_2...,"POLYGON ((-118.531063 34.242118, -118.530268 3...",00023,34.242010,-118.530687,m_3411852_ne_11_1_20090626,0.592157,0.596078,0.657647,0.487059,0.050301,0.043891,0.045611,0.043680,"[(0.611764705882353, 0.615686274509804, 0.6627...",0.09


-------