In [55]:
import glob
import rasterio
import numpy as np
import matplotlib.pyplot as plt
from rasterio.mask import mask
from rasterio.plot import show
from matplotlib import colors
from skimage import exposure
from sklearn.metrics import confusion_matrix
import geopandas as gpd
import pandas as pd

In [56]:
# Collect the paths of the class shapefiles stored under ./classes.
# NOTE(review): glob.glob returns matches in arbitrary order, and later cells
# index into this list by position (class_label[0], "all but the last") —
# confirm the order is what those cells expect on the machine running this.
class_label = glob.glob('./classes/*shp')
class_label

['./classes/class_name_Urban.shp',
 './classes/class_name_BareLand.shp',
 './classes/class_name_Farm.shp',
 './classes/class_name_Forest.shp',
 './classes/class_name_Water.shp',
 './classes/class_name_Cloud.shp']

In [57]:
# Load the first shapefile in class_label to inspect its attribute table.
# In the recorded run this is the Urban class; which file comes first depends
# on the order glob returned.
building_shape = gpd.read_file(class_label[0])
building_shape

Unnamed: 0,id,class_name,class1,geometry
0,10,Urban,3,POLYGON ((277345.2154606701 -3756456.897802229...
1,11,Urban,3,POLYGON ((276196.4266609224 -3757782.791541938...
2,12,Urban,3,"POLYGON ((356993.3755881815 -3724022.76068935,..."
3,13,Urban,3,POLYGON ((275467.6637660824 -3761606.104266092...


In [58]:
# Two-column table pairing each polygon's class name with its numeric class
# code, both taken from the shapefile loaded into building_shape.
classes = pd.DataFrame({'Name':building_shape.class_name.values,'Class':building_shape.class1})



In [81]:
def dir_location(date,band,data_dir='/vault/users/zafiirah/BigDataAfrica2019/AGRI-PROJECT'):
    """Open the GeoTIFF of one band for one date.

    The file is located with the glob pattern ``<data_dir>/*<band>*<date>*tif``.

    Parameters
    ----------
    date : str
        Token that must appear in the file name after the band token.
    band : str
        Token that must appear in the file name (this notebook passes
        'blue', 'green', 'red' and 'nir').
    data_dir : str, optional
        Directory that is searched. The default is the directory this
        notebook was originally written against.

    Returns
    -------
    The dataset object returned by ``rasterio.open`` for the first match.
    It is returned open; the caller is responsible for closing it (for
    example by using it in a ``with`` statement).

    Raises
    ------
    FileNotFoundError
        If no file matches the pattern.
    """
    import os

    pattern = os.path.join(data_dir, '*' + band + '*' + date + '*tif')
    # glob returns matches in arbitrary order; sorting makes the choice
    # reproducible when more than one file matches.
    matches = sorted(glob.glob(pattern))
    if not matches:
        # Fail with the pattern in the message instead of a bare IndexError.
        raise FileNotFoundError('No raster file matches pattern: ' + pattern)
    return rasterio.open(matches[0])

#Get shape geometry in json format
def getFeatures(gdf):
    """Return the geometry of the first feature of ``gdf`` as a one-element list.

    The GeoDataFrame is serialised with ``gdf.to_json()`` and parsed back into
    plain Python objects; only the ``geometry`` mapping of the first feature is
    kept. The result is wrapped in a list, the form in which this notebook
    passes shapes to ``rasterio.mask.mask``.
    """
    from json import loads

    feature_collection = loads(gdf.to_json())
    first_feature = feature_collection['features'][0]
    return [first_feature['geometry']]

#Function for getting the labelled pixels of one class shapefile
def getClases(shp,date = '20141002'):
    """Build a table of band values for the pixels covered by a shapefile.

    Every polygon in ``shp`` is used to mask the blue, green, red and NIR
    rasters of ``date``; the masked arrays are flattened into one row per
    pixel and tagged with the polygon's ``class1`` value.

    Parameters
    ----------
    shp : str
        Path of the shapefile to read. It must provide a ``class1`` column.
    date : str, optional
        Date token forwarded to ``dir_location`` to pick the rasters.

    Returns
    -------
    pandas.DataFrame
        Columns ``Blue_Band``, ``Green_Band``, ``Red_Band``, ``NIR_Band`` and
        ``Class``. Rows holding -0.9999 or NaN in any column are dropped.
        An empty frame with those columns is returned when the shapefile
        contains no polygons.
    """
    columns = ['Blue_Band', 'Green_Band', 'Red_Band', 'NIR_Band', 'Class']
    df_ls = []

    # Open the rasters as context managers so the file handles are released
    # when the function returns or raises (they were previously left open).
    with dir_location(date,band='blue') as img_blue, \
         dir_location(date,band='green') as img_green, \
         dir_location(date,band='red') as img_red, \
         dir_location(date,band='nir') as img_nir:

        geom = gpd.read_file(shp)
        # Reproject the polygons into the CRS of the red-band raster.
        geom = geom.to_crs(crs=img_red.crs.data)

        for pos in range(len(geom)):
            # Keep a one-row GeoDataFrame: getFeatures reads the first feature.
            polygon = geom.iloc[pos:pos + 1, :]
            cord1 = getFeatures(polygon)

            # mask() returns (array, transform); only the array is needed.
            out_blue, _  = mask(img_blue, cord1, crop=True)
            out_green, _ = mask(img_green, cord1, crop=True)
            out_red, _   = mask(img_red, cord1, crop=True)
            out_nir, _   = mask(img_nir, cord1, crop=True)

            df_ls.append(pd.DataFrame({'Blue_Band':out_blue.ravel(),#*0.0001,
                                       'Green_Band':out_green.ravel(),#*0.0001,
                                       'Red_Band':out_red.ravel(),#*0.0001,
                                       'NIR_Band':out_nir.ravel(),#*0.0001,
                                       # Label of the current polygon, not of row 0.
                                       'Class':polygon.class1.iloc[0]}))

    if not df_ls:
        # pd.concat raises on an empty list; return an empty table instead.
        return pd.DataFrame(columns=columns)

    class_df = pd.concat(df_ls, axis=0)
    # -0.9999 is treated as a missing-value marker (presumably the rasters'
    # nodata value — TODO confirm) and removed together with NaNs.
    class_df = class_df.replace(-0.9999, np.nan)
    return class_df.dropna()

In [95]:
# Build the training table from every class shapefile except Cloud.
# The previous slice (all but the last element) dropped whichever path glob
# happened to list last; glob's ordering is not guaranteed. In the recorded
# class_label output the last entry is ./classes/class_name_Cloud.shp, so that
# file is excluded by name here to keep the result independent of glob order.
df_ls = [getClases(s) for s in class_label if not s.endswith('class_name_Cloud.shp')]

cls_df = pd.concat(df_ls, axis=0)

In [96]:
# Number of non-null rows per band column for each class code — a quick check
# of how many pixels each class contributes to cls_df.
cls_df.groupby('Class').count()

Unnamed: 0_level_0,Blue_Band,Green_Band,Red_Band,NIR_Band
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,998,998,998,998
3,787,787,787,787
4,2343,2343,2343,2343
5,1610,1610,1610,1610
6,1715,1715,1715,1715


In [97]:
def allPixels(date, rows=(2000, 5000), cols=(1000, 6000)):
    """Return the band values of every pixel inside a rectangular window.

    Parameters
    ----------
    date : str
        Date token forwarded to ``dir_location`` to pick the rasters.
    rows : tuple of int, optional
        ``(start, stop)`` row indices of the window, used as a Python slice.
    cols : tuple of int, optional
        ``(start, stop)`` column indices of the window, used as a Python slice.
        The defaults reproduce the window that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per pixel of the window (flattened in row-major order) with
        columns ``Blue_Band``, ``Green_Band``, ``Red_Band`` and ``NIR_Band``.
    """
    row_slice = slice(*rows)
    col_slice = slice(*cols)

    # (output column, band token passed to dir_location), in output order.
    band_columns = (('Blue_Band', 'blue'),
                    ('Green_Band', 'green'),
                    ('Red_Band', 'red'),
                    ('NIR_Band', 'nir'))

    pixels = {}
    for column, band in band_columns:
        # Close each raster as soon as its window has been copied out;
        # the datasets were previously left open.
        with dir_location(date,band=band) as img:
            pixels[column] = img.read(1)[row_slice, col_slice].ravel()#*0.0001

    return pd.DataFrame(pixels)

In [98]:
# Extract the band values of the fixed window defined in allPixels.
# NOTE(review): `date` is not assigned in any cell visible in this notebook;
# on a fresh kernel this line raises NameError unless it is defined elsewhere.
# getClases defaults to '20141002' — confirm whether that is the intended date.
cdf = allPixels(date)

In [99]:
# Save the pixel table to data.csv in the current working directory.
# With to_csv's defaults the DataFrame index is written as an extra first column.
cdf.to_csv('data.csv')

In [100]:
# Summary statistics (count, mean, std, min, quartiles, max) for each band column.
cdf.describe()

Unnamed: 0,Blue_Band,Green_Band,Red_Band,NIR_Band
count,14999580.0,14982990.0,14999860.0,14999760.0
mean,0.0743935,0.04472639,0.08813772,0.2348303
std,0.03772147,0.03004114,0.05092753,0.1014093
min,0.0001,0.0001,0.0001,0.0001
25%,0.0558,0.0295,0.0574,0.1736
50%,0.0711,0.0403,0.0803,0.2479
75%,0.0898,0.0535,0.1112,0.31
max,0.8003,0.742,0.8485,0.8982


In [None]:
data =  pd.read