In [5]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import math
import numpy as np
from osgeo import gdal
import osr
from IPython.display import display, clear_output
%matplotlib inline



# function to read in a data cube from a geo tiff file
def geotiff_to_datacube(fname):
    """Read every band of a raster file into one 3-D array.

    Parameters
    ----------
    fname : str
        Path of the raster file handed to ``gdal.Open``.

    Returns
    -------
    data_cube : numpy.ndarray
        Float array of shape ``(rows, cols, n_bands)``; band ``i`` of the
        file (1-based in GDAL) is stored at ``data_cube[:, :, i - 1]``.
    geotransform : tuple
        The value returned by ``ds.GetGeoTransform()``.
    epsg : int
        Integer taken from the ``AUTHORITY`` attribute of the projection.

    Raises
    ------
    OSError
        If GDAL returns no dataset for ``fname``.

    NOTE(review): if the projection carries no AUTHORITY code,
    ``GetAttrValue`` presumably yields ``None`` and ``int()`` raises
    ``TypeError`` -- confirm against the rasters in use.
    """
    ds = gdal.Open(fname)
    if ds is None:
        # Without GDAL exceptions enabled, Open() signals failure by
        # returning None; fail here with a message naming the file.
        raise OSError(f'GDAL could not open raster file: {fname!r}')

    geotransform = ds.GetGeoTransform()

    proj = osr.SpatialReference(wkt=ds.GetProjection())
    epsg = int(proj.GetAttrValue('AUTHORITY', 1))

    # get number of bands in raster file
    n_bands = ds.RasterCount

    # initialize a data cube; the raster size comes from the dataset itself,
    # so no band has to be read just to learn its shape
    data_cube = np.empty((ds.RasterYSize, ds.RasterXSize, n_bands))

    # fill it with bands (GDAL band numbers start at 1)
    for i in range(1, n_bands + 1):
        data_cube[:, :, i - 1] = ds.GetRasterBand(i).ReadAsArray()

    return data_cube, geotransform, epsg



# Build one labelled table: for every sample point of every ROI, take the
# 3x3 pixel neighbourhood around the point from the coefficient raster,
# flatten it into one feature row, and write the combined table to CSV.

rois = ['roi1', 'roi2', 'roi3']
data_folder_as = 'arealstatistik/'
data_folder_coefficients = 'data/'

spectral_bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2']

coefficients = ['c', 'a1', 'b1']

kernelv = ['top', 'mid', 'lower']
kernelh = ['left', 'center', 'right']
positions = [f'{v}{h}' for v in kernelv for h in kernelh]

# Order is kernel row, kernel column, band, coefficient. This matches the
# C-order flattening of a (3, 3, n_layers) kernel only if the raster layers
# are stored band-major / coefficient-minor -- TODO confirm against the file.
feature_names = [f'{pos}_{band}_{coef}' for pos in positions for band in spectral_bands for coef in coefficients]

x_coords, y_coords, land_covers = ([], [], [])

datasets = []
for roi in rois:

    print(roi)

    year = 2006 if roi == 'roi2' else 2007

    # loading arealstatistik
    df = pd.read_csv(f'{data_folder_as}{roi}_as_preprocessed.csv')
    nrows = df.shape[0]

    # loading coefficients
    dc_coefficients, geotransform, epsg = geotiff_to_datacube(f'{data_folder_coefficients}coefficients_{roi}_{year}.tif')

    raster_rows, raster_cols, raster_layers = dc_coefficients.shape
    if 9 * raster_layers != len(feature_names):
        # Otherwise the row assignment below fails with an opaque
        # broadcasting error.
        raise ValueError(
            f'{roi}: raster has {raster_layers} layers, but '
            f'{len(feature_names)} feature names need {len(feature_names) // 9}'
        )

    # using plain lists to avoid slow iteration with pd data frame
    x_list = list(df['X']); x_coords.extend(x_list)
    y_list = list(df['Y']); y_coords.extend(y_list)
    lc_list = list(df['land_cover']); land_covers.extend(lc_list)

    features = np.zeros((nrows, len(feature_names)))

    # unpack geotransform (the rotation terms [2] and [4] are not used, so
    # this assumes a north-up raster -- TODO confirm)
    xOrigin = geotransform[0]
    yOrigin = geotransform[3]
    pixelWidth = geotransform[1]
    pixelHeight = -geotransform[5]

    for i, (x_coord, y_coord) in enumerate(zip(x_list, y_list)):

        # computing column and row indices; floor (not truncation) so that a
        # point just outside the origin does not collapse onto index 0
        icol = math.floor((x_coord - xOrigin) / pixelWidth)
        irow = math.floor((yOrigin - y_coord) / pixelHeight)

        # The 3x3 window must lie fully inside the raster. Negative slice
        # bounds would otherwise wrap around and silently sample pixels from
        # the opposite edge.
        if not (1 <= irow < raster_rows - 1 and 1 <= icol < raster_cols - 1):
            raise ValueError(
                f'{roi}: point X={x_coord}, Y={y_coord} maps to pixel '
                f'(row={irow}, col={icol}); its 3x3 neighbourhood does not '
                f'fit in the {raster_rows}x{raster_cols} raster'
            )

        kernel = dc_coefficients[irow-1:irow+2, icol-1:icol+2, :]
        features[i, :] = kernel.flatten()

    dataset = pd.DataFrame(data=features, columns=feature_names)
    dataset['roi'] = roi
    datasets.append(dataset)


# ignore_index gives the combined table one continuous index instead of
# repeating 0..n-1 for every ROI
data = pd.concat(datasets, axis=0, ignore_index=True)
data['X'] = x_coords
data['Y'] = y_coords
data['land_cover'] = land_covers

# rearrange columns
data = data[['roi', 'X', 'Y', 'land_cover', *feature_names]]


data.to_csv(f'{data_folder_coefficients}labeled_data.csv', encoding='utf-8', index=False)
data.head()
        
        

roi1
roi2
roi3


Unnamed: 0,roi,X,Y,land_cover,topleft_blue_c,topleft_blue_a1,topleft_blue_b1,topleft_green_c,topleft_green_a1,topleft_green_b1,...,lowerright_red_b1,lowerright_nir_c,lowerright_nir_a1,lowerright_nir_b1,lowerright_swir1_c,lowerright_swir1_a1,lowerright_swir1_b1,lowerright_swir2_c,lowerright_swir2_a1,lowerright_swir2_b1
0,roi1,667600,252600,2,380.638367,65.649033,38.340347,504.42926,-17.51136,105.570168,...,71.432404,2337.309326,-1329.80542,-254.358856,1125.302246,-367.617249,-0.383392,516.211548,-87.554733,66.352211
1,roi1,667700,252600,3,332.687958,32.793858,59.761925,421.774048,-45.964584,97.390305,...,64.486885,2031.968262,-1501.842651,-55.873653,966.418213,-525.751587,82.470558,459.251709,-175.961182,91.420761
2,roi1,667800,252600,3,306.430084,20.838465,31.285831,387.130463,-60.250031,60.218811,...,71.439423,1651.890991,-594.699707,22.281591,659.893311,-279.609406,61.193287,286.992676,-129.099976,49.272015
3,roi1,667900,252600,3,289.838318,2.0636,22.950096,367.62085,-66.887375,54.132072,...,90.105492,1693.013306,-506.476105,67.89476,635.475464,-265.514648,125.416039,281.357941,-138.290726,83.930519
4,roi1,668000,252600,3,301.951447,-8.857222,25.563393,398.568115,-87.492523,52.679016,...,67.915154,1584.583008,-763.324524,120.866425,741.02356,-299.501282,168.571472,344.163757,-136.045792,104.747475
