In [1]:
# importing all modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import math
import numpy as np
from osgeo import gdal
import osr
from IPython.display import display, clear_output
%matplotlib inline


# function to get arealstatistik data collection year for campaign and roi
def collection_year(roi,yearAS):
    
    if yearAS == 2004:
        if roi=='roi1': return 2007
        if roi=='roi2': return 2006
        if roi=='roi3': return 2004
    if yearAS == 2013:
        if roi=='roi1': return 2016
        if roi=='roi2': return 2015
        if roi=='roi3': return 2013


# function to read in a data cube from a geo tiff file
def geotiff_to_datacube(fname):
    
    ds = gdal.Open(fname)
    
    geotransform = ds.GetGeoTransform()
    
    proj = osr.SpatialReference(wkt=ds.GetProjection())
    epsg = int(proj.GetAttrValue('AUTHORITY',1))
    
    xy_shape = np.array(ds.GetRasterBand(1).ReadAsArray()).shape
    
    # get number of bands in raster file
    n_bands = ds.RasterCount
    
    # initialize a data cube
    xyz_shape = xy_shape + (n_bands,)
    data_cube = np.ndarray(xyz_shape)
    
    # fill it with bands
    for i in range(1,n_bands+1):
        data_cube[:,:,i-1] =  np.array(ds.GetRasterBand(i).ReadAsArray())
    
    return data_cube, geotransform, epsg
    # end of read in datacube function


# global variables
folder_as = f'data/arealstatistik/'
folder_coefficients = f'data/coefficients/'
folder_composites = f'data/composites/'
folder_output = f'data/classification_data/'

spectral_bands = ['blue','green','red','nir','swir1','swir2']


        

In [2]:
rois = ['roi1','roi2','roi3']

# creating name of features
coefficients = ['c','a1','b1']

kernelv = ['top','mid','lower']
kernelh = ['left','center','right']
positions = [f'{v}{h}' for v in kernelv for h in kernelh]

feature_names = [f'{pos}_{band}_{coef}' for pos in positions for band in spectral_bands for coef in coefficients]

yearsAS = [2004,2013]

for yearAS in yearsAS:
    
    print(yearAS)

    # loading arealstatistik
    df_all = pd.read_csv(f'{folder_as}as{yearAS}_preprocessed.csv')


    # initializing containers
    x_coords, y_coords, land_covers = ([],[],[])
    datasets = []


    for roi in rois:
    
        print(roi)
        df = df_all[df_all['roi']==roi]
    
        year = collection_year(roi,yearAS)
    
        nrows = df.shape[0]

        # loading coefficients
        dc_coefficients, geotransform, epsg = geotiff_to_datacube(f'{folder_coefficients}coefficients_{roi}_{year}.tif')

        # using lists and dictionary to avoid slow iteration with pd data frame
        x_list = list(df['X']); x_coords.extend(x_list)
        y_list = list(df['Y']); y_coords.extend(y_list)
        lc_list = list(df['land_cover']); land_covers.extend(lc_list)
    
        features = np.zeros((nrows,len(feature_names)))

        # unpack geotranform
        xOrigin = geotransform[0]
        yOrigin = geotransform[3]
        pixelWidth = geotransform[1]
        pixelHeight = -geotransform[5]
    
        for i, (x_coord,y_coord) in enumerate(zip(x_list,y_list)):
    

            # computing column and row indices
            icol = int((x_coord - xOrigin) / pixelWidth)
            irow = int((yOrigin - y_coord ) / pixelHeight)

            kernel = dc_coefficients[irow-1:irow+2,icol-1:icol+2,:]
            features[i,:] = kernel.flatten()
        
        dataset = pd.DataFrame(data=features,columns=feature_names)
        dataset['roi'] = roi
        datasets.append(dataset)
    

    data = pd.concat(datasets,axis=0)
    data['X'] = x_coords
    data['Y'] = y_coords
    data['land_cover'] = land_covers

    # rearrange columns
    data = data[['roi','X','Y','land_cover',*feature_names]]

    data.to_csv(f'{folder_output}coefficients_labeled_as{yearAS}.csv', encoding='utf-8', index=False)

        

2004
roi1
roi2
roi3
2013
roi1
roi2
roi3


In [7]:
# label annual composites

rois = ['roi1','roi2','roi3']
yearAS = 2004

# creating name of features
feature_names = spectral_bands

# loading arealstatistik
df_all = pd.read_csv(f'{folder_as}as{yearAS}_preprocessed.csv')


# initializing containers
x_coords, y_coords, land_covers = ([],[],[])
datasets = []

for roi in rois:
    
    print(roi)
    
    # year of data acquisition
    year = collection_year(roi,yearAS)
    
    df = df_all[df_all['roi']==roi]
    nrows = df.shape[0]

    # loading coefficients
    dc_composite, geotransform, epsg = geotiff_to_datacube(f'{folder_composites}annual_composite_{roi}_{year}.tif')

    # using lists and dictionary to avoid slow iteration with pd data frame
    x_list = list(df['X']); x_coords.extend(x_list)
    y_list = list(df['Y']); y_coords.extend(y_list)
    lc_list = list(df['land_cover']); land_covers.extend(lc_list)
    
    features = np.zeros((nrows,len(feature_names)))

    # unpack geotranform
    xOrigin = geotransform[0]
    yOrigin = geotransform[3]
    pixelWidth = geotransform[1]
    pixelHeight = -geotransform[5]
    
    for i, (x_coord,y_coord) in enumerate(zip(x_list,y_list)):
    
        # computing column and row indices
        icol = int((x_coord - xOrigin) / pixelWidth)
        irow = int((yOrigin - y_coord ) / pixelHeight)
    
        features[i,:] = dc_composite[irow,icol,:]

        
    dataset = pd.DataFrame(data=features,columns=feature_names)
    dataset['roi'] = roi
    datasets.append(dataset)
    

data = pd.concat(datasets,axis=0)
data['X'] = x_coords
data['Y'] = y_coords
data['land_cover'] = land_covers

# rearrange columns
data = data[['roi','X','Y','land_cover',*feature_names]]

data.to_csv(f'{folder_output}annual_composite_labeled_as{yearAS}.csv', encoding='utf-8', index=False)
data.head()
        

roi1
roi2
roi3


Unnamed: 0,roi,X,Y,land_cover,blue,green,red,nir,swir1,swir2
0,roi1,667600,237700,1,320.0,562.0,448.0,2998.0,1358.0,589.0
1,roi1,667600,237800,2,307.0,456.0,326.0,1942.0,1260.0,554.0
2,roi1,667600,237900,2,211.0,335.0,206.0,1485.0,648.0,276.0
3,roi1,667600,238000,2,222.0,320.0,202.0,1317.0,437.0,240.0
4,roi1,667600,238100,2,208.0,336.0,205.0,1551.0,701.0,309.0


In [5]:
# label seasonal composites
rois = ['roi1','roi2','roi3']

# creating name of features
seasons = ['spring','summer','autumn']
feature_names = [f'{band}_{season}' for band in spectral_bands for season in seasons]

# loading arealstatistik
df_all = pd.read_csv(f'{folder_as}as{yearAS}_preprocessed.csv')

# initializing containers
x_coords, y_coords, land_covers = ([],[],[])
datasets = []


for roi in rois:
    
    print(roi)
    
    # year of data acquisition
    year = collection_year(roi,yearAS)
    
    df = df_all[df_all['roi']==roi]
    nrows = df.shape[0]

    # loading coefficients
    dc_spring, geotransform, epsg = geotiff_to_datacube(f'{folder_composites}seasonal_composite_spring_{roi}_{year}.tif')
    dc_summer, _, _ = geotiff_to_datacube(f'{folder_composites}seasonal_composite_summer_{roi}_{year}.tif')
    dc_autumn, _, _ = geotiff_to_datacube(f'{folder_composites}seasonal_composite_autumn_{roi}_{year}.tif')

    # using lists and dictionary to avoid slow iteration with pd data frame
    x_list = list(df['X']); x_coords.extend(x_list)
    y_list = list(df['Y']); y_coords.extend(y_list)
    lc_list = list(df['land_cover']); land_covers.extend(lc_list)
    
    features = np.zeros((nrows,len(feature_names)))

    # unpack geotranform
    xOrigin = geotransform[0]
    yOrigin = geotransform[3]
    pixelWidth = geotransform[1]
    pixelHeight = -geotransform[5]
    
    for i, (x_coord,y_coord) in enumerate(zip(x_list,y_list)):
    
        # computing column and row indices
        icol = int((x_coord - xOrigin) / pixelWidth)
        irow = int((yOrigin - y_coord ) / pixelHeight)
        
        for iband in range(len(spectral_bands)):
            features[i,iband*len(seasons)] = dc_spring[irow,icol,iband]
            features[i,iband*len(seasons)+1] = dc_summer[irow,icol,iband]
            features[i,iband*len(seasons)+2] = dc_autumn[irow,icol,iband]

    dataset = pd.DataFrame(data=features,columns=feature_names)
    dataset['roi'] = roi
    datasets.append(dataset)
    
data = pd.concat(datasets,axis=0)
data['X'] = x_coords
data['Y'] = y_coords
data['land_cover'] = land_covers

# rearrange columns
data = data[['roi','X','Y','land_cover',*feature_names]]

data.to_csv(f'{folder_output}seasonal_composite_labeled_as{yearAS}.csv', encoding='utf-8', index=False)
data.head()    

roi1
roi2
roi3


Unnamed: 0,roi,X,Y,land_cover,blue_spring,blue_summer,blue_autumn,green_spring,green_summer,green_autumn,...,red_autumn,nir_spring,nir_summer,nir_autumn,swir1_spring,swir1_summer,swir1_autumn,swir2_spring,swir2_summer,swir2_autumn
0,roi1,667600,237700,1,339.0,307.0,308.0,589.0,561.0,638.0,...,361.0,2930.0,2930.0,2761.0,1334.0,1358.0,1426.0,549.0,589.0,552.0
1,roi1,667600,237800,3,264.0,251.0,224.0,422.0,457.0,373.0,...,263.0,1848.0,1848.0,1812.0,1258.0,1261.0,1242.0,612.0,554.0,507.0
2,roi1,667600,237900,3,230.0,203.0,199.0,315.0,310.0,292.0,...,205.0,1456.0,1456.0,1354.0,542.0,542.0,536.0,240.0,240.0,232.0
3,roi1,667600,238000,3,238.0,221.0,229.0,283.0,316.0,279.0,...,200.0,1039.0,1039.0,998.0,403.0,437.0,382.0,213.0,199.0,196.0
4,roi1,667600,238100,3,215.0,208.0,199.0,339.0,336.0,308.0,...,198.0,1447.0,1447.0,1322.0,702.0,702.0,677.0,332.0,309.0,277.0
