In [1]:
import geopandas as gpd
import pandas as pd
import os

In [2]:
# Folder that load_shp_file reads from and export_shp_file writes to.
SHP_DIR = r'D:\Deutschland\FUB\master_thesis\data\Reference_data\polygons'
# Folder that holds every CSV table used below.
OUTPUT_DIR = r'D:\Deutschland\FUB\master_thesis\data\gee\output'

# Shapefile names (resolved against SHP_DIR).
INPUT_SHP = 'inpolysites.shp'
OUTPUT_SHP = 'buffered_wgs_inpolysites.shp'

# CSV file names (resolved against OUTPUT_DIR).
REF_CSV = 'reference.csv'
LABEL_CSV = 'label.csv'
MERGE_CSV = 'merged.csv'
CLASS_CSV = '4_classes.csv'

# Full CSV paths, built once so the cells below can share them.
ref_path, merge_path, label_path, class_path = (
    os.path.join(OUTPUT_DIR, file_name)
    for file_name in (REF_CSV, MERGE_CSV, LABEL_CSV, CLASS_CSV)
)


In [3]:
def load_shp_file() -> gpd.GeoDataFrame:
    """Read the input shapefile, remove unused attribute columns and number the polygons.

    Returns:
        The content of ``INPUT_SHP`` (inside ``SHP_DIR``) without the attribute
        columns listed below, plus an ``id`` column equal to the row index + 1.
    """
    source = os.path.join(SHP_DIR, INPUT_SHP)
    polygons = gpd.read_file(source)
    print(f'import file {source}')

    # Attribute columns that the later steps never read.
    unused = ['OBJEKTART', 'NUART', 'FBEZ', 'BETR', 'REVIER', 'DIST', 'ABT',
              'RWET', 'BI', 'AI_FOLGE', 'BEST_BEZ', 'STICHTAG', 'LWET', 'FEVERFAHRE',
              'TURNUS', 'BU_WLRT', 'LWET_TEXT', 'MASSNAHMEN', 'NWW_KAT', 'SHAPE_AREA',
              'SHAPE_LEN', 'NHB_BEZ', 'WEFLKZ', 'GUID_ABT', 'layer', 'path']
    polygons = polygons.drop(columns=unused)

    # Sequential identifier derived from the row index (starts at 1 for a default index).
    polygons['id'] = polygons.index + 1
    return polygons

In [4]:
def export_shp_file(data_frame:gpd.GeoDataFrame) -> None:
    """Write ``data_frame`` to ``OUTPUT_SHP`` inside ``SHP_DIR`` and print the target path."""
    target = os.path.join(SHP_DIR, OUTPUT_SHP)
    data_frame.to_file(target)
    print(f'export file {target}')

In [6]:
def buffer(distance: float = -10, epsg: int = 4326) -> None:
    """Buffer the input polygons, reproject them and write the result as a shapefile.

    Args:
        distance: Buffer distance handed to ``GeoDataFrame.buffer``; a negative
            value shrinks each polygon. It is expressed in the units of the
            layer's CRS (the progress message prints it as metres, which
            assumes a metric projected CRS -- TODO confirm for the input file).
        epsg: EPSG code of the CRS the buffered polygons are reprojected to.

    The polygons come from ``load_shp_file`` and are written by ``export_shp_file``.
    """
    polygons = load_shp_file()
    # Buffer before reprojecting so the distance is applied in the source CRS.
    polygons["geometry"] = polygons.buffer(distance)
    print(f"Buffer {distance} m")
    polygons = polygons.to_crs(epsg=epsg)
    print(f"Reproject to EPSG:{epsg}")
    export_shp_file(polygons)

In [3]:
def load_csv_file(file_path:str) -> pd.DataFrame:
    """Read a comma-separated file with a header row into a DataFrame indexed by ``id``.

    Args:
        file_path: Location of the CSV file; it must contain an ``id`` column.

    Returns:
        The table, with the ``id`` column used as index.
    """
    read_options = {'sep': ',', 'header': 0, 'index_col': 'id'}
    table = pd.read_csv(file_path, **read_options)
    print(f'import file {file_path}')
    return table

In [4]:
def export_csv_file(df:pd.DataFrame, file_path:str, index:bool) -> None:
    """Write ``df`` as CSV and print the destination.

    Args:
        df: Table to write.
        file_path: Destination of the CSV file.
        index: Whether the index of ``df`` is written as well.
    """
    df.to_csv(path_or_buf=file_path, index=index)
    message = f'export file {file_path}'
    print(message)

In [26]:
def export_reference_data() -> None:
    """Filter the polygon attributes and write them to ``ref_path`` as CSV.

    Only rows whose ``BST1_BA_1`` is not 0 and whose ``BST2_BA_1`` is 0 are
    kept. All ``BST2_*`` / ``BST3_*`` columns and the geometry are removed
    before the table is written without its index.
    """
    polygons = load_shp_file()

    # Row filter: BST1_BA_1 must be non-zero, BST2_BA_1 must be zero.
    keep = (polygons['BST1_BA_1'] != 0) & (polygons['BST2_BA_1'] == 0)
    selected = polygons[keep]

    # Columns that are not part of the exported table.
    unwanted = [
        'BST2_BA_1', 'BST2_BA_2', 'BST2_BA_3', 'BST2_BA_4',
        'BST2_BA_5', 'BST2_BA_6', 'BST2_BA_7', 'BST2_BA_8',
        'BST2_BAA_1', 'BST2_BAA_2', 'BST2_BAA_3', 'BST2_BAA_4',
        'BST2_BAA_5', 'BST2_BAA_6', 'BST2_BAA_7', 'BST2_BAA_8',
        'BST3_BA_1', 'BST3_BA_2', 'BST3_BA_3', 'BST3_BA_4',
        'BST3_BA_5', 'BST3_BA_6', 'BST3_BA_7', 'BST3_BA_8',
        'BST3_BAA_1', 'BST3_BAA_2', 'BST3_BAA_3', 'BST3_BAA_4',
        'BST3_BAA_5', 'BST3_BAA_6', 'BST3_BAA_7', 'BST3_BAA_8',
        'geometry',
    ]
    table = selected.drop(columns=unwanted)
    export_csv_file(table, ref_path, False)

# Run the export: writes the filtered attribute table to ref_path.
export_reference_data()

import file D:\Deutschland\FUB\master_thesis\data\Reference_data\polygons\inpolysites.shp
export file D:\Deutschland\FUB\master_thesis\data\gee\output\reference.csv


In [5]:
def classify() -> pd.DataFrame:
    """Aggregate the per-polygon species shares of the reference table into four groups.

    The table at ``ref_path`` is read by position: the first eight columns are
    treated as species codes and the next eight as the share belonging to the
    code in the same slot (presumably ``BST1_BA_1..8`` and ``BST1_BAA_1..8`` --
    verify against the reference CSV). Shares are summed per group:

    * code 110           -> ``Spruce``
    * code 710           -> ``Beech``
    * codes 111 to 590   -> ``Coniferous``
    * codes >= 600 (710 excluded, handled above) -> ``Deciduous``

    Any other code (for example 0) contributes to no group. A finer-grained
    variant of this mapping used 210 Silver fir, 310 Douglas fir, 410 Pine,
    600 Oak and 821 Sycamore; those codes now fall into the two generic groups.

    Returns:
        One row per polygon, indexed by ``id``, with the columns ``Spruce``,
        ``Beech``, ``Coniferous`` and ``Deciduous``. An empty reference table
        yields an empty frame with the same columns.
    """
    ref = load_csv_file(ref_path)
    cols = ['Spruce', 'Beech', 'Coniferous', 'Deciduous', 'id']
    slots = 8  # number of (code, share) pairs per polygon

    records = []
    for index, row in ref.iterrows():
        record = dict.fromkeys(cols, 0)
        record['id'] = index
        for i in range(slots):
            # .iloc makes the positional lookup explicit; plain row[i] on a
            # label-indexed Series is deprecated in pandas.
            code = row.iloc[i]
            share = row.iloc[i + slots]
            if code == 110:
                record['Spruce'] += share
            elif code == 710:
                record['Beech'] += share
            elif 110 < code <= 590:
                record['Coniferous'] += share
            elif code >= 600:
                record['Deciduous'] += share
        records.append(record)

    # Build the frame once from plain records instead of one DataFrame per row.
    output = pd.DataFrame(records, columns=cols)
    return output.set_index('id')

In [7]:
def export_classes() -> pd.DataFrame:
    """Write the class shares of all polygons that also appear in the merged table.

    The output of ``classify`` is inner-joined on ``id`` with the table at
    ``merge_path``, rows whose class shares do not add up to exactly 100 are
    discarded, and only the class-share columns are kept. The result is
    written to ``class_path`` with its index and returned.

    Returns:
        The filtered class-share table, indexed by ``id``.
    """
    classes = classify()
    merged = load_csv_file(merge_path)

    # Keep only polygons for which both labels and data are available.
    joined = pd.merge(classes, merged, on='id', how='inner')

    # The class shares are the leading columns of the join. Selecting them by
    # count keeps the result independent of how many columns merged.csv has.
    n_classes = classes.shape[1]
    shares = joined.iloc[:, :n_classes]

    # Discard polygons whose shares do not sum to 100 %.
    output = shares[shares.sum(axis=1) == 100].copy()

    export_csv_file(output, class_path, True)
    return output

# Build and write the class-share table; the returned frame is displayed as the cell output.
export_classes()

import file D:\Deutschland\FUB\master_thesis\data\gee\output\reference.csv
import file D:\Deutschland\FUB\master_thesis\data\gee\output\merged.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\4_classes.csv


Unnamed: 0_level_0,Spruce,Beech,Coniferous,Deciduous
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,10,55,35,0
2,20,5,70,5
3,10,0,60,30
4,80,5,15,0
5,40,5,30,25
...,...,...,...,...
11051,35,10,45,10
11052,30,50,15,5
11053,30,25,45,0
11054,60,5,30,5


In [6]:
# Count how many polygons fall into each mixture class. The rules are checked
# in order and the first match wins.
classes = load_csv_file(class_path)
count = {'Spruce85':0, 'Beech85': 0,
         'Spruce50 Coniferous':0, 'Spruce50 Deciduous':0,
         'Beech50 Coniferous':0, 'Beech50 Deciduous':0,
         'Coniferous85':0, 'Deciduous85':0,
         'Coniferous50 Deciduous':0, 'Deciduous50 Coniferous':0}
# The first four columns are read by position as Spruce, Beech, Coniferous and
# Deciduous shares (the column order produced by classify). Unpacking them
# into names avoids integer keys on a label-indexed row, which pandas deprecates.
for spruce, beech, coniferous, deciduous in classes.iloc[:, :4].itertuples(index=False, name=None):
    if spruce >= 85:
        count['Spruce85'] += 1
    elif beech >= 85:
        count['Beech85'] += 1
    elif spruce >= 50 and beech == 0 and deciduous == 0:
        count['Spruce50 Coniferous'] += 1
    elif spruce >= 50 and coniferous == 0:
        count['Spruce50 Deciduous'] += 1
    elif beech >= 50 and deciduous == 0:
        count['Beech50 Coniferous'] += 1
    elif beech >= 50 and spruce == 0 and coniferous == 0:
        count['Beech50 Deciduous'] += 1
    elif (spruce + coniferous) >= 85:
        count['Coniferous85'] += 1
    elif (beech + deciduous) >= 85:
        count['Deciduous85'] += 1
    elif (spruce + coniferous) >= 50:
        count['Coniferous50 Deciduous'] += 1
    elif (beech + deciduous) >= 50:
        count['Deciduous50 Coniferous'] += 1
# Number of polygons that matched any rule.
print(sum(count.values()))
count


import file D:\Deutschland\FUB\master_thesis\data\gee\output\labels.csv
7935


{'Spruce85': 875,
 'Beech85': 290,
 'Spruce50 Coniferous': 210,
 'Spruce50 Deciduous': 274,
 'Beech50 Coniferous': 133,
 'Beech50 Deciduous': 189,
 'Coniferous85': 1820,
 'Deciduous85': 1462,
 'Coniferous50 Deciduous': 1948,
 'Deciduous50 Coniferous': 734}

In [5]:
def _mixture_class(spruce, beech, coniferous, deciduous) -> int:
    """Return the class number for one polygon's four percentage shares.

    The rules are evaluated in order and the first match wins; the numbering
    is the one listed in the docstring of ``export_labels``.
    """
    if spruce >= 85:
        return 0
    if beech >= 85:
        return 1
    if spruce >= 50 and beech == 0 and deciduous == 0:
        return 2
    if spruce >= 50 and coniferous == 0:
        return 3
    if beech >= 50 and deciduous == 0:
        return 4
    if beech >= 50 and spruce == 0 and coniferous == 0:
        return 5
    if (spruce + coniferous) >= 85:
        return 6
    if (beech + deciduous) >= 85:
        return 7
    if (spruce + coniferous) >= 50:
        return 8
    if (beech + deciduous) >= 50:
        return 9
    # No rule matched: fall back to the column's initial value of 0.
    return 0


def export_labels() -> pd.DataFrame:
    """
    Assign one class number to every polygon in the class-share table and
    write the result to ``label_path``.

    Spruce85: class 0
    Beech85: class 1
    Spruce50 Coniferous: class 2
    Spruce50 Deciduous: class 3
    Beech50 Coniferous: class 4
    Beech50 Deciduous: class 5
    Coniferous85: class 6
    Deciduous85: class 7
    Coniferous50 Deciduous: class 8
    Deciduous50 Coniferous: class 9

    The first four columns of the table at ``class_path`` are read by position
    as Spruce, Beech, Coniferous and Deciduous shares. Polygons that match no
    rule get class 0.

    Returns:
        A frame indexed by ``id`` whose first four columns have been removed,
        leaving the new ``class`` column.
    """
    labels = load_csv_file(class_path)
    shares = labels.iloc[:, :4]

    # Compute the class per row and assign the whole column on the frame.
    # Writing into the Series yielded by iterrows() is not guaranteed to reach
    # the frame, because pandas may hand out a copy.
    class_numbers = [
        _mixture_class(*row) for row in shares.itertuples(index=False, name=None)
    ]
    labels['class'] = pd.Series(class_numbers, index=labels.index, dtype='int64')

    labels = labels.drop(columns=labels.columns[[0, 1, 2, 3]])
    export_csv_file(labels, label_path, True)
    return labels

# Assign and write the class labels; the returned frame is displayed as the cell output.
export_labels()

import file D:\Deutschland\FUB\master_thesis\data\gee\output\4_classes.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\label.csv


Unnamed: 0_level_0,class
id,Unnamed: 1_level_1
1,4
2,6
3,8
4,6
5,8
...,...
11051,8
11052,9
11053,8
11054,6
