In [1]:
# library import
import pandas as pd
import geopandas as gpd
import numpy as np
import math
import fiona
from IPython import display
import treeParsing as tP

# Import Geometry

In [2]:
# City key: selects which boundary GeoPackage is loaded and prefixes every CSV this notebook writes.
city = "bologna"

In [3]:
# Geometry and data import.
# Both GeoPackage paths are derived from `city`, so changing the city cannot
# silently pair one city's boundary grid with another city's tree inventory
# (a missing inventory file now fails loudly at read time instead).
fNameGrid = f"../01_GIS/CityBoundaries/{city}.gpkg"
fNameTrees = f"../01_GIS/TreeInventories/{city}_street_trees_84.gpkg"

grid = gpd.read_file(fNameGrid)
# Expose the row index of the grid as an explicit "fid" column.
grid["fid"] = list(grid.index)
trees = gpd.read_file(fNameTrees)

# Taxonomy lookup table; it is joined onto the trees by genus in the cleaning step.
plantTaxonomy = pd.read_csv("../03_DATA/plantTaxonomy.csv")

# Calculate Basal Area and Clean Data

In [4]:
# Preview the first rows of the raw inventory to check the column layout.
trees.head()

Unnamed: 0,NUM_PT,COD_UG,SPECIE,CL_H,CL_CIRC,PREGIO,decodifica,decodifi_2,decodifi_4,geometry
0,95784,1,Rp,3,C5,N,12 - 16,75 - 90 (19-28 cm),Robinia pseudoacacia,POINT (11.30020 44.49657)
1,95787,1,Rp,3,C6,N,12 - 16,90 - 110 (28-35cm),Robinia pseudoacacia,POINT (11.30035 44.49675)
2,7963,3167,Cel,3,C8,N,12 - 16,140 - 170 (45-54cm),Celtis australis,POINT (11.36547 44.49536)
3,7993,3227,Pla,5,C9,N,>23,170 - 200 (54-64 cm),Platanus acerifolia,POINT (11.36643 44.49619)
4,95851,1,Tilp,2,C2,N,6 - 12,15 - 30 (5-10 cm),Tilia platyphyllos,POINT (11.29967 44.49535)


In [5]:
# Lookup from the inventory's trunk-size class label (column ``decodifi_2``) to one
# representative diameter in cm. Each value is the midpoint of the range given in
# parentheses in the label; the open-ended "< 15 (5 cm)" class uses 5, and a missing
# label maps to 0 so that it can be filtered out afterwards.
# NOTE(review): the leading range in each label is presumably the trunk circumference
# in cm — confirm against the inventory's data dictionary.
diamTranslate = dict([
    ('75 - 90 (19-28 cm)', 23.5),
    ('90 - 110 (28-35cm)', 31.5),
    ('140 - 170 (45-54cm)', 49.5),
    ('170 - 200 (54-64 cm)', 59),
    ('15 - 30 (5-10 cm)', 7.5),
    ('110 - 140 (35-45 cm)', 40),
    ('45 - 60 (15-19 cm)', 17),
    ('200 - 230 (64-73 cm)', 68.5),
    ('< 15 (5 cm)', 5),
    (None, 0),
    ('230 - 260 (73-80cm)', 76.5),
    ('30 - 45 (10 -15cm)', 12.5),
])

In [6]:
# Translate each size-class label into its representative diameter (DBH).
trees['DBH'] = trees['decodifi_2'].map(diamTranslate)

# Drop rows without a usable size class (mapped to 0, or unmapped and therefore NaN).
trees = trees[trees["DBH"] > 0]

# Trim the upper tail at the 99th percentile of DBH. The quantile is taken on the
# DBH column only: DataFrame.quantile over the whole frame also visits the string
# and geometry columns, which raises on pandas >= 2.0.
dbhCap = trees["DBH"].quantile(0.99)
# .copy() so the column added below is written to an independent frame rather than
# to a filtered view (avoids SettingWithCopyWarning).
trees = trees[trees["DBH"] <= dbhCap].copy()

# Basal area = pi/4 * d^2; the factor 0.00007854 (= pi / 40000) also converts a
# diameter in cm into an area in m^2.
trees['BasalArea'] = trees['DBH']**2 * 0.00007854

In [7]:
# Mean representative DBH of the trees that survived the filtering step.
trees["DBH"].mean()

24.385771357515146

In [8]:
# Show the 100 rows with the largest DBH (handy for eyeballing the upper tail after trimming).
trees.sort_values("DBH",ascending=False).head(100)

Unnamed: 0,NUM_PT,COD_UG,SPECIE,CL_H,CL_CIRC,PREGIO,decodifica,decodifi_2,decodifi_4,geometry,DBH,BasalArea
49484,22535,536,Poc,5,C10,N,>23,200 - 230 (64-73 cm),,POINT (11.38726 44.47041),68.5,0.368529
32631,756,8,Pon,4,C10,N,16 - 23,200 - 230 (64-73 cm),Populus nigra,POINT (11.33360 44.52110),68.5,0.368529
34039,87572,2790,Cda2,4,C10,N,16 - 23,200 - 230 (64-73 cm),"Cedrus atlantica ""Glauca""",POINT (11.37493 44.46383),68.5,0.368529
33605,2045,44,Aei,4,C10,N,16 - 23,200 - 230 (64-73 cm),Aesculus hippocastanum,POINT (11.35008 44.48498),68.5,0.368529
72104,48911,1430,Pla,5,C10,N,>23,200 - 230 (64-73 cm),Platanus acerifolia,POINT (11.33825 44.50481),68.5,0.368529
...,...,...,...,...,...,...,...,...,...,...,...,...
75833,80166,2016,Cdd,5,C10,N,>23,200 - 230 (64-73 cm),Cedrus deodara,POINT (11.39814 44.51221),68.5,0.368529
47168,5118,3039,Cel,4,C10,N,16 - 23,200 - 230 (64-73 cm),Celtis australis,POINT (11.38346 44.47493),68.5,0.368529
42158,34814,3334,Qus,5,C10,N,>23,200 - 230 (64-73 cm),Quercus petraea,POINT (11.25568 44.50236),68.5,0.368529
3902,50422,1981,Pla,4,C10,N,16 - 23,200 - 230 (64-73 cm),Platanus acerifolia,POINT (11.37224 44.50074),68.5,0.368529


In [9]:
# Inventory column that carries the full scientific name.
scientificNameCol = "decodifi_4"
# Column names for the space-separated pieces of a scientific name: genus, species,
# then five spare slots for any further tokens.
splitNameCols = ["CleanGenus", "CleanSpecies"] + [
    f"HybridPlaceholder{slot}" for slot in range(1, 6)
]

In [10]:
# Data cleaning and taxonomy merge.
# Keep only trees that have a scientific name, and only the columns used downstream.
# .copy() makes this an independent frame so the column assignments below do not
# write into a slice of `trees`.
cleanTrees = trees.loc[
    trees[scientificNameCol].notnull(),
    ["geometry", scientificNameCol, "BasalArea"],
].copy()

# Title-case the name and strip literal question marks. regex=False is spelled out
# because '?' is a regex metacharacter and the default of `regex` differs between
# pandas versions.
cleanTrees["Scientific"] = (
    cleanTrees[scientificNameCol].str.title().str.replace("?", "", regex=False)
)

# Split the name on single spaces once; the number of resulting columns decides how
# many of the predefined part names are needed.
nameParts = cleanTrees["Scientific"].str.split(" ", expand=True)
nameCols = splitNameCols[:nameParts.shape[1]]
cleanTrees[nameCols] = nameParts

# Attach the taxonomy by genus. `suffixes` must be an ordered pair: a set has no
# defined order (left/right suffix could swap) and is rejected by pandas >= 2.0.
cleanTrees = cleanTrees.merge(
    plantTaxonomy,
    how="left",
    left_on="CleanGenus",
    right_on="genus",
    suffixes=("", "_taxa"),
)

In [11]:
# Preview the cleaned trees with the split name parts and the merged taxonomy columns.
cleanTrees.head()

Unnamed: 0,geometry,decodifi_4,BasalArea,Scientific,CleanGenus,CleanSpecies,HybridPlaceholder1,HybridPlaceholder2,HybridPlaceholder3,genus,family,superfamily,class,order,phylum,kingdom
0,POINT (11.30020 44.49657),Robinia pseudoacacia,0.043374,Robinia Pseudoacacia,Robinia,Pseudoacacia,,,,Robinia,Fabaceae,,Magnoliopsida,Fabales,Tracheophyta,Plantae
1,POINT (11.30035 44.49675),Robinia pseudoacacia,0.077931,Robinia Pseudoacacia,Robinia,Pseudoacacia,,,,Robinia,Fabaceae,,Magnoliopsida,Fabales,Tracheophyta,Plantae
2,POINT (11.36547 44.49536),Celtis australis,0.192443,Celtis Australis,Celtis,Australis,,,,Celtis,Cannabaceae,,Magnoliopsida,Rosales,Tracheophyta,Plantae
3,POINT (11.36643 44.49619),Platanus acerifolia,0.273398,Platanus Acerifolia,Platanus,Acerifolia,,,,Platanus,Platanaceae,,Magnoliopsida,Proteales,Tracheophyta,Plantae
4,POINT (11.29967 44.49535),Tilia platyphyllos,0.004418,Tilia Platyphyllos,Tilia,Platyphyllos,,,,Tilia,Malvaceae,,Magnoliopsida,Malvales,Tracheophyta,Plantae


In [12]:
# Total basal area per scientific name. Only BasalArea is aggregated: summing every
# column would also try to "sum" the geometry and text columns, which older pandas
# silently dropped but pandas >= 2.0 no longer does.
cleanTrees.groupby('Scientific')['BasalArea'].sum().reset_index()

Unnamed: 0,Scientific,BasalArea
0,Abies Alba,4.436764
1,Abies Nordmanniana,0.049087
2,Abies Pinsapo,0.017671
3,Acer Campestre,87.349419
4,Acer Davidii,0.004418
...,...,...
170,Ulmus Carpinifolia,67.415948
171,Ulmus Pumila,29.914158
172,Viburnum Tinus,0.043374
173,Wisteria Sinensis,0.198471


# City Wide Figures

In [13]:
def _basalAreaShares(frame, groupCol, label):
    """Sum BasalArea per `groupCol` and express each group as a share of the total.

    Parameters
    ----------
    frame : DataFrame with a "BasalArea" column and a `groupCol` column.
    groupCol : name of the column to group by.
    label : name given to the group column in the result.

    Returns
    -------
    DataFrame with columns [label, "Frequency", "Pct"]. Note that "Frequency" holds
    the summed basal area of the group (not a stem count) and "Pct" is a fraction
    of the overall total (0-1), not a percentage.
    """
    # Aggregate BasalArea only; a frame-wide sum would also hit the geometry and
    # text columns, which pandas >= 2.0 no longer drops silently.
    shares = (
        frame.groupby(groupCol)["BasalArea"]
        .sum()
        .reset_index()
        .rename(columns={groupCol: label, "BasalArea": "Frequency"})
    )
    shares["Pct"] = shares["Frequency"] / shares["Frequency"].sum()
    return shares


# species frequency and pct
speciesCount = _basalAreaShares(cleanTrees, "Scientific", "Species")
speciesShannon = tP.ShannonEntropy(list(speciesCount["Pct"]))
speciesSimpson = tP.simpson_di(list(speciesCount["Species"]), list(speciesCount["Frequency"]))

# genus frequency and pct
genusCount = _basalAreaShares(cleanTrees, "CleanGenus", "Genus")
genusShannon = tP.ShannonEntropy(list(genusCount["Pct"]))
genusSimpson = tP.simpson_di(list(genusCount["Genus"]), list(genusCount["Frequency"]))

# family frequency and pct (the group column keeps its original name, "family")
familyCount = _basalAreaShares(cleanTrees, "family", "family")
familyShannon = tP.ShannonEntropy(list(familyCount["Pct"]))
familySimpson = tP.simpson_di(list(familyCount["family"]), list(familyCount["Frequency"]))

# One three-line stanza per taxonomic level; the text matches the original report.
diversityReport = ""
for levelName, levelCount, levelShannon, levelSimpson in (
    ("Species", speciesCount, speciesShannon, speciesSimpson),
    ("Genus", genusCount, genusShannon, genusSimpson),
    ("Family", familyCount, familyShannon, familySimpson),
):
    diversityReport += (
        f"Most Abundant {levelName} %: {levelCount['Pct'].max()}\n"
        f"{levelName} Shannon Index: {levelShannon}\n"
        f"{levelName} Simpson Index: {levelSimpson}\n"
    )
print(diversityReport)

Most Abundant Species %: 0.15607756082127938
Species Shannon Index: 3.2712228717496266
Species Simpson Index: 0.9304863151877883
Most Abundant Genus %: 0.15607756082127947
Genus Shannon Index: 2.6739808843439183
Genus Simpson Index: 0.9029945060527755
Most Abundant Family %: 0.15723492316083953
Family Shannon Index: 2.387752558846326
Family Simpson Index: 0.8884818023740858



# Species Makeup

In [26]:
# Species-level makeup table, built by the project helper from the cleaned trees and the grid.
# NOTE(review): speciesParse lives in treeParsing and its output schema is not visible here;
# the following cells rely on it producing columns whose names contain 'sumPCT'.
treeMakeup = tP.speciesParse(cleanTrees, grid)

In [27]:
# Largest 'sumPCT' value in the first row of the makeup table.
# Series.max() skips NaN; the builtin max() returns an order-dependent result when a
# NaN is present, because every comparison against NaN is False.
# NOTE(review): row 0 presumably corresponds to the first feature of `grid` — confirm.
float(treeMakeup.filter(like='sumPCT').iloc[0].max())

0.22866211079947543

In [28]:
# Largest 'sumPCT' value in the second row of the makeup table.
# Series.max() skips NaN; the builtin max() returns an order-dependent result when a
# NaN is present, because every comparison against NaN is False.
# NOTE(review): row 1 presumably corresponds to the second feature of `grid` — confirm.
float(treeMakeup.filter(like='sumPCT').iloc[1].max())

0.16011071358342915

In [16]:
# Persist the species-level makeup; the "Combine" step reads this file back by the same name.
# The DataFrame index is written as the first (unnamed) column.
treeMakeup.to_csv(f"./csv/{city}InsideOutsideSpeciesMakeupWithStems.csv")

# Genus Makeup

In [29]:
# Genus-level makeup table; this rebinds `treeMakeup`, replacing the species-level table.
# NOTE(review): genusParse lives in treeParsing and its output schema is not visible here;
# the following cells rely on it producing columns whose names contain 'sumPCT'.
treeMakeup = tP.genusParse(cleanTrees, grid)

In [30]:
# Largest 'sumPCT' value in the first row of the genus-level makeup table.
# Series.max() skips NaN; the builtin max() returns an order-dependent result when a
# NaN is present, because every comparison against NaN is False.
# NOTE(review): row 0 presumably corresponds to the first feature of `grid` — confirm.
float(treeMakeup.filter(like='sumPCT').iloc[0].max())

0.23054781159044005

In [31]:
# Largest 'sumPCT' value in the second row of the genus-level makeup table.
# Series.max() skips NaN; the builtin max() returns an order-dependent result when a
# NaN is present, because every comparison against NaN is False.
# NOTE(review): row 1 presumably corresponds to the second feature of `grid` — confirm.
float(treeMakeup.filter(like='sumPCT').iloc[1].max())

0.16011071358343013

In [20]:
# Persist the genus-level makeup; the "Combine" step reads this file back by the same name.
# The DataFrame index is written as the first (unnamed) column.
treeMakeup.to_csv(f"./csv/{city}InsideOutsideGenusMakeupWithStems.csv")

# Family Makeup

In [32]:
# Family-level makeup table; this rebinds `treeMakeup`, replacing the genus-level table.
# NOTE(review): familyParse lives in treeParsing and its output schema is not visible here;
# the following cells rely on it producing columns whose names contain 'sumPCT'.
treeMakeup = tP.familyParse(cleanTrees, grid)

In [35]:
# Largest 'sumPCT' value in the first row of the family-level makeup table.
# Series.max() skips NaN; the builtin max() returns an order-dependent result when a
# NaN is present, because every comparison against NaN is False.
# NOTE(review): row 0 presumably corresponds to the first feature of `grid` — confirm.
float(treeMakeup.filter(like='sumPCT').iloc[0].max())

0.23540977892366838

In [36]:
# Largest 'sumPCT' value in the second row of the family-level makeup table.
# Series.max() skips NaN; the builtin max() returns an order-dependent result when a
# NaN is present, because every comparison against NaN is False.
# NOTE(review): row 1 presumably corresponds to the second feature of `grid` — confirm.
float(treeMakeup.filter(like='sumPCT').iloc[1].max())

0.16115091148994087

In [37]:
# Persist the family-level makeup; the "Combine" step reads this file back by the same name.
# The DataFrame index is written as the first (unnamed) column.
treeMakeup.to_csv(f"./csv/{city}InsideOutsideFamilyMakeupWithStems.csv")

# Combine

In [38]:
# Re-load the three per-level makeup tables written earlier in the notebook
# (`treeMakeup` was rebound for each level, so the CSVs are the only surviving copies).
levelCsvPaths = [
    f"./csv/{city}InsideOutsideSpeciesMakeupWithStems.csv",
    f"./csv/{city}InsideOutsideGenusMakeupWithStems.csv",
    f"./csv/{city}InsideOutsideFamilyMakeupWithStems.csv",
]
speciesDf, genusDf, familyDf = [pd.read_csv(csvPath) for csvPath in levelCsvPaths]

# Put the three tables side by side, then keep the diversity columns, the
# most-abundant columns, and the "sum" / "num" columns, in that order.
# NOTE(review): if each of the three CSVs carries its own "sum" and "num" column, the
# column-wise concat yields duplicate labels and this selection returns every copy —
# confirm whether that is intended.
sideBySide = pd.concat([speciesDf, genusDf, familyDf], axis=1)
keepCols = [
    *sideBySide.filter(like="Diversity").columns,
    *sideBySide.filter(like="Abundant").columns,
    "sum",
    "num",
]
mergedDf = sideBySide[keepCols]
mergedDf.to_csv(f"./csv/{city}InsideOutsideOutputWithStems.csv")