## Import libraries and loading data

In [6]:
# Import required libraries
import numpy as np
import pandas as pd
import geopandas as gpd

In [7]:
# Load the GeoJSON file as geopandas dataframe, dropping the last row (it refers to the "Total" observation)
dataframe = gpd.read_file("../../data/raw_data/geo_data_trees.geojson")[:-1]

In [8]:
# Check the structure of the dataframe
dataframe.head()

Unnamed: 0,Tree ID,Name,DBH (cm),Replacement Value (eur),Carbon Storage (kg),Carbon Storage (eur),Gross Carbon Sequestration (kg/yr),Gross Carbon Sequestration (eur/yr),Avoided Runoff (mcube/yr),Avoided Runoff (eur/yr),...,Oxygen Production (kg/yr),Height (m),Crown Height (m),Crown Width (m),Canopy Cover (m2),Leaf Area (m2),Leaf Biomass (kg),Latitude,Longitude,geometry
0,1.0,Acer pseudoplatanus,8,179.43,12.0,1.92,1.1,0.17,0,0.01,...,2.8,3.5,1.0,1.5,1.8,3.5,0.2,46.051599,11.072014,POINT (11.07201 46.05160)
1,2.0,Acer pseudoplatanus,8,179.43,12.9,2.07,1.1,0.18,0,0.01,...,3.0,5.0,1.5,1.5,1.8,4.2,0.3,46.051541,11.072108,POINT (11.07211 46.05154)
2,3.0,Acer pseudoplatanus,9,227.09,16.2,2.61,1.3,0.2,0,0.01,...,3.4,4.5,1.3,1.5,1.8,4.0,0.3,46.051668,11.071959,POINT (11.07196 46.05167)
3,4.0,Acer pseudoplatanus,11,482.46,25.7,4.13,1.6,0.26,0,0.02,...,4.3,5.0,1.5,2.5,4.9,7.1,0.5,46.063778,11.15017,POINT (11.15017 46.06378)
4,7.0,Cupressus,25,1110.29,448.2,72.02,11.1,1.78,0,0.02,...,29.5,8.0,2.4,1.5,1.8,6.6,1.5,46.052305,11.07155,POINT (11.07155 46.05230)


In [9]:
# Check the dtypes of the variables in the dataframe - most of them are not correct
dataframe.dtypes

Tree ID                                 float64
Name                                     object
DBH (cm)                                 object
Replacement Value (eur)                  object
Carbon Storage (kg)                      object
Carbon Storage (eur)                     object
Gross Carbon Sequestration (kg/yr)       object
Gross Carbon Sequestration (eur/yr)      object
Avoided Runoff (mcube/yr)                object
Avoided Runoff (eur/yr)                  object
Carbon Avoided (kg/yr)                   object
Carbon Avoided (eur/yr)                  object
Pollution Removal (g/yr)                 object
Pollution Removal (eur/yr)               object
Energy Savings (eur/yr)                  object
Total Annual Benefits (eur/yr)           object
Oxygen Production (kg/yr)                object
Height (m)                               object
Crown Height (m)                         object
Crown Width (m)                          object
Canopy Cover (m2)                       

In [10]:
# Update the dtypes of each variable in the dataframe with the correct one
dataframe = dataframe.astype({"Tree ID": int,
                              "Name": str,
                              "DBH (cm)": float,
                              "Replacement Value (eur)": float,
                              "Carbon Storage (kg)": float,
                              "Carbon Storage (eur)": float,
                              "Gross Carbon Sequestration (kg/yr)": float,
                              "Gross Carbon Sequestration (eur/yr)": float,
                              "Avoided Runoff (mcube/yr)": float,
                              "Avoided Runoff (eur/yr)": float,
                              "Carbon Avoided (kg/yr)": float,
                              "Carbon Avoided (eur/yr)": float,
                              "Pollution Removal (g/yr)": float,
                              "Pollution Removal (eur/yr)": float,
                              "Energy Savings (eur/yr)": float,
                              "Total Annual Benefits (eur/yr)": float,
                              "Oxygen Production (kg/yr)": float,
                              "Height (m)": float,
                              "Crown Height (m)": float,
                              "Crown Width (m)": float,
                              "Canopy Cover (m2)": float,
                              "Leaf Area (m2)": float,
                              "Leaf Biomass (kg)": float,
                              "Latitude": float,
                              "Longitude": float})

In [11]:
# Check that the dtypes update has worked properly
dataframe.dtypes

Tree ID                                   int32
Name                                     object
DBH (cm)                                float64
Replacement Value (eur)                 float64
Carbon Storage (kg)                     float64
Carbon Storage (eur)                    float64
Gross Carbon Sequestration (kg/yr)      float64
Gross Carbon Sequestration (eur/yr)     float64
Avoided Runoff (mcube/yr)               float64
Avoided Runoff (eur/yr)                 float64
Carbon Avoided (kg/yr)                  float64
Carbon Avoided (eur/yr)                 float64
Pollution Removal (g/yr)                float64
Pollution Removal (eur/yr)              float64
Energy Savings (eur/yr)                 float64
Total Annual Benefits (eur/yr)          float64
Oxygen Production (kg/yr)               float64
Height (m)                              float64
Crown Height (m)                        float64
Crown Width (m)                         float64
Canopy Cover (m2)                       

## Code for first plot

This is the code to generate the csv file required to display the **abundance** of the neigborhoods in Trento.

In [12]:
# Load the GeoJSON file as geopandas dataframe
circoscrizioni = gpd.read_file("../../data/raw_data/circoscrizioni.json")

In [13]:
# Check the structure of the dataframe
circoscrizioni.head()

Unnamed: 0,numero_cir,area,perimetro,nome,fumetto,geometry
0,2,15712181,17850,MEANO,Circoscrizione n. 2 - MEANO,"POLYGON ((11.15678 46.11661, 11.15687 46.11666..."
1,3,36949609,38485,BONDONE,Circoscrizione n. 3 - BONDONE,"POLYGON ((11.08026 46.11571, 11.08026 46.11571..."
2,4,8663138,16403,SARDAGNA,Circoscrizione n. 4 - SARDAGNA,"POLYGON ((11.09034 46.07917, 11.09032 46.07899..."
3,6,13199398,20575,ARGENTARIO,Circoscrizione n. 6 - ARGENTARIO,"POLYGON ((11.17163 46.08095, 11.17161 46.08106..."
4,11,3073349,10962,S.GIUSEPPE-S.CHIARA,Circoscrizione n. 11 - S.GIUSEPPE-S.CHIARA,"POLYGON ((11.11135 46.05878, 11.11137 46.05878..."


In [14]:
# Join the dataframe of trees with the "circoscrizioni" one using
# the "within" predicate that permits to verify if a geometric *Point* 
# is inside a geometric *Polygon* (it is a built-in method of geopandas)
trees_in_circo = gpd.sjoin(dataframe, circoscrizioni, predicate="within")[["Tree ID", "Name", "nome"]]

In [15]:
# Check the dataframe we obtained
trees_in_circo.head()

Unnamed: 0,Tree ID,Name,nome
0,1,Acer pseudoplatanus,SARDAGNA
1,2,Acer pseudoplatanus,SARDAGNA
2,3,Acer pseudoplatanus,SARDAGNA
4,7,Cupressus,SARDAGNA
5,8,Acer pseudoplatanus,SARDAGNA


In [16]:
# Rename the variables
trees_in_circo.rename(columns={"Name":"Species",
                               "nome":"Circoscrizione"},
                      inplace=True)

In [17]:
# Count the number of trees in each "Circoscrizione"
circo_count = trees_in_circo.groupby(["Circoscrizione"]).size()

In [18]:
# Rename the count variable
circo_count.rename("Count", inplace=True)

Circoscrizione
ARGENTARIO                       454
BONDONE                          292
CENTRO STORICO PIEDICASTELLO    2964
GARDOLO                         1651
MATTARELLO                       264
MEANO                            206
OLTREFERSINA                    2695
POVO                             189
RAVINA-ROMAGNANO                 319
S.GIUSEPPE-S.CHIARA             3024
SARDAGNA                          38
VILLAZZANO                       416
Name: Count, dtype: int64

In [19]:
# Save the table
circo_count.to_csv("../../data/assign3/assign3-plot1.csv", sep=",", index=True)

In [20]:
# Load it to verify its integrity
pd.read_csv("../../data/assign3/assign3-plot1.csv")

Unnamed: 0,Circoscrizione,Count
0,ARGENTARIO,454
1,BONDONE,292
2,CENTRO STORICO PIEDICASTELLO,2964
3,GARDOLO,1651
4,MATTARELLO,264
5,MEANO,206
6,OLTREFERSINA,2695
7,POVO,189
8,RAVINA-ROMAGNANO,319
9,S.GIUSEPPE-S.CHIARA,3024


## Code for second plot


In [21]:
# Join the dataframe of trees with the "circoscrizioni" one using
# the "within" predicate that permits to verify if a geometric *Point* 
# is inside a geometric *Polygon* (it is a built-in method of geopandas)
# extracting the features of interest
canopy_in_circo = gpd.sjoin(dataframe, circoscrizioni, predicate="within")[["Canopy Cover (m2)", "nome","area"]]

In [22]:
# Check the structure of the dataframe
canopy_in_circo.head()

Unnamed: 0,Canopy Cover (m2),nome,area
0,1.8,SARDAGNA,8663138
1,1.8,SARDAGNA,8663138
2,1.8,SARDAGNA,8663138
4,1.8,SARDAGNA,8663138
5,1.8,SARDAGNA,8663138


In [23]:
# Rename the variables
canopy_in_circo.rename(columns={"Canopy Cover (m2)":"Canopy_Cover",
                               "nome":"Circoscrizione"},
                      inplace=True)

In [24]:
canopy_in_circo.head()

Unnamed: 0,Canopy_Cover,Circoscrizione,area
0,1.8,SARDAGNA,8663138
1,1.8,SARDAGNA,8663138
2,1.8,SARDAGNA,8663138
4,1.8,SARDAGNA,8663138
5,1.8,SARDAGNA,8663138


In [25]:
# Sum the canopy cover in each "Circoscrizione"
circo_count_2 = canopy_in_circo.groupby(["Circoscrizione", "area"], as_index=False)["Canopy_Cover"].sum()

In [26]:
# Check the sums
circo_count_2.head()

Unnamed: 0,Circoscrizione,area,Canopy_Cover
0,ARGENTARIO,13199398,12416.1
1,BONDONE,36949609,5979.6
2,CENTRO STORICO PIEDICASTELLO,8599464,131189.1
3,GARDOLO,8817959,27279.5
4,MATTARELLO,16409555,4995.4


In [27]:
# Compute density
circo_count_2["density"] = circo_count_2["Canopy_Cover"]/circo_count_2["area"]

In [28]:
# Check the dataframe structure
circo_count_2.head()

Unnamed: 0,Circoscrizione,area,Canopy_Cover,density
0,ARGENTARIO,13199398,12416.1,0.000941
1,BONDONE,36949609,5979.6,0.000162
2,CENTRO STORICO PIEDICASTELLO,8599464,131189.1,0.015255
3,GARDOLO,8817959,27279.5,0.003094
4,MATTARELLO,16409555,4995.4,0.000304


In [29]:
# Add the counting
circo_count_2 = circo_count_2.join(circo_count, "Circoscrizione")

In [30]:
# Check the dataframe structure
circo_count_2.head()

Unnamed: 0,Circoscrizione,area,Canopy_Cover,density,Count
0,ARGENTARIO,13199398,12416.1,0.000941,454
1,BONDONE,36949609,5979.6,0.000162,292
2,CENTRO STORICO PIEDICASTELLO,8599464,131189.1,0.015255,2964
3,GARDOLO,8817959,27279.5,0.003094,1651
4,MATTARELLO,16409555,4995.4,0.000304,264


In [31]:
# Save the table
circo_count_2.to_csv("../../data/assign3/assign3-plot2.csv", sep=",", index = False)

In [32]:
# Load it to verify its integrity
pd.read_csv("../../data/assign3/assign3-plot2.csv")  

Unnamed: 0,Circoscrizione,area,Canopy_Cover,density,Count
0,ARGENTARIO,13199398,12416.1,0.000941,454
1,BONDONE,36949609,5979.6,0.000162,292
2,CENTRO STORICO PIEDICASTELLO,8599464,131189.1,0.015255,2964
3,GARDOLO,8817959,27279.5,0.003094,1651
4,MATTARELLO,16409555,4995.4,0.000304,264
5,MEANO,15712181,3136.1,0.0002,206
6,OLTREFERSINA,6962104,87988.8,0.012638,2695
7,POVO,15433274,2866.9,0.000186,189
8,RAVINA-ROMAGNANO,16687779,6289.5,0.000377,319
9,S.GIUSEPPE-S.CHIARA,3073349,93333.1,0.030369,3024


## Code for third plot


In [43]:
oxygen_in_circo = gpd.sjoin(dataframe, circoscrizioni, predicate="within")[["Tree ID", "Name","Oxygen Production (kg/yr)","area", "nome"]]

In [44]:
oxygen_in_circo.head()

Unnamed: 0,Tree ID,Name,Oxygen Production (kg/yr),area,nome
0,1,Acer pseudoplatanus,2.8,8663138,SARDAGNA
1,2,Acer pseudoplatanus,3.0,8663138,SARDAGNA
2,3,Acer pseudoplatanus,3.4,8663138,SARDAGNA
4,7,Cupressus,29.5,8663138,SARDAGNA
5,8,Acer pseudoplatanus,3.4,8663138,SARDAGNA


In [45]:
# Rename the variables
oxygen_in_circo.rename(columns={"Name":"Species",
                               "nome":"Circoscrizione",
                               "Oxygen Production (kg/yr)":"Oxygen(kg/yr)"},
                      inplace=True)

In [48]:
# Sum the canopy cover in each "Circoscrizione"
oxygen_in_circo_2 = oxygen_in_circo.groupby(["Circoscrizione","area"], as_index=False)["Oxygen(kg/yr)"].sum()

In [49]:
oxygen_in_circo_2.head()

Unnamed: 0,Circoscrizione,area,Oxygen(kg/yr)
0,ARGENTARIO,13199398,6848.3
1,BONDONE,36949609,4559.9
2,CENTRO STORICO PIEDICASTELLO,8599464,53818.4
3,GARDOLO,8817959,12353.7
4,MATTARELLO,16409555,3492.6


In [52]:
oxygen_in_circo_3 = oxygen_in_circo_2.join(circo_count, "Circoscrizione")

In [53]:
oxygen_in_circo_3.head()

Unnamed: 0,Circoscrizione,area,Oxygen(kg/yr),Count
0,ARGENTARIO,13199398,6848.3,454
1,BONDONE,36949609,4559.9,292
2,CENTRO STORICO PIEDICASTELLO,8599464,53818.4,2964
3,GARDOLO,8817959,12353.7,1651
4,MATTARELLO,16409555,3492.6,264


In [54]:
# Save the table
oxygen_in_circo_3.to_csv("../../data/assign3/assign3-plot3.csv", sep=",", index = False)

In [55]:
# Load it to verify its integrity
pd.read_csv("../../data/assign3/assign3-plot3.csv")  

Unnamed: 0,Circoscrizione,area,Oxygen(kg/yr),Count
0,ARGENTARIO,13199398,6848.3,454
1,BONDONE,36949609,4559.9,292
2,CENTRO STORICO PIEDICASTELLO,8599464,53818.4,2964
3,GARDOLO,8817959,12353.7,1651
4,MATTARELLO,16409555,3492.6,264
5,MEANO,15712181,2961.1,206
6,OLTREFERSINA,6962104,43064.0,2695
7,POVO,15433274,2762.2,189
8,RAVINA-ROMAGNANO,16687779,3680.8,319
9,S.GIUSEPPE-S.CHIARA,3073349,51717.3,3024
