## Importing libraries and loading data

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import geopandas as gpd

In [2]:
# Read the GeoJSON file into a geopandas dataframe and discard its final row,
# which holds the aggregated "Total" observation rather than a single tree
dataframe = gpd.read_file("../../data/raw_data/geo_data_trees.geojson").iloc[:-1]

In [3]:
# Preview the first rows to check the structure of the dataframe
dataframe.head()

Unnamed: 0,Tree ID,Name,DBH (cm),Replacement Value (eur),Carbon Storage (kg),Carbon Storage (eur),Gross Carbon Sequestration (kg/yr),Gross Carbon Sequestration (eur/yr),Avoided Runoff (mcube/yr),Avoided Runoff (eur/yr),...,Oxygen Production (kg/yr),Height (m),Crown Height (m),Crown Width (m),Canopy Cover (m2),Leaf Area (m2),Leaf Biomass (kg),Latitude,Longitude,geometry
0,1.0,Acer pseudoplatanus,8,179.43,12.0,1.92,1.1,0.17,0,0.01,...,2.8,3.5,1.0,1.5,1.8,3.5,0.2,46.051599,11.072014,POINT (11.07201 46.05160)
1,2.0,Acer pseudoplatanus,8,179.43,12.9,2.07,1.1,0.18,0,0.01,...,3.0,5.0,1.5,1.5,1.8,4.2,0.3,46.051541,11.072108,POINT (11.07211 46.05154)
2,3.0,Acer pseudoplatanus,9,227.09,16.2,2.61,1.3,0.2,0,0.01,...,3.4,4.5,1.3,1.5,1.8,4.0,0.3,46.051668,11.071959,POINT (11.07196 46.05167)
3,4.0,Acer pseudoplatanus,11,482.46,25.7,4.13,1.6,0.26,0,0.02,...,4.3,5.0,1.5,2.5,4.9,7.1,0.5,46.063778,11.15017,POINT (11.15017 46.06378)
4,7.0,Cupressus,25,1110.29,448.2,72.02,11.1,1.78,0,0.02,...,29.5,8.0,2.4,1.5,1.8,6.6,1.5,46.052305,11.07155,POINT (11.07155 46.05230)


In [4]:
# Inspect the dtype of every column: most of them are loaded as generic
# `object` and have to be converted before any numeric processing
dataframe.dtypes

Tree ID                                 float64
Name                                     object
DBH (cm)                                 object
Replacement Value (eur)                  object
Carbon Storage (kg)                      object
Carbon Storage (eur)                     object
Gross Carbon Sequestration (kg/yr)       object
Gross Carbon Sequestration (eur/yr)      object
Avoided Runoff (mcube/yr)                object
Avoided Runoff (eur/yr)                  object
Carbon Avoided (kg/yr)                   object
Carbon Avoided (eur/yr)                  object
Pollution Removal (g/yr)                 object
Pollution Removal (eur/yr)               object
Energy Savings (eur/yr)                  object
Total Annual Benefits (eur/yr)           object
Oxygen Production (kg/yr)                object
Height (m)                               object
Crown Height (m)                         object
Crown Width (m)                          object
Canopy Cover (m2)                       

In [5]:
# Cast every column to its proper dtype: the tree identifier becomes an
# integer, the species name a string, and every remaining listed column a float
float_columns = [
    "DBH (cm)",
    "Replacement Value (eur)",
    "Carbon Storage (kg)",
    "Carbon Storage (eur)",
    "Gross Carbon Sequestration (kg/yr)",
    "Gross Carbon Sequestration (eur/yr)",
    "Avoided Runoff (mcube/yr)",
    "Avoided Runoff (eur/yr)",
    "Carbon Avoided (kg/yr)",
    "Carbon Avoided (eur/yr)",
    "Pollution Removal (g/yr)",
    "Pollution Removal (eur/yr)",
    "Energy Savings (eur/yr)",
    "Total Annual Benefits (eur/yr)",
    "Oxygen Production (kg/yr)",
    "Height (m)",
    "Crown Height (m)",
    "Crown Width (m)",
    "Canopy Cover (m2)",
    "Leaf Area (m2)",
    "Leaf Biomass (kg)",
    "Latitude",
    "Longitude",
]
dataframe = dataframe.astype(
    {"Tree ID": int, "Name": str, **dict.fromkeys(float_columns, float)}
)

In [6]:
# Inspect the dtypes again to confirm that the conversion was applied
dataframe.dtypes

Tree ID                                   int32
Name                                     object
DBH (cm)                                float64
Replacement Value (eur)                 float64
Carbon Storage (kg)                     float64
Carbon Storage (eur)                    float64
Gross Carbon Sequestration (kg/yr)      float64
Gross Carbon Sequestration (eur/yr)     float64
Avoided Runoff (mcube/yr)               float64
Avoided Runoff (eur/yr)                 float64
Carbon Avoided (kg/yr)                  float64
Carbon Avoided (eur/yr)                 float64
Pollution Removal (g/yr)                float64
Pollution Removal (eur/yr)              float64
Energy Savings (eur/yr)                 float64
Total Annual Benefits (eur/yr)          float64
Oxygen Production (kg/yr)               float64
Height (m)                              float64
Crown Height (m)                        float64
Crown Width (m)                         float64
Canopy Cover (m2)                       

---

## Code for first plot

This code generates the CSV file required to display the **histogram of the height** of trees in Trento.

In [7]:
# Extract the species name together with the four tree measures we decided
# to consider (height, diameter, canopy cover and leaf area)
dimension_columns = ["Name", "Height (m)", "DBH (cm)", "Canopy Cover (m2)", "Leaf Area (m2)"]
tree_dimensions = dataframe[dimension_columns].copy()

In [8]:
# Give the columns short names without units; the renamed frame is reassigned
# to the same variable instead of being modified in place
tree_dimensions = tree_dimensions.rename(
    columns={
        "Name": "Species",
        "Height (m)": "Height",
        "DBH (cm)": "Diameter",
        "Canopy Cover (m2)": "Canopy_size",
        "Leaf Area (m2)": "Leaf_area",
    }
)

In [9]:
# Keep only the numeric dimensions (the species name is dropped) for the histogram data
hist_tree_dimensions = tree_dimensions[["Height", "Diameter", "Canopy_size", "Leaf_area"]].copy()

In [10]:
# Print the minimum and then the maximum of every tree dimension
print(hist_tree_dimensions.min(), hist_tree_dimensions.max(), sep="\n")

Height         0.1
Diameter       1.3
Canopy_size    0.1
Leaf_area      0.3
dtype: float64
Height           60.0
Diameter        705.0
Canopy_size     530.9
Leaf_area      2394.0
dtype: float64


In [11]:
# Save the dataframe of tree dimensions (height, diameter, canopy size and
# leaf area) as a comma-separated file, without writing the row index
hist_tree_dimensions.to_csv("../../data/assign2-plot1.csv", sep=",", index=False)

In [12]:
# Read the file back to verify that it was written correctly
pd.read_csv("../../data/assign2-plot1.csv")

Unnamed: 0,Height,Diameter,Canopy_size,Leaf_area
0,3.5,8.0,1.8,3.5
1,5.0,8.0,1.8,4.2
2,4.5,9.0,1.8,4.0
3,5.0,11.0,4.9,7.1
4,8.0,25.0,1.8,6.6
...,...,...,...,...
12507,2.5,28.2,21.7,43.6
12508,2.5,28.2,21.7,36.1
12509,8.0,28.2,21.7,59.9
12510,8.0,28.2,21.7,41.3


---

## Code for second plot

This code generates the CSV file required to display the boxplot of three possible measures (height, canopy cover, and diameter) of the top-5 tree species in Trento.

In [13]:
# Load the species column of a previously generated dataset and keep its
# first five entries (assumed to be ordered so that these are the top-5
# species - TODO confirm against the notebook that produced the file)
top5_species = pd.read_csv("../../data/assign1-plot1.csv")["Species"].head(5)

In [14]:
# Display the tree dimensions dataframe to check its content before filtering
tree_dimensions

Unnamed: 0,Species,Height,Diameter,Canopy_size,Leaf_area
0,Acer pseudoplatanus,3.5,8.0,1.8,3.5
1,Acer pseudoplatanus,5.0,8.0,1.8,4.2
2,Acer pseudoplatanus,4.5,9.0,1.8,4.0
3,Acer pseudoplatanus,5.0,11.0,4.9,7.1
4,Cupressus,8.0,25.0,1.8,6.6
...,...,...,...,...,...
12507,Philadelphus,2.5,28.2,21.7,43.6
12508,Philadelphus,2.5,28.2,21.7,36.1
12509,Forsythia x intermedia,8.0,28.2,21.7,59.9
12510,Aucuba japonica,8.0,28.2,21.7,41.3


In [15]:
# Build a boolean mask flagging the rows whose species is one of the top-5,
# then use it to keep only those rows
is_top5_species = tree_dimensions["Species"].isin(top5_species)
tree_dimensions_top5 = tree_dimensions[is_top5_species]

In [16]:
# Display the filtered dataframe to check that only top-5 species remain
tree_dimensions_top5

Unnamed: 0,Species,Height,Diameter,Canopy_size,Leaf_area
17,Tilia cordata,4.5,8.0,7.1,9.5
18,Tilia cordata,5.0,11.0,7.1,13.3
19,Tilia cordata,5.0,11.0,7.1,11.8
20,Tilia cordata,4.0,7.0,3.1,4.9
21,Tilia cordata,6.0,18.0,28.3,71.4
...,...,...,...,...,...
12320,Carpinus betulus,6.2,17.0,10.1,22.7
12321,Carpinus betulus,6.2,17.0,10.1,21.2
12322,Carpinus betulus,6.2,17.0,10.1,21.2
12343,Celtis australis,8.3,30.1,35.2,96.9


In [17]:
# Save the dimensions of the top-5 species as a comma-separated file,
# without writing the row index
tree_dimensions_top5.to_csv("../../data/assign2-plot2.csv", sep=",", index=False)

In [18]:
# Read the file back to verify that it was written correctly
pd.read_csv("../../data/assign2-plot2.csv")

Unnamed: 0,Species,Height,Diameter,Canopy_size,Leaf_area
0,Tilia cordata,4.5,8.0,7.1,9.5
1,Tilia cordata,5.0,11.0,7.1,13.3
2,Tilia cordata,5.0,11.0,7.1,11.8
3,Tilia cordata,4.0,7.0,3.1,4.9
4,Tilia cordata,6.0,18.0,28.3,71.4
...,...,...,...,...,...
3151,Carpinus betulus,6.2,17.0,10.1,22.7
3152,Carpinus betulus,6.2,17.0,10.1,21.2
3153,Carpinus betulus,6.2,17.0,10.1,21.2
3154,Celtis australis,8.3,30.1,35.2,96.9


--- Davide Marinelli

In [28]:
# Split the top-5 dataframe into one dataframe per species.
# Exact equality is used rather than `str.contains`, which interprets its
# argument as a regular expression and also matches substrings, so it could
# select rows of a different species whose name merely contains the pattern.
data1 = tree_dimensions_top5[tree_dimensions_top5["Species"] == "Tilia cordata"]
data2 = tree_dimensions_top5[tree_dimensions_top5["Species"] == "Carpinus betulus"]
data3 = tree_dimensions_top5[tree_dimensions_top5["Species"] == "Celtis australis"]
data4 = tree_dimensions_top5[tree_dimensions_top5["Species"] == "Platanus x hispanica"]
data5 = tree_dimensions_top5[tree_dimensions_top5["Species"] == "Aesculus hippocastanum"]
# Display the first subset as a sanity check
data1

Unnamed: 0,Species,Height,Diameter,Canopy_size,Leaf_area
17,Tilia cordata,4.5,8.0,7.1,9.5
18,Tilia cordata,5.0,11.0,7.1,13.3
19,Tilia cordata,5.0,11.0,7.1,11.8
20,Tilia cordata,4.0,7.0,3.1,4.9
21,Tilia cordata,6.0,18.0,28.3,71.4
...,...,...,...,...,...
11737,Tilia cordata,5.0,18.0,7.1,15.8
12101,Tilia cordata,12.0,52.0,38.5,160.4
12103,Tilia cordata,16.0,70.0,50.3,273.4
12294,Tilia cordata,15.0,48.4,12.6,57.8


In [38]:
# Boxplot statistics for the height of the first species (data1):
# the three quartiles, computed in a single call, and the 1.5 * IQR fences
q1, median, q3 = np.percentile(data1["Height"], [25, 50, 75])
interQuantileRange = q3 - q1
min_box = q1 - 1.5 * interQuantileRange
max_box = q3 + 1.5 * interQuantileRange
# Print each statistic on its own line
for statistic in (q1, median, q3, min_box, max_box):
    print(statistic)


4.0
5.0
7.0
-0.5
11.5


In [35]:
# Quartiles of the height of the second species (data2)
q1 = np.percentile(data2["Height"], 25)
median = np.percentile(data2["Height"], 50)
q3 = np.percentile(data2["Height"], 75)
# Print the values computed above. The previous version printed p1, p2 and p3,
# which are never defined in this notebook and raise a NameError on a fresh kernel.
print(q1)
print(median)
print(q3)

4.0
5.25
8.0


---

## Code for third plot

---

## Code for fourth plot

---

## Code for fifth plot

In [175]:
# Extract the species name, the three suggested tree measures and the extra
# ones we decided to consider for the bubble chart
bubble_columns = [
    "Name",
    "Height (m)",
    "DBH (cm)",
    "Leaf Area (m2)",
    "Gross Carbon Sequestration (kg/yr)",
    "Canopy Cover (m2)",
]
bubble_chart = dataframe[bubble_columns].copy()

In [176]:
# Give the columns short names without units; the renamed frame is reassigned
# to the same variable instead of being modified in place
bubble_chart = bubble_chart.rename(
    columns={
        "Name": "Species",
        "Height (m)": "Height",
        "DBH (cm)": "Diameter",
        "Canopy Cover (m2)": "Canopy_size",
        "Gross Carbon Sequestration (kg/yr)": "CO2",
        "Leaf Area (m2)": "Leaf_area",
    }
)

In [177]:
# Display the bubble chart dataframe to check its content before filtering
bubble_chart

Unnamed: 0,Species,Height,Diameter,Leaf_area,CO2,Canopy_size
0,Acer pseudoplatanus,3.5,8.0,3.5,1.1,1.8
1,Acer pseudoplatanus,5.0,8.0,4.2,1.1,1.8
2,Acer pseudoplatanus,4.5,9.0,4.0,1.3,1.8
3,Acer pseudoplatanus,5.0,11.0,7.1,1.6,4.9
4,Cupressus,8.0,25.0,6.6,11.1,1.8
...,...,...,...,...,...,...
12507,Philadelphus,2.5,28.2,43.6,10.7,21.7
12508,Philadelphus,2.5,28.2,36.1,10.7,21.7
12509,Forsythia x intermedia,8.0,28.2,59.9,10.5,21.7
12510,Aucuba japonica,8.0,28.2,41.3,6.3,21.7


In [178]:
# Build a boolean mask flagging the rows whose species is one of the top-5,
# then use it to keep only those rows
is_top5_bubble = bubble_chart["Species"].isin(top5_species)
bubble_chart_top5 = bubble_chart[is_top5_bubble]

In [179]:
# Display the filtered dataframe to check that only top-5 species remain
bubble_chart_top5

Unnamed: 0,Species,Height,Diameter,Leaf_area,CO2,Canopy_size
17,Tilia cordata,4.5,8.0,9.5,0.8,7.1
18,Tilia cordata,5.0,11.0,13.3,1.3,7.1
19,Tilia cordata,5.0,11.0,11.8,1.3,7.1
20,Tilia cordata,4.0,7.0,4.9,0.7,3.1
21,Tilia cordata,6.0,18.0,71.4,2.4,28.3
...,...,...,...,...,...,...
12320,Carpinus betulus,6.2,17.0,22.7,3.2,10.1
12321,Carpinus betulus,6.2,17.0,21.2,3.2,10.1
12322,Carpinus betulus,6.2,17.0,21.2,3.2,10.1
12343,Celtis australis,8.3,30.1,96.9,1.3,35.2


In [180]:
# Save the bubble chart data of the top-5 species as a comma-separated file,
# without writing the row index
bubble_chart_top5.to_csv("../../data/assign2-plot5.csv", sep=",", index=False)

In [181]:
# Read the file back to verify that it was written correctly
pd.read_csv("../../data/assign2-plot5.csv")

Unnamed: 0,Species,Height,Diameter,Leaf_area,CO2,Canopy_size
0,Tilia cordata,4.5,8.0,9.5,0.8,7.1
1,Tilia cordata,5.0,11.0,13.3,1.3,7.1
2,Tilia cordata,5.0,11.0,11.8,1.3,7.1
3,Tilia cordata,4.0,7.0,4.9,0.7,3.1
4,Tilia cordata,6.0,18.0,71.4,2.4,28.3
...,...,...,...,...,...,...
3151,Carpinus betulus,6.2,17.0,22.7,3.2,10.1
3152,Carpinus betulus,6.2,17.0,21.2,3.2,10.1
3153,Carpinus betulus,6.2,17.0,21.2,3.2,10.1
3154,Celtis australis,8.3,30.1,96.9,1.3,35.2
