In [2]:
# Data manipulation and analysis
import pandas as pd
import numpy as np
import requests
import io
from sklearn.preprocessing import LabelEncoder
from io import BytesIO
from zipfile import ZipFile
import tempfile
import os
import json

# Geo
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon

# Visualization
import matplotlib.pylab as plt
import matplotlib.patches as mpatches
import seaborn as sns
import folium
from folium import plugins
import branca.colormap as cm
from branca.colormap import StepColormap

%matplotlib inline


In [3]:
# Define a function to download and read a CSV file.
def download_and_load_csv(url, timeout=60):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the CSV resource to fetch.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection raises instead of
        blocking the kernel indefinitely. Defaults to 60 seconds.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the response body.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout``.
    UnicodeDecodeError
        If the response body is not valid UTF-8.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors; otherwise the error body would be handed to
    # the CSV parser and come back as a meaningless DataFrame.
    response.raise_for_status()
    csv_string = response.content.decode('utf-8')
    return pd.read_csv(io.StringIO(csv_string))

# CSV export endpoints on data.melbourne.vic.gov.au
download_link_1 = (
    'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/trees-with-species-and-dimensions-urban-forest/exports/csv?lang=en&timezone=Australia%2FSydney&use_labels=true&delimiter=%2C'
)
download_link_2 = (
    'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/argyle-square-air-quality/exports/csv?lang=en&timezone=Australia%2FSydney&use_labels=true&delimiter=%2C'
)

# Fetch each export into its own DataFrame: trees first, then air quality
tree_df = download_and_load_csv(url=download_link_1)
airquality_df = download_and_load_csv(url=download_link_2)


In [4]:
# Keep only the identifier, planting year/date, location type and coordinates.
# .copy() makes the subset an independent frame, so assigning columns to
# tree_df afterwards does not trigger pandas' SettingWithCopyWarning.
tree_df = tree_df[['CoM ID','Year Planted', 'Date Planted', 'Located in', 'geolocation']].copy()
tree_df

Unnamed: 0,CoM ID,Year Planted,Date Planted,Located in,geolocation
0,1440992,2009,2009-12-14,Park,"-37.789042536009, 144.94750113149306"
1,1286119,2008,2008-07-16,Park,"-37.78989006812276, 144.9256959906416"
2,1439848,2009,2009-09-08,Street,"-37.795227592098875, 144.91940533967247"
3,1584631,2015,2015-06-18,Park,"-37.795178798251044, 144.95235531785673"
4,1286271,2008,2008-12-18,Street,"-37.7904175404039, 144.92779056976474"
...,...,...,...,...,...
76923,1526689,2013,2013-01-22,Park,"-37.77968423847431, 144.96217220362632"
76924,1783637,2020,2020-05-19,Park,"-37.781052359602995, 144.95237974656814"
76925,1023894,1899,1999-12-30,Street,"-37.8092487661872, 144.96541492209963"
76926,1287085,2008,2008-08-25,Street,"-37.83631076544197, 144.98513657513598"


In [5]:
# Display the air-quality DataFrame loaded from download_link_2
airquality_df

Unnamed: 0,date_measure,dev_id,Sensor Name,Lat Long,averageSpl,carbonMonoxide,humidity,iBatt,nitrogenDioxide,ozone,particulatesErr,particulatesVsn,peakSpl,pm1,pm10,pm25,temperature,vBatt,vPanel
0,2021-05-15T16:04:33+10:00,ems-ec8a,Air Quality Sensor 2,"-37.802772, 144.9655513",56.0,-6261.0,51.0,12.0,258.0,119.0,0.0,151.0,62.0,0.0,0.0,0.0,14.9,4.01,18.33
1,2021-05-15T16:10:49+10:00,ems-ce10,Air Quality Sensor 1,"-37.802772, 144.9655513",70.0,-2887.0,61.0,132.0,123.0,-13.0,0.0,151.0,87.0,0.0,0.0,0.0,13.2,3.90,16.03
2,2021-05-15T17:49:32+10:00,ems-ec8a,Air Quality Sensor 2,"-37.802772, 144.9655513",56.0,-6822.0,59.0,98.0,306.0,147.0,0.0,151.0,60.0,0.0,1.0,1.0,12.7,3.95,0.00
3,2021-05-15T19:19:32+10:00,ems-ec8a,Air Quality Sensor 2,"-37.802772, 144.9655513",57.0,-6822.0,63.0,109.0,306.0,142.0,0.0,151.0,65.0,2.0,5.0,3.0,12.1,3.93,0.00
4,2021-05-15T19:25:48+10:00,ems-ce10,Air Quality Sensor 1,"-37.802772, 144.9655513",66.0,-2887.0,69.0,141.0,115.0,-17.0,0.0,151.0,81.0,1.0,5.0,2.0,12.3,3.86,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136159,2023-08-23T11:54:46+10:00,ems-ce10,Air Quality Sensor 1,"-37.802772, 144.9655513",55.0,341.0,63.0,126.0,58.0,-142.0,0.0,151.0,62.0,0.0,0.0,0.0,22.9,3.36,0.14
136160,2023-08-23T12:39:45+10:00,ems-ce10,Air Quality Sensor 1,"-37.802772, 144.9655513",55.0,341.0,62.0,115.0,58.0,-137.0,0.0,151.0,63.0,0.0,0.0,0.0,23.0,3.35,0.13
136161,2023-08-23T13:54:45+10:00,ems-ce10,Air Quality Sensor 1,"-37.802772, 144.9655513",54.0,341.0,61.0,150.0,65.0,-132.0,0.0,151.0,62.0,0.0,0.0,0.0,23.3,3.33,0.12
136162,2023-08-23T14:09:45+10:00,ems-ce10,Air Quality Sensor 1,"-37.802772, 144.9655513",54.0,341.0,61.0,141.0,65.0,-137.0,0.0,151.0,63.0,0.0,1.0,0.0,23.3,3.31,0.16


In [8]:
# Redefining the climate_columns variable
climate_columns = ['averageSpl', 'carbonMonoxide', 'humidity', 'nitrogenDioxide', 'ozone',
                   'pm1', 'pm10', 'pm25', 'temperature']

# The grouping below needs a 'year' column, which the raw export does not
# contain. Derive it from 'date_measure' when it is absent so this cell also
# works on a fresh kernel (Restart & Run All).
if 'year' not in airquality_df.columns:
    # Timestamps carry a UTC offset (e.g. +10:00). Parsing to UTC copes with
    # differing offsets; converting back to the timezone requested in the
    # export URL yields the local calendar year.
    measured_at = pd.to_datetime(airquality_df['date_measure'], utc=True)
    airquality_df['year'] = measured_at.dt.tz_convert('Australia/Sydney').dt.year

# Group by year and calculate the mean for each climate-related column;
# reset_index() turns 'year' back into an ordinary column.
yearly_climate_data = (
    airquality_df.groupby('year')[climate_columns]
    .mean()
    .reset_index()
)

yearly_climate_data

Unnamed: 0,year,averageSpl,carbonMonoxide,humidity,nitrogenDioxide,ozone,pm1,pm10,pm25,temperature
0,2020,58.94106,-4092.711303,68.991351,166.654759,51.853665,3.110019,5.773877,5.042852,15.481667
1,2021,58.968768,-4434.11572,73.276783,202.743852,65.618403,3.219453,5.661043,4.547659,16.319262
2,2022,56.673733,-5676.437887,75.696437,256.799176,126.063312,3.501128,6.12974,5.049418,16.977887
3,2023,57.191831,-5202.634769,74.948875,241.54005,111.685362,3.545809,6.20397,5.111466,19.133502


In [14]:
# Parse planting dates written as year-month-day; values that cannot be
# parsed become NaT because of errors='coerce'.
tree_df['Date Planted'] = pd.to_datetime(
    tree_df['Date Planted'],
    format='%Y-%m-%d',
    errors='coerce',
)

# Subsets of trees by the calendar year of their planting date
tree_2020 = tree_df.loc[tree_df['Date Planted'].dt.year.eq(2020)]
tree_2021 = tree_df.loc[tree_df['Date Planted'].dt.year.eq(2021)]
tree_2004 = tree_df.loc[tree_df['Date Planted'].dt.year.eq(2004)]

In [16]:
# Number of trees per planting year, ordered chronologically by year
tree_count_by_year = (
    tree_df['Date Planted']
    .dt.year
    .value_counts()
    .sort_index()
)

tree_count_by_year

1977        1
1997     6570
1998    12044
1999     3009
2000     7612
2004        1
2005      270
2006     2967
2007      804
2008     2276
2009     2329
2010     2074
2011     2021
2012     3024
2013     7744
2014     1891
2015     3690
2016     2375
2017     3099
2018     2566
2019     3684
2020     3618
2021     3259
Name: Date Planted, dtype: int64