# Urban Forestry and Bird Populations

--- introduction goes here ---

In [1]:
# Import packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from numpy import nan
import plotly.express as px


--API goes here--

### Clean Bird Dataset

In [2]:
# Load the raw bird survey export (City of Melbourne, February-March 2018)
# and preview the first three rows
BIRD_SURVEY_CSV = "bird-survey-results-for-areas-in-the-city-of-melbourne-february-and-march-2018.csv"
bird_data = pd.read_csv(BIRD_SURVEY_CSV)
bird_data.head(3)

Unnamed: 0,Sighting Date,Common Name,Scientific Name,Sighting Count,Victorian Biodiversity Atlas Code,lat,lon,loc1_desc,lat2,lon2,loc2_desc,site_name,Location 2,Location 1
0,2018-03-12,Australian Magpie,Gymnorhina tibicen,2,10705,-37.8038,144.9118,Dynon Road Tidal Canal Wildlife Sanctuary Down...,,,,Dynon Road Tidal Canal Wildlife Sanctuary,,"-37.8038, 144.9118"
1,2018-02-28,Australian White Ibis,Threskiornis molucca,141,10179,-37.8038,144.9118,Dynon Road Tidal Canal Wildlife Sanctuary Down...,,,,Dynon Road Tidal Canal Wildlife Sanctuary,,"-37.8038, 144.9118"
2,2018-03-12,Australian White Ibis,Threskiornis molucca,83,10179,-37.8038,144.9118,Dynon Road Tidal Canal Wildlife Sanctuary Down...,,,,Dynon Road Tidal Canal Wildlife Sanctuary,,"-37.8038, 144.9118"


In [3]:
# View info on bird dataset: column names, non-null counts and dtypes.
# The non-null counts show which columns are sparsely populated.
bird_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 498 entries, 0 to 497
Data columns (total 14 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Sighting Date                      498 non-null    object 
 1   Common Name                        498 non-null    object 
 2   Scientific Name                    498 non-null    object 
 3   Sighting Count                     498 non-null    int64  
 4   Victorian Biodiversity Atlas Code  498 non-null    int64  
 5   lat                                498 non-null    float64
 6   lon                                498 non-null    float64
 7   loc1_desc                          498 non-null    object 
 8   lat2                               248 non-null    float64
 9   lon2                               248 non-null    float64
 10  loc2_desc                          248 non-null    object 
 11  site_name                          498 non-null    object 

In [4]:
# Drop the secondary-location columns and the combined coordinate strings:
#   - lat2, lon2, loc2_desc: only 248 of 498 rows are populated
#   - Location 2, Location 1: string location columns; the 'Location 1' values in
#     the preview repeat what the lat/lon columns already hold
# errors='ignore' keeps this cell safe to re-run after the columns are already
# gone (a plain `del bird_data[col]` would raise KeyError on the second run).
bird_data = bird_data.drop(
    columns=['lat2', 'lon2', 'loc2_desc', 'Location 2', 'Location 1'],
    errors='ignore',
)

In [5]:
# Parse 'Sighting Date' from text into pandas datetimes so it can be grouped and plotted over time
bird_data = bird_data.assign(
    **{'Sighting Date': pd.to_datetime(bird_data['Sighting Date'])}
)

In [6]:
# Re-check the schema after cleaning: the dropped columns should be gone
# and 'Sighting Date' should now have a datetime dtype
bird_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 498 entries, 0 to 497
Data columns (total 9 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   Sighting Date                      498 non-null    datetime64[ns]
 1   Common Name                        498 non-null    object        
 2   Scientific Name                    498 non-null    object        
 3   Sighting Count                     498 non-null    int64         
 4   Victorian Biodiversity Atlas Code  498 non-null    int64         
 5   lat                                498 non-null    float64       
 6   lon                                498 non-null    float64       
 7   loc1_desc                          498 non-null    object        
 8   site_name                          498 non-null    object        
dtypes: datetime64[ns](1), float64(2), int64(2), object(4)
memory usage: 35.1+ KB


In [7]:
# Preview the first three rows of the cleaned bird data
bird_data.head(3)

Unnamed: 0,Sighting Date,Common Name,Scientific Name,Sighting Count,Victorian Biodiversity Atlas Code,lat,lon,loc1_desc,site_name
0,2018-03-12,Australian Magpie,Gymnorhina tibicen,2,10705,-37.8038,144.9118,Dynon Road Tidal Canal Wildlife Sanctuary Down...,Dynon Road Tidal Canal Wildlife Sanctuary
1,2018-02-28,Australian White Ibis,Threskiornis molucca,141,10179,-37.8038,144.9118,Dynon Road Tidal Canal Wildlife Sanctuary Down...,Dynon Road Tidal Canal Wildlife Sanctuary
2,2018-03-12,Australian White Ibis,Threskiornis molucca,83,10179,-37.8038,144.9118,Dynon Road Tidal Canal Wildlife Sanctuary Down...,Dynon Road Tidal Canal Wildlife Sanctuary


### Clean Tree Dataset

In [8]:
# Load the raw urban-forest tree export and preview the first three rows
TREE_CSV = "trees-with-species-and-dimensions-urban-forest.csv"
tree_data = pd.read_csv(TREE_CSV)
tree_data.head(3)

Unnamed: 0,CoM ID,Common Name,Scientific Name,Genus,Family,Diameter Breast Height,Year Planted,Date Planted,Age Description,Useful Life Expectency,Useful Life Expectency Value,Precinct,Located in,UploadDate,CoordinateLocation,Latitude,Longitude,Easting,Northing,geolocation
0,1029241,London Plane,Platanus x acerifolia,Platanus,Platanaceae,59.0,1997,1997-12-04,Mature,6-10 years (>50% canopy),10.0,,Street,2021-01-10,"-37.834844802361296, 144.97624052189326",-37.834845,144.976241,321912.33,5810579.39,"-37.834844802361296, 144.97624052189326"
1,1357481,Cyprus Plane,Platanus orientalis,Platanus,Platanaceae,8.0,2008,2008-03-12,Juvenile,61+ years,80.0,,Park,2021-01-10,"-37.82112379777012, 144.97204161951672",-37.821124,144.972042,321509.73,5812093.94,"-37.82112379777012, 144.97204161951672"
2,1022615,Spotted Gum,Corymbia maculata,Corymbia,Myrtaceae,73.0,1997,1997-11-10,Mature,31-60 years,60.0,,Street,2021-01-10,"-37.800407968829234, 144.9624661325885",-37.800408,144.962466,320616.72,5814374.35,"-37.800407968829234, 144.9624661325885"


In [9]:
# View tree info: column names, non-null counts and dtypes.
# The non-null counts show which columns are sparsely populated.
tree_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76928 entries, 0 to 76927
Data columns (total 20 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   CoM ID                        76928 non-null  int64  
 1   Common Name                   76903 non-null  object 
 2   Scientific Name               76927 non-null  object 
 3   Genus                         76927 non-null  object 
 4   Family                        76927 non-null  object 
 5   Diameter Breast Height        24986 non-null  float64
 6   Year Planted                  76928 non-null  int64  
 7   Date Planted                  76928 non-null  object 
 8   Age Description               24969 non-null  object 
 9   Useful Life Expectency        24969 non-null  object 
 10  Useful Life Expectency Value  24969 non-null  float64
 11  Precinct                      0 non-null      float64
 12  Located in                    76926 non-null  object 
 13  U

In [10]:
# Drop columns in which more than 50% of the values are missing
# (fewer than 25,000 of 76,928 rows populated; 'Precinct' is entirely empty).
# errors='ignore' keeps this cell safe to re-run after the columns are already
# gone (a plain `del tree_data[col]` would raise KeyError on the second run).
tree_data = tree_data.drop(
    columns=[
        'Diameter Breast Height',
        'Age Description',
        'Useful Life Expectency',
        'Useful Life Expectency Value',
        'Precinct',
    ],
    errors='ignore',
)

In [11]:
# Parse both date columns from text into pandas datetimes in one pass
tree_data = tree_data.assign(**{
    date_col: pd.to_datetime(tree_data[date_col])
    for date_col in ('Date Planted', 'UploadDate')
})

In [12]:
# Preview the first three rows of the cleaned tree data
tree_data.head(3)

Unnamed: 0,CoM ID,Common Name,Scientific Name,Genus,Family,Year Planted,Date Planted,Located in,UploadDate,CoordinateLocation,Latitude,Longitude,Easting,Northing,geolocation
0,1029241,London Plane,Platanus x acerifolia,Platanus,Platanaceae,1997,1997-12-04,Street,2021-01-10,"-37.834844802361296, 144.97624052189326",-37.834845,144.976241,321912.33,5810579.39,"-37.834844802361296, 144.97624052189326"
1,1357481,Cyprus Plane,Platanus orientalis,Platanus,Platanaceae,2008,2008-03-12,Park,2021-01-10,"-37.82112379777012, 144.97204161951672",-37.821124,144.972042,321509.73,5812093.94,"-37.82112379777012, 144.97204161951672"
2,1022615,Spotted Gum,Corymbia maculata,Corymbia,Myrtaceae,1997,1997-11-10,Street,2021-01-10,"-37.800407968829234, 144.9624661325885",-37.800408,144.962466,320616.72,5814374.35,"-37.800407968829234, 144.9624661325885"


# EDA - Birds

### 10 Most Common Birds

In [16]:
# Total sightings per species, reduced to the ten species with the largest totals
top_birds = (
    bird_data
    .groupby('Common Name')['Sighting Count']
    .sum()
    .nlargest(10)
    .reset_index()
)

# Interactive bar chart: one bar per species, bar height = total sightings
fig = px.bar(
    top_birds,
    x='Common Name',
    y='Sighting Count',
    title='Top 10 Most Common Birds',
    labels={'Common Name': 'Bird Species', 'Sighting Count': 'Sighting Count'},
    hover_data={'Sighting Count': True, 'Common Name': True},
    color_discrete_sequence=['#1f77b4'],
)

# Render the figure in the notebook
fig.show()


### Most Common Bird-Sighting Sites

In [17]:
# Total sightings per survey site, reduced to the ten sites with the largest totals
top_sites = (
    bird_data
    .groupby('site_name')['Sighting Count']
    .sum()
    .nlargest(10)
    .reset_index()
)

# Interactive bar chart: one bar per site, bar height = total sightings
fig = px.bar(
    top_sites,
    x='site_name',
    y='Sighting Count',
    title='Top 10 Most Common Sites',
    labels={'site_name': 'Site Names', 'Sighting Count': 'Sighting Count'},
    hover_data={'Sighting Count': True, 'site_name': True},
    color_discrete_sequence=['#1f77b4'],
)

# Render the figure in the notebook
fig.show()

### Sightings of Bird Species Over Time

In [15]:
# Names of the ten species with the highest total sighting counts.
# Kept under its own name so that the `top_birds` table built for the
# "10 Most Common Birds" chart is not overwritten with a different kind of
# object (an Index of names instead of a DataFrame).
top_bird_names = (
    bird_data
    .groupby('Common Name')['Sighting Count']
    .sum()
    .nlargest(10)
    .index
)

# Keep only the rows that belong to one of those ten species
filtered_df = bird_data[bird_data['Common Name'].isin(top_bird_names)]

# Total sightings per (date, species) pair: one row per point on the chart
time_series_data = (
    filtered_df
    .groupby(['Sighting Date', 'Common Name'])['Sighting Count']
    .sum()
    .reset_index()
)

# Interactive line chart with one coloured line per species
fig = px.line(time_series_data,
              x='Sighting Date',
              y='Sighting Count',
              color='Common Name',
              title='Time Series of Top 10 Bird Species Sightings',
              labels={'Sighting Date': 'Date', 'Sighting Count': 'Sighting Count', 'Common Name': 'Bird Species'},
              hover_data={'Sighting Count': True, 'Sighting Date': True, 'Common Name': True})

fig.update_layout(legend_title_text='Bird Species')

# Show the plot
fig.show()


# EDA - Trees

### 10 Most Common Trees

In [28]:
# pandas (pd) and plotly.express (px) are already imported in the notebook's
# first cell, so they are not re-imported here.

# Number of trees per common name, reduced to the ten most frequent names.
# rename_axis/reset_index(name=...) name both output columns explicitly instead
# of overwriting `.columns` by position.
tree_counts = (
    tree_data['Common Name']
    .value_counts()
    .nlargest(10)
    .rename_axis('Common Name')
    .reset_index(name='Count')
)

# Interactive bar chart: one bar per tree name, bar height = number of trees
fig = px.bar(tree_counts,
             x='Common Name',
             y='Count',
             title='Number of Trees by Common Name',
             labels={'Common Name': 'Tree Species', 'Count': 'Number of Trees'},
             color_discrete_sequence=['#1f77b4'])

# Show the plot
fig.show()


### Tree Planting Trends Over Years

In [29]:
# Derive the planting year from 'Date Planted'.
# Note: this replaces the 'Year Planted' column that was loaded from the CSV.
tree_data = tree_data.assign(
    **{'Year Planted': pd.to_datetime(tree_data['Date Planted']).dt.year}
)

# Number of trees planted in each year
yearly_plantings = (
    tree_data
    .groupby('Year Planted')
    .size()
    .reset_index(name='Count')
)

# Interactive line chart (with point markers) of plantings per year
fig = px.line(
    yearly_plantings,
    x='Year Planted',
    y='Count',
    markers=True,
    title='Number of Trees Planted Each Year',
    labels={'Year Planted': 'Year', 'Count': 'Number of Trees'},
    color_discrete_sequence=['#1f77b4'],
)

fig.show()


### Tree Age Distribution

In [30]:
# Tree age in whole years = (year in which this cell is executed) - 'Year Planted'.
# Because it uses the current date, the ages shift if the notebook is re-run in a later year.
tree_data['Age'] = tree_data['Year Planted'].rsub(pd.Timestamp.now().year)

# Interactive histogram of the age distribution
fig = px.histogram(
    tree_data,
    x='Age',
    title='Distribution of Tree Ages',
    labels={'Age': 'Age (Years)'},
    hover_data={'Age': True},
    color_discrete_sequence=['#1f77b4'],
)

fig.show()


### Tree Locations

In [31]:
# Count the number of trees for each 'Located in' category.
# Stored as `location_counts` so the per-species `tree_counts` table from the
# "10 Most Common Trees" cell is not overwritten.
location_counts = (
    tree_data['Located in']
    .value_counts()
    .rename_axis('Located in')
    .reset_index(name='Count')
)

# Horizontal bar chart: one bar per location category, bar length = number of trees
fig = px.bar(location_counts,
             x='Count',
             y='Located in',
             title='Number of Trees Located in Streets and Parks',
             labels={'Located in': 'Tree Location', 'Count': 'Number of Trees'},
             orientation='h',
             color_discrete_sequence=['#1f77b4'])

# Show the plot
fig.show()

## Up Next
Birds Heat Map <br>
Tree Heat Map

Bird Geo <br>
Tree Geo