In [1]:
# Import packages
%matplotlib inline
import pandas as pd
import numpy as np
import geopandas
import matplotlib.pyplot as plt
import math

In [2]:
# Load the street tree census and reduce it to a plain attribute table

# Read the census shapefile into a GeoDataFrame
data = geopandas.read_file('Data/Trees/2015 Street Tree Census - Tree Data/geo_export_f9cac9c3-66c4-45b6-899b-823a270d8754.shp')

# Discard the geometry, copy the rest into an ordinary pandas DataFrame,
# then drop the listed attribute columns
data_nogeom = pd.DataFrame(data.drop(columns='geometry'), copy=True)
data_nogeom = data_nogeom.drop(
    columns=[
        'boro_ct', 'borocode', 'boroname',
        'brnch_ligh', 'brnch_othe', 'brnch_shoe',
        'cb_num', 'cncldist',
        'nta', 'nta_name',
        'spc_latin',
        'st_assem', 'st_senate', 'state',
        'stump_diam', 'user_type',
        'x_sp', 'y_sp', 'zip_city',
    ]
)

# Rows with no missing value in any remaining column.
# NOTE: `data_no_geom` (extra underscore) is a separate variable from
# `data_nogeom`; both names are read by later cells.
data_no_geom = data_nogeom.dropna()

In [3]:
# Import and organize 311 tree fall data

# Import fall data and drop irrelevant columns.
# quoting=3 is csv.QUOTE_NONE: quote characters are read as ordinary text.
data_falls = pd.read_csv('Data/311/Tree_Fall_311.csv', quoting=3)
data_falls = data_falls.drop(
    columns=[
        'Street Name', 'Cross Street 1', 'Cross Street 2',
        'Intersection Street 1', 'Intersection Street 2', 'Status', 'Borough',
        'X Coordinate (State Plane)', 'Y Coordinate (State Plane)',
    ]
)

# Remove the falls created on or before 2015-01-01 (before survey).
# Boolean-mask indexing actually removes the non-matching rows; the previous
# DataFrame.where(...) call kept them as all-NaN rows and upcast integer
# columns to float as a side effect. Rows whose date fails the comparison
# (including missing dates) are dropped.
data_falls['Created Date'] = pd.to_datetime(data_falls['Created Date'])
data_falls = data_falls[data_falls['Created Date'] > '2015-01-01']


In [4]:
# Keep only 311 reports that have an address, a latitude and a longitude,
# and whose descriptor says a branch or limb fell (not the entire tree)
data_falls = (
    data_falls
    .dropna(subset=['Incident Address', 'Latitude', 'Longitude'])
    .loc[lambda falls: falls['Descriptor'] == 'Branch or Limb Has Fallen Down']
)

In [None]:
# Find closest tree from the street tree survey and then assign its tree_index to the 311 data frame
# Make a copy of the tree census data
df=data_no_geom.copy()

# Write a function to find tree ids from address and lat lon
def assign_tree_id(address,lat,lon,df=df):
    """Return the census tree_id that best matches a 311 report location.

    Census rows are first matched on an exact `address` string. When several
    trees share that address, the one closest to (`lat`, `lon`) is chosen.

    Parameters
    ----------
    address : value compared for equality against df['address'].
    lat, lon : coordinates of the report, in degrees.
    df : DataFrame with 'address', 'tree_id', 'latitude' and 'longitude'
        columns. Defaults to the census copy bound when this function was
        defined.

    Returns
    -------
    The matching tree_id, or np.nan when no row at that address has a
    non-null tree_id.

    Notes
    -----
    Every call scans the whole of `df` for the address, so calling this once
    per 311 report is expensive on a large census table.
    """
    candidates = df[df['address'] == address]

    # Count non-null ids once, on the single column that matters, instead of
    # counting every column of the frame (twice) as before.
    n_ids = candidates['tree_id'].count()

    if n_ids == 0:
        return np.nan
    if n_ids == 1:
        return candidates['tree_id'].to_numpy()[0]

    # Several trees at this address: pick the nearest one. A degree of
    # longitude is shorter than a degree of latitude by a factor of
    # cos(latitude), so the longitude difference is scaled before comparing;
    # treating the two as equal can select the wrong tree.
    lon_scale = math.cos(math.radians(lat))
    d_lat = candidates['latitude'].to_numpy() - lat
    d_lon = (candidates['longitude'].to_numpy() - lon) * lon_scale

    # Squared distance is enough: the square root does not change the argmin.
    nearest = np.argmin(np.square(d_lat) + np.square(d_lon))
    return candidates['tree_id'].iloc[nearest]
    
# Look up a census tree id for every 311 branch-fall report, in row order
tree_ids = list(
    map(
        assign_tree_id,
        data_falls['Incident Address'],
        data_falls['Latitude'],
        data_falls['Longitude'],
    )
)
        

In [29]:
# save fallen tree ids
# data_falls is a filtered subset of the 311 table, and assigning a column
# directly onto such a subset can raise pandas' SettingWithCopyWarning.
# .assign returns a new frame with the column added instead.
data_falls = data_falls.assign(tree_id=tree_ids)
data_falls.to_csv('Data/Intermediate/tree_falls_with_id.csv', index=False)

In [31]:
# Load the 311 tree damage / overhang reports and keep the locatable ones

# Read the CSV (quoting=3 is csv.QUOTE_NONE), drop the columns that are not
# needed, then discard reports lacking an address, latitude or longitude.
# NOTE(review): this file is read from 'Input_Data/' while the other inputs
# in this notebook are read from 'Data/' -- confirm the path is intended.
data_damage = (
    pd.read_csv('Input_Data/Tree_Damage_311_Final.csv', quoting=3)
    .drop(
        columns=[
            'Street Name', 'Cross Street 1', 'Cross Street 2',
            'Intersection Street 1', 'Intersection Street 2', 'Status', 'Borough',
            'X Coordinate (State Plane)', 'Y Coordinate (State Plane)',
        ]
    )
    .dropna(subset=['Incident Address', 'Latitude', 'Longitude'])
)


In [33]:
# assign tree ids to 311 warning data

# Find closest tree from the street tree survey for every damage report,
# in row order
tree_ids_damage = list(
    map(
        assign_tree_id,
        data_damage['Incident Address'],
        data_damage['Latitude'],
        data_damage['Longitude'],
    )
)

# data_damage is a filtered subset of the 311 table, and assigning a column
# directly onto such a subset can raise pandas' SettingWithCopyWarning.
# .assign returns a new frame with the column added instead.
data_damage = data_damage.assign(tree_id=tree_ids_damage)

In [51]:
# Write the damage reports, including their tree_id column, to the
# intermediate folder (row index omitted)
data_damage.to_csv(
    'Data/Intermediate/tree_damage_with_id.csv',
    index=False,
)

In [37]:
# Also export the matched damage tree ids on their own, as a single
# "tree_id" column
tid_damage_df = pd.DataFrame(
    data=tree_ids_damage,
    columns=["tree_id"],
)
tid_damage_df.to_csv(
    'Data/Intermediate/damage_tree_id.csv',
    index=False,
)

In [39]:
# Prepare the tree falls for joining to the tree survey:
# discard fall reports that have no tree_id
data_falls = data_falls.dropna(subset=['tree_id'])

In [46]:
# Left-join the fall reports onto the census table by tree_id, so every
# census row is kept whether or not it has a matching fall report.
# NOTE(review): the left side is `data_nogeom`, not the NaN-free
# `data_no_geom` that the tree ids were matched against -- confirm intended.
data_with_falls = data_nogeom.merge(
    data_falls,
    how="left",
    on="tree_id",
)

In [47]:
# Persist the merged table so the matching and merge need not be repeated
data_with_falls.to_csv(
    'Data/Intermediate/tree_data_with_falls.csv',
    index=False,
)