# Tools for Analytics Final Project ###

Jaeseop Shin / js6364  &. Hyunjin Jun / hj2642

## Environment Setup

In [1]:
# Importing all libraries used for the project

import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import geopandas as gpd
import requests
import psycopg2
import pathlib
import os
import shapely

from sodapy import Socrata
from sqlalchemy import create_engine, text
from shapely.geometry import Point, Polygon
from geoalchemy2 import Geometry, WKTElement
from shapely.geometry.base import BaseGeometry
from datetime import datetime
from dateutil.relativedelta import relativedelta

# SQL Data
db_username = "ariez"
db_name = "postgres"
db_url = f"postgresql+psycopg2://{db_username}@localhost/{db_name}"
db_schema = "schema.sql"
# directory where DB queries for Part 3 will be saved
QUERY_DIR = pathlib.Path("queries")

# Make sure the QUERY_DIRECTORY exists
if not QUERY_DIR.exists():
    QUERY_DIR.mkdir()

## Part 1. Data Processing

### Data Setup

In [2]:
app_token = "2Hn2wwabCLXVYhGN4b9tEtJ11"


# Downloading NYC 311 data
url_311 = "data.cityofnewyork.us"
set_311 = "erm2-nwe9"
client_311 = Socrata(url_311, app_token)
client_311.timeout = 60
# Filtering data after 2015
where_311 = "date_extract_y(created_date)>=2015 AND created_date<='2023-09-30'"
# Data to data frame
data_311 = client_311.get(set_311, where=where_311, limit=20)
df_311 = pd.DataFrame.from_records(data_311)
#df_311.to_csv("nyc_311.csv")

# Downloading NYC Tree data
url_tree = "data.cityofnewyork.us"
set_tree = "uvpi-gqnh"
client_tree = Socrata(url_tree, app_token)
client_tree.timeout = 60
# Data to data frame
data_tree = client_tree.get(set_tree, limit=20)
df_tree = pd.DataFrame.from_records(data_tree)
#df_tree.to_csv("nyc_tree.csv")

# Loading local data files
data_dir = pathlib.Path("data")
rent_path = data_dir / "zillow_rent_data.csv"          
df_rent = pd.read_csv(rent_path)
nyc_zipcodes_shp_path = data_dir / "nyc_zipcodes.shp"
gdf_zipcode = gpd.read_file(nyc_zipcodes_shp_path)
gdf_zipcode = gdf_zipcode.to_crs("EPSG:4326")

### Data Cleaning & Filtering 

In [3]:
def clean_filter(data_frame, column_use, column_rename, column_type):
    """Removes unnecessary columns, rename columns, confirm types of columns, discard invalid data"""
    data_frame = data_frame[column_use]
    data_frame = data_frame.rename(columns=column_rename)
    for column, dtype in column_type.items():
        if dtype == 'datetime':
            data_frame[column] = pd.to_datetime(data_frame[column], errors='coerce')
        else:
            data_frame[column] = data_frame[column].astype(dtype, errors='ignore')
    data_frame.dropna(inplace=True)
    return data_frame

In [4]:
# NYC 311 Cleaning & Filtering
# Columns to use
use_311 = [
    'unique_key',
    'created_date',
    'complaint_type',
    'incident_zip',
    'city',
    'x_coordinate_state_plane',
    'y_coordinate_state_plane',
    'latitude',
    'longitude',
]
# Columns to rename
rename_311 = {
    'unique_key': 'complaint_id',
    'created_date': 'date',
    'complaint_type': 'complaint_type',
    'incident_zip': 'zipcode',
    'city': 'city',
    'x_coordinate_state_plane': 'x_coord',
    'y_coordinate_state_plane': 'y_coord',
    'latitude': 'latitude',
    'longitude': 'longitude',
}
# Column types
type_311 = {
    'complaint_id': 'int',
    'date': 'datetime',
    'complaint_type': 'str',
    'zipcode': 'int',
    'city': 'str',
    'x_coord': 'float',
    'y_coord': 'float',
    'latitude': 'float',
    'longitude': 'float',
}

df_311_f = clean_filter(df_311, use_311, rename_311, type_311)

# Converting latitude and longitude into Geometry data
df_311_f['geometry'] = df_311_f.apply(lambda x: Point(float(x['longitude']), float(x['latitude'])), axis=1)

# Converting DataFrame to GeoDataFrame
gdf_311_f = gpd.GeoDataFrame(df_311_f, geometry='geometry')

# Setting the coordinate reference system
gdf_311_f.set_crs(epsg=4326, inplace=True)
gdf_311_f

Unnamed: 0,complaint_id,date,complaint_type,zipcode,city,x_coord,y_coord,latitude,longitude,geometry
0,58970426,2023-09-30 00:00:00,Food Poisoning,10003,NEW YORK,987037.0,207017.0,40.73489,-73.989944,POINT (-73.98994 40.73489)
1,58958037,2023-09-29 23:59:53,Noise - Residential,11433,JAMAICA,1041450.0,193016.0,40.696276,-73.793722,POINT (-73.79372 40.69628)
2,58963337,2023-09-29 23:59:40,For Hire Vehicle Complaint,10002,NEW YORK,986676.0,202759.0,40.723203,-73.991248,POINT (-73.99125 40.72320)
3,58967293,2023-09-29 23:59:10,Noise - Residential,11216,BROOKLYN,1001313.0,187243.0,40.680599,-73.938481,POINT (-73.93848 40.68060)
4,58957297,2023-09-29 23:59:00,For Hire Vehicle Complaint,10002,NEW YORK,986676.0,202759.0,40.723203,-73.991248,POINT (-73.99125 40.72320)
5,58962342,2023-09-29 23:58:59,Illegal Parking,11210,BROOKLYN,1000975.0,169168.0,40.630988,-73.939744,POINT (-73.93974 40.63099)
6,58958331,2023-09-29 23:58:46,PLUMBING,11205,BROOKLYN,994968.0,191527.0,40.692368,-73.96135,POINT (-73.96135 40.69237)
7,58960919,2023-09-29 23:58:38,Blocked Driveway,10458,BRONX,1016290.0,256884.0,40.871704,-73.884151,POINT (-73.88415 40.87170)
8,58966561,2023-09-29 23:58:24,Litter Basket Request,11201,BROOKLYN,985729.0,195387.0,40.702969,-73.994666,POINT (-73.99467 40.70297)
9,58959265,2023-09-29 23:58:24,Illegal Parking,10458,BRONX,1016351.0,257750.0,40.87408,-73.883926,POINT (-73.88393 40.87408)


In [5]:
# NYC Tree Cleaning & Filtering
# Columns to use
use_tree = [
    'tree_id',
    'status',
    'health',
    'spc_common',
    'zipcode',
    'zip_city',
    'latitude',
    'longitude',
    'x_sp',
    'y_sp',
]
# Columns to rename
rename_tree = {
    'tree_id': 'tree_id',
    'status': 'status',
    'health': 'health',
    'spc_common': 'species',
    'zipcode': 'zipcode',
    'zip_city': 'city',
    'latitude': 'latitude',
    'longitude': 'longitude',
    'x_sp': 'x_coord',
    'y_sp': 'y_coord',
}
# Column types
type_tree = {
    'tree_id': 'int',
    'status': 'str',
    'health': 'str',
    'species': 'str',
    'zipcode': 'int',
    'city': 'str',
    'latitude': 'float',
    'longitude': 'float',
    'x_coord': 'float',
    'y_coord': 'float',
}

df_tree_f = clean_filter(df_tree, use_tree, rename_tree, type_tree)

# Converting latitude and longitude into Geometry data
df_tree_f['geometry'] = df_tree_f.apply(lambda x: Point(float(x['longitude']), float(x['latitude'])), axis=1)

# Converting DataFrame to GeoDataFrame
gdf_tree_f = gpd.GeoDataFrame(df_tree_f, geometry='geometry')

# Setting the coordinate reference system
gdf_tree_f.set_crs(epsg=4326, inplace=True)
gdf_tree_f.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   tree_id    20 non-null     int64   
 1   status     20 non-null     object  
 2   health     20 non-null     object  
 3   species    20 non-null     object  
 4   zipcode    20 non-null     int64   
 5   city       20 non-null     object  
 6   latitude   20 non-null     float64 
 7   longitude  20 non-null     float64 
 8   x_coord    20 non-null     float64 
 9   y_coord    20 non-null     float64 
 10  geometry   20 non-null     geometry
dtypes: float64(4), geometry(1), int64(2), object(4)
memory usage: 1.8+ KB


In [6]:
# Zillow Rent Cleaning & Filtering
# Columns to remove
remove_rent = [
    'RegionID',
    'SizeRank',
    'RegionType',
    'StateName',
    'Metro',
    'CountyName',
]
# Columns to rename
rename_rent = {
    'RegionName': 'zipcode',
    'State': 'state',
    'City': 'city',
}
# Column types
type_rent = {
    'zipcode': 'int',
    'state': 'str',
    'city': 'str'
}

# Removing and renaming columns
df_rent_f = df_rent.drop(columns=remove_rent, axis=1)
df_rent_f = df_rent_f.rename(columns=rename_rent)
# Confirming Data type
for column, dtype in type_rent.items():
    df_rent_f[column] = df_rent_f[column].astype(dtype, errors='raise')
for column in df_rent_f.iloc[:, 3:]:
    df_rent_f[column] = df_rent_f[column].astype('float', errors='raise')
# Discarding blank data
df_rent_f.dropna(inplace=True)
# Filtering for NY / NYC
df_rent_f = df_rent_f.loc[(df_rent_f['state'] == "NY") & (df_rent_f['city'] == "New York")]

df_rent_f.head()

Unnamed: 0,zipcode,state,city,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,2015-06-30,2015-07-31,...,2022-12-31,2023-01-31,2023-02-28,2023-03-31,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30
15,11226,NY,New York,1944.609891,1971.608676,2044.189151,2061.734126,2076.492259,2057.344849,2075.751863,...,2695.164727,2680.6837,2676.791284,2697.414702,2706.080701,2726.381017,2753.966159,2770.403005,2785.320137,2762.435713
24,10025,NY,New York,3068.951823,3075.667915,3073.716581,3110.088995,3131.760932,3162.091621,3172.310286,...,3763.796671,3774.013274,3767.374235,3766.943012,3803.235789,3879.913846,3971.395455,4016.496554,4037.765725,4032.900347
41,11206,NY,New York,2482.829299,2482.958545,2566.410038,2603.575953,2687.270484,2732.849355,2771.73729,...,3401.226252,3420.930559,3495.553722,3540.441253,3593.268056,3600.632607,3661.238073,3684.896166,3690.772235,3695.179857
42,11221,NY,New York,2125.738807,2138.483563,2143.894936,2196.188856,2209.614682,2257.257233,2277.339386,...,3014.667597,2980.930635,3011.124566,3056.167044,3084.347289,3117.693892,3153.414698,3201.922546,3215.32572,3225.197671
67,11235,NY,New York,1687.789898,1728.897223,1766.137033,1817.02508,1823.480961,1825.850122,1810.993372,...,2381.58338,2367.886882,2416.248737,2413.186585,2406.438603,2357.454087,2399.564759,2422.995739,2457.556041,2468.319048


In [7]:
# Zipcode Cleaning & Filtering
# Columns to use
use_gdf = [
    'ZIPCODE',
    'PO_NAME',
    'STATE',
    'COUNTY',
    'geometry',
]
# Columns to rename
rename_gdf = {
    'ZIPCODE': 'zipcode',
    'PO_NAME': 'city',
    'STATE': 'state',
    'COUNTY': 'county',
    'geometry': 'geometry',
}
# Column types
type_gdf = {
    'zipcode': 'int',
    'city': 'str',
    'state': 'str',
    'county': 'str',
    'geometry': 'geometry',
}

gdf_zipcode_f = clean_filter(gdf_zipcode, use_gdf, rename_gdf, type_gdf)
gdf_zipcode_f.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 263 entries, 0 to 262
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   zipcode   263 non-null    int64   
 1   city      263 non-null    object  
 2   state     263 non-null    object  
 3   county    263 non-null    object  
 4   geometry  263 non-null    geometry
dtypes: geometry(1), int64(1), object(3)
memory usage: 10.4+ KB


## Part 2. Storing Data

In [8]:
def setup_new_postgis_database(user_name, db_name):
    """Dropping Database if already exists"""
    os.system(f"dropdb -U {user_name} --if-exists {db_name}")
    """Setting up a new PostGis Database"""
    os.system(f"createdb -U {user_name} {db_name}")
    os.system(f"psql -U {user_name} --dbname {db_name} -c 'CREATE EXTENSION postgis;'")

In [9]:
setup_new_postgis_database(db_username, db_name)

CREATE EXTENSION


### Creating Tables

In [10]:
engine = create_engine(db_url)

In [11]:
# Defining the SQL statements to create 311, tree, rent, and zipcode tables
# NYC 311 Schema
nyc_311_schema = """
CREATE TABLE IF NOT EXISTS nyc_311 
(
    complaint_id INTEGER PRIMARY KEY,
    date DATE,
    complaint_type TEXT,
    zipcode INTEGER,
    city TEXT,
    x_coord INTEGER,
    y_coord INTEGER,
    latitude FLOAT,
    longitude FLOAT,
    geometry GEOMETRY(Point, 4326)
);

CREATE INDEX IF NOT EXISTS idx_nyc_311_geom ON nyc_311 USING GIST (geometry);
"""
# NYC Tree Schema
nyc_tree_schema = """
CREATE TABLE IF NOT EXISTS nyc_tree
(
    tree_id INTEGER PRIMARY KEY,
    status TEXT,
    health TEXT,
    species TEXT,
    zipcode INTEGER,
    city TEXT,
    latitude FLOAT,
    longitude FLOAT,
    x_coord FLOAT,
    y_coord FLOAT,
    geometry GEOMETRY(Point, 4326)
);

CREATE INDEX IF NOT EXISTS idx_nyc_tree_geom ON nyc_tree USING GIST (geometry);
"""

# NYC Rent Schema
# Start and end dates
start_date = datetime(2015, 1, 31)
end_date = datetime(2023, 9, 30)

# Generate monthly dates between start and end
current_date = start_date
dates = []
while current_date <= end_date:
    date_str = current_date.strftime('"%Y-%m-%d" FLOAT')
    dates.append(date_str)
    current_date += relativedelta(months=1)

# Create the schema with generated dates
nyc_rent_schema = """
CREATE TABLE IF NOT EXISTS nyc_rent
(
    zipcode INTEGER PRIMARY KEY,
    state TEXT,
    city TEXT,
    {}
)
""".format(",\n    ".join(dates))

# NYC Zipcode Schema
nyc_zipcode_schema = """
CREATE TABLE IF NOT EXISTS nyc_zipcode
(
    zipcode INTEGER PRIMARY KEY,
    city TEXT,
    state TEXT,
    county TEXT,
    geometry GEOMETRY(Polygon, 4326)
);

CREATE INDEX IF NOT EXISTS idx_nyc_zipcode_geom ON nyc_zipcode USING GIST (geometry);
"""

In [12]:
# Executing the schema files to create tables
def create_table(engine, table_schema):
    """Creating a table using the SQL schema"""
    try:
        with engine.connect() as connection:
            connection.execute(text(table_schema))
        print("Table Created")
    except Exception as e:
        print(f"Error: {e}")

In [13]:
# Creating each table
create_table(engine, nyc_311_schema)
create_table(engine, nyc_tree_schema)
create_table(engine, nyc_rent_schema)
create_table(engine, nyc_zipcode_schema)

Table Created
Table Created
Table Created
Table Created


### Adding Data to Database

In [14]:
def write_dataframes_to_table(tablename_to_dataframe, engine):
    """Adding data to SQL table"""
    for table_name, df in tablename_to_dataframe.items():
        # Checking if the dataframe is a GeoDataframe
        if isinstance(df, gpd.GeoDataFrame):
            # Converting Shapely geometries to WKTElement
            df['geometry'] = df['geometry'].apply(
                lambda x: WKTElement(x.wkt, srid=4326) if isinstance(x, BaseGeometry) else x
            )
            geometry_type = 'GEOMETRY'  # Default type
            # GeoDataframe to SQL
            try:
                df.to_sql(table_name, con=engine, if_exists='replace', index=False,
                          dtype={'geometry': Geometry(geometry_type, srid=4326)})
                print(f"Data in {table_name}")
            except Exception as e:
                print(f"{table_name} Error: {e}")
        else:
            # Dataframe to SQL
            try:
                df.to_sql(table_name, con=engine, if_exists='replace', index=False)
                print(f"Data in {table_name}")
            except Exception as e:
                print(f"{table_name} Error: {e}")

In [15]:
gdf_zipcode_sql = gdf_zipcode_f.copy()
gdf_311_sql = gdf_311_f.copy()
gdf_tree_sql = gdf_tree_f.copy()
df_rent_sql = df_rent_f.copy()
# Dataframe to Table mapping
tablename_to_dataframe = {
    "nyc_zipcodes": gdf_zipcode_sql,
    "311_complaints": gdf_311_sql,
    "nyc_trees": gdf_tree_sql,
    "nyc_rents": df_rent_sql,
}

In [16]:
write_dataframes_to_table(tablename_to_dataframe, engine)



Data in nyc_zipcodes
Data in 311_complaints
Data in nyc_trees
Data in nyc_rents




## Part 3: Understanding the Data

In [17]:
# Helper function to write the queries to file
def write_query_to_file(query, path):
    """Writing the SQL query to a file"""
    with open(path, 'w') as f:
        f.write(query)

### Query 1

In [18]:
QUERY_1_FILENAME = QUERY_DIR / "query_1.sql"

# SQL Query
QUERY_1 = """
SELECT zipcode, COUNT(complaint_id) as ttl_complaints
FROM "311_complaints"
WHERE DATE(date) BETWEEN '2022-10-01' AND '2023-09-30'
GROUP BY zipcode
ORDER BY ttl_complaints DESC;
"""

# Writing SQL to a file
write_query_to_file(QUERY_1, QUERY_1_FILENAME)

# Executing Query
with engine.connect() as conn:
    result = conn.execute(text(QUERY_1))
    for row in result:
        print(row)

(10002, 3)
(11205, 2)
(10458, 2)
(11216, 2)
(11420, 1)
(10472, 1)
(11201, 1)
(10003, 1)
(11210, 1)
(11433, 1)
(11226, 1)
(11237, 1)
(11249, 1)
(10031, 1)
(10034, 1)


### Query 2

In [19]:
QUERY_2_FILENAME = QUERY_DIR / "query_2.sql"

# SQL Query
QUERY_2 = """
SELECT zipcode, COUNT(tree_id) as ttl_trees
FROM nyc_trees
GROUP BY zipcode
ORDER BY ttl_trees DESC
LIMIT 10;
"""

# Writing SQL to a file
write_query_to_file(QUERY_2, QUERY_2_FILENAME)

# Executing Query
with engine.connect() as conn:
    result = conn.execute(text(QUERY_2))
    for row in result:
        print(row)

(11205, 3)
(11105, 2)
(11211, 2)
(10023, 2)
(10308, 1)
(11223, 1)
(11375, 1)
(10305, 1)
(10019, 1)
(11214, 1)


### Query 3

In [20]:
QUERY_3_FILENAME = QUERY_DIR / "query_3.sql"

# SQL Query
QUERY_3 = """
SELECT top_trees.zipcode AS "Zip Code",
    TO_CHAR(ROUND(AVG(rents."2023-08-31")::numeric, 2),
    'FM9,999.99') AS "Average Rent"
FROM (
    SELECT zipcode, COUNT(tree_id) as total_trees
    FROM nyc_trees
    GROUP BY zipcode
    ORDER BY total_trees DESC
    LIMIT 10
) AS top_trees
JOIN nyc_rents rents ON top_trees.zipcode = rents.zipcode
GROUP BY top_trees.zipcode, top_trees.total_trees
ORDER BY top_trees.total_trees DESC;
"""



# Writing SQL to a file
write_query_to_file(QUERY_3, QUERY_3_FILENAME)

# Executing Query
with engine.connect() as conn:
    result = conn.execute(text(QUERY_3))
    for row in result:
        print(row)

(11205, '3,497.47')
(10023, '4,370.07')
(11211, '4,342.65')
(10019, '4,233.43')
(11375, '2,743.4')


### Query 4

In [22]:
QUERY_4_FILENAME = QUERY_DIR / "query_4.sql"

# SQL Query
QUERY_4 = """
WITH LowRent AS (
    SELECT r.zipcode,
           TO_CHAR(ROUND(AVG(r."2023-01-31")::numeric, 2), 'FM999,999.99') AS average_rent,
           COUNT(t.tree_id) as ttl_trees,
           COUNT(c.complaint_id) as ttl_complaints
    FROM nyc_rents r
    JOIN nyc_trees t ON r.zipcode = t.zipcode
    JOIN "311_complaints" c ON r.zipcode = c.zipcode
    GROUP BY r.zipcode
    ORDER BY AVG(r."2023-01-31")
    LIMIT 5
), HighRent AS (
    SELECT r.zipcode,
           TO_CHAR(ROUND(AVG(r."2023-01-31")::numeric, 2), 'FM999,999.99') AS average_rent,
           COUNT(t.tree_id) as ttl_trees,
           COUNT(c.complaint_id) as ttl_complaints
    FROM nyc_rents r
    JOIN nyc_trees t ON r.zipcode = t.zipcode
    JOIN "311_complaints" c ON r.zipcode = c.zipcode
    GROUP BY r.zipcode
    ORDER BY AVG(r."2023-01-31") DESC
    LIMIT 5
)
SELECT * FROM LowRent
UNION ALL
SELECT * FROM HighRent;
"""

# Writing SQL to a file
write_query_to_file(QUERY_4, QUERY_4_FILENAME)

# Executing Query
with engine.connect() as conn:
    result = conn.execute(text(QUERY_4))
    for row in result:
        print(row)

(11226, '2,680.68', 1, 1)
(11205, '3,311.98', 6, 6)
(11205, '3,311.98', 6, 6)
(11226, '2,680.68', 1, 1)


### Query 5

In [24]:
QUERY_5_FILENAME = QUERY_DIR / "query_5.sql"

# SQL Query
QUERY_5 = """
SELECT z.zipcode, COUNT(t.tree_id) as ttl_trees
FROM nyc_trees t 
JOIN nyc_zipcodes z ON ST_Within(t.geometry, z.geometry)
GROUP BY z.zipcode
ORDER BY ttl_trees DESC
LIMIT 10;
"""

# Writing SQL to a file
write_query_to_file(QUERY_5, QUERY_5_FILENAME)

# Executing Query
with engine.connect() as conn:
    result = conn.execute(text(QUERY_5))
    for row in result:
        print(row)

(11205, 3)
(10023, 2)
(11105, 2)
(11211, 2)
(11214, 1)
(11215, 1)
(11223, 1)
(11226, 1)
(11357, 1)
(11373, 1)


### Query 6

In [27]:
QUERY_6_FILENAME = QUERY_DIR / "query_6.sql"

# SQL Query
QUERY_6 = """
SELECT t.tree_id, t.species, t.health, t.status, ST_AsText(t.geometry) as location
FROM nyc_trees t
WHERE ST_DWithin(
    t.geometry,
    ST_SetSRID(ST_MakePoint(-73.96253174434912, 40.80737875669467), 4326),
    0.804672
)
"""

# Writing SQL to a file
write_query_to_file(QUERY_6, QUERY_6_FILENAME)

# Executing Query
with engine.connect() as conn:
    result = conn.execute(text(QUERY_6))
    for row in result:
        print(row)

(180683, 'red maple', 'Fair', 'Alive', 'POINT(-73.84421522 40.72309177)')
(200540, 'pin oak', 'Fair', 'Alive', 'POINT(-73.81867946 40.79411067)')
(204026, 'honeylocust', 'Good', 'Alive', 'POINT(-73.9366077 40.71758074)')
(204337, 'honeylocust', 'Good', 'Alive', 'POINT(-73.93445616 40.71353749)')
(189565, 'American linden', 'Good', 'Alive', 'POINT(-73.97597938 40.66677776)')
(190422, 'honeylocust', 'Good', 'Alive', 'POINT(-73.98494997 40.77004563)')
(190426, 'honeylocust', 'Good', 'Alive', 'POINT(-73.98533807 40.77020969)')
(208649, 'American linden', 'Good', 'Alive', 'POINT(-73.98729652 40.76272385)')
(209610, 'honeylocust', 'Good', 'Alive', 'POINT(-74.07625483 40.59657931)')
(192755, 'London planetree', 'Fair', 'Alive', 'POINT(-73.96974394 40.58635725)')
(203719, 'London planetree', 'Good', 'Alive', 'POINT(-73.91117077 40.78242823)')
(203726, 'London planetree', 'Poor', 'Alive', 'POINT(-73.91201957 40.78173511)')
(195202, 'London planetree', 'Fair', 'Alive', 'POINT(-74.16267038 40.557

## Part 4: Visualizing the Data

### Visualization 1

In [None]:
# use a more descriptive name for your function
def plot_visual_1(dataframe):
    figure, axes = plt.subplots(figsize=(20, 10))
    
    values = "..."  # use the dataframe to pull out values needed to plot
    
    # you may want to use matplotlib to plot your visualizations;
    # there are also many other plot types (other 
    # than axes.plot) you can use
    axes.plot(values, "...")
    # there are other methods to use to label your axes, to style 
    # and set up axes labels, etc
    axes.set_title("Some Descriptive Title")
    
    plt.show()

In [None]:
def get_data_for_visual_1():
    # Query your database for the data needed.
    # You can put the data queried into a pandas/geopandas dataframe, if you wish
    raise NotImplementedError()

In [None]:
some_dataframe = get_data_for_visual_1()
plot_visual_1(some_dataframe)