# Tools for Analytics Final Project ###

Jaeseop Shin / js6364  &. Hyunjin Jun / hj2642

## Environment Setup

In [22]:
# Importing all libraries used for the project

import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import geopandas as gpd
import requests
import psycopg2
import pathlib
import os
import shapely

from bs4 import BeautifulSoup
from sodapy import Socrata
from sqlalchemy import create_engine, text
from shapely.geometry import Point, Polygon
from geoalchemy2 import Geometry, WKTElement
from shapely.geometry.base import BaseGeometry

# SQL Data
db_username = "jin"
db_name = "postgres"
db_url = f"postgresql+psycopg2://{db_username}@localhost/{db_name}"
db_schema = "schema.sql"
# directory where DB queries for Part 3 will be saved
QUERY_DIR = pathlib.Path("queries")

# Make sure the QUERY_DIRECTORY exists
if not QUERY_DIR.exists():
    QUERY_DIR.mkdir()

## Part 1. Data Processing

### Data Setup

In [3]:
app_token = "2Hn2wwabCLXVYhGN4b9tEtJ11"


# Downloading NYC 311 data
url_311 = "data.cityofnewyork.us"
set_311 = "erm2-nwe9"
client_311 = Socrata(url_311, app_token)
client_311.timeout = 60
# Filtering data after 2015
where_311 = "date_extract_y(created_date)>=2015"
# Data to data frame
data_311 = client_311.get(set_311, where=where_311, limit=20)
df_311 = pd.DataFrame.from_records(data_311)
#df_311.to_csv("nyc_311.csv")

# Downloading NYC Tree data
url_tree = "data.cityofnewyork.us"
set_tree = "uvpi-gqnh"
client_tree = Socrata(url_tree, app_token)
client_tree.timeout = 60
# Data to data frame
data_tree = client_tree.get(set_tree, limit=20)
df_tree = pd.DataFrame.from_records(data_tree)
#df_tree.to_csv("nyc_tree.csv")

# Loading local data files
data_dir = pathlib.Path("data")
rent_path = data_dir / "zillow_rent_data.csv"          
df_rent = pd.read_csv(rent_path)
nyc_zipcodes_shp_path = data_dir / "nyc_zipcodes.shp"
gdf_zipcode = gpd.read_file(nyc_zipcodes_shp_path)
gdf_zipcode = gdf_zipcode.to_crs("EPSG:4326")

In [4]:
df_311

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,incident_zip,incident_address,...,open_data_channel_type,park_facility_name,park_borough,latitude,longitude,location,intersection_street_1,intersection_street_2,landmark,vehicle_type
0,59589235,2023-11-30T12:00:00.000,2023-11-30T12:00:00.000,DSNY,Department of Sanitation,Derelict Vehicles,Derelict Vehicles,Street,11355.0,137-75 GERANIUM AVENUE,...,PHONE,Unspecified,QUEENS,40.75014275617199,-73.82198495419226,"{'latitude': '40.75014275617199', 'longitude':...",,,,
1,59589245,2023-11-30T12:00:00.000,,DSNY,Department of Sanitation,Derelict Vehicles,Derelict Vehicles,Street,,,...,PHONE,Unspecified,BROOKLYN,,,,CLYMER STREET,CLYMER STREET,,
2,59587554,2023-11-30T01:21:01.000,,NYPD,New York City Police Department,Illegal Parking,Double Parked Blocking Vehicle,Street/Sidewalk,11385.0,1679 GROVE STREET,...,ONLINE,Unspecified,QUEENS,40.70252874288173,-73.91155921414276,"{'latitude': '40.70252874288173', 'longitude':...",GROVE STREET,CYPRESS AVENUE,GROVE STREET,SUV
3,59593739,2023-11-30T01:20:51.000,,NYPD,New York City Police Department,Illegal Parking,Blocked Hydrant,Street/Sidewalk,11215.0,689 6 AVENUE,...,MOBILE,Unspecified,BROOKLYN,40.660655143628304,-73.99054225150822,"{'latitude': '40.660655143628304', 'longitude'...",19 STREET,20 STREET,6 AVENUE,
4,59593633,2023-11-30T01:20:19.000,,NYPD,New York City Police Department,Blocked Driveway,Partial Access,Street/Sidewalk,11435.0,107-14 WALTHAM STREET,...,ONLINE,Unspecified,QUEENS,40.69308196645765,-73.80189634039738,"{'latitude': '40.69308196645765', 'longitude':...",107 AVENUE,SHORE AVENUE,WALTHAM STREET,
5,59587635,2023-11-30T01:20:11.000,,NYPD,New York City Police Department,Noise - Residential,Loud Television,Residential Building/House,11385.0,561 ONDERDONK AVENUE,...,MOBILE,Unspecified,QUEENS,40.70570880898938,-73.91011589680197,"{'latitude': '40.70570880898938', 'longitude':...",BLEECKER STREET,MENAHAN STREET,ONDERDONK AVENUE,
6,59584425,2023-11-30T01:19:19.000,,EDC,Economic Development Corporation,Noise - Helicopter,Other,Above Address,11201.0,237 CLINTON STREET,...,ONLINE,Unspecified,BROOKLYN,40.688171808197865,-73.99534850217154,"{'latitude': '40.688171808197865', 'longitude'...",CONGRESS STREET,VERANDAH PLACE,CLINTON STREET,
7,59593811,2023-11-30T01:18:33.000,,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,11106.0,31-35 CRESCENT STREET,...,ONLINE,Unspecified,QUEENS,40.76468368198577,-73.92718359841146,"{'latitude': '40.76468368198577', 'longitude':...",31 ROAD,31 DRIVE,CRESCENT STREET,
8,59582927,2023-11-30T01:17:56.000,,NYPD,New York City Police Department,Noise - Vehicle,Engine Idling,Street/Sidewalk,11201.0,211 SCHERMERHORN STREET,...,MOBILE,Unspecified,BROOKLYN,40.68875287329103,-73.9854468147971,"{'latitude': '40.68875287329103', 'longitude':...",HOYT STREET,BOND STREET,SCHERMERHORN STREET,Car
9,59584487,2023-11-30T01:17:13.000,,NYPD,New York City Police Department,Noise - Residential,Banging/Pounding,Residential Building/House,10030.0,162 WEST 144 STREET,...,ONLINE,Unspecified,MANHATTAN,40.820746211868745,-73.93906320797221,"{'latitude': '40.820746211868745', 'longitude'...",LENOX AVENUE,ADAM CLAYTON POWELL JR BOULEVARD,WEST 144 STREET,


### Data Cleaning & Filtering 

In [5]:
def clean_filter(data_frame, column_use, column_rename, column_type):
    """Removes unnecessary columns, rename columns, confirm types of columns, discard invalid data"""
    data_frame = data_frame[column_use]
    data_frame = data_frame.rename(columns=column_rename)
    for column, dtype in column_type.items():
        if dtype == 'datetime':
            data_frame[column] = pd.to_datetime(data_frame[column], errors='coerce')
        else:
            data_frame[column] = data_frame[column].astype(dtype, errors='ignore')
    data_frame.dropna(inplace=True)
    return data_frame

In [6]:
# NYC 311 Cleaning & Filtering
# Columns to use
use_311 = [
    'unique_key',
    'created_date',
    'complaint_type',
    'incident_zip',
    'city',
    'x_coordinate_state_plane',
    'y_coordinate_state_plane',
    'latitude',
    'longitude',
]
# Columns to rename
rename_311 = {
    'unique_key': 'complaint_id',
    'created_date': 'date',
    'complaint_type': 'complaint_type',
    'incident_zip': 'zipcode',
    'city': 'city',
    'x_coordinate_state_plane': 'x_coord',
    'y_coordinate_state_plane': 'y_coord',
    'latitude': 'latitude',
    'longitude': 'longitude',
}
# Column types
type_311 = {
    'complaint_id': 'int',
    'date': 'datetime',
    'complaint_type': 'str',
    'zipcode': 'int',
    'city': 'str',
    'x_coord': 'float',
    'y_coord': 'float',
    'latitude': 'float',
    'longitude': 'float',
}

df_311_f = clean_filter(df_311, use_311, rename_311, type_311)

# Converting latitude and longitude into Geometry data
df_311_f['geometry'] = df_311_f.apply(lambda x: Point(float(x['longitude']), float(x['latitude'])), axis=1)

# Converting DataFrame to GeoDataFrame
gdf_311_f = gpd.GeoDataFrame(df_311_f, geometry='geometry')

# Setting the coordinate reference system
gdf_311_f.set_crs(epsg=4326, inplace=True)
gdf_311_f

Unnamed: 0,complaint_id,date,complaint_type,zipcode,city,x_coord,y_coord,latitude,longitude,geometry
0,59589235,2023-11-30 12:00:00,Derelict Vehicles,11355,FLUSHING,1033573.0,212624.0,40.750143,-73.821985,POINT (-73.82198 40.75014)
2,59587554,2023-11-30 01:21:01,Illegal Parking,11385,RIDGEWOOD,1008772.0,195239.0,40.702529,-73.911559,POINT (-73.91156 40.70253)
3,59593739,2023-11-30 01:20:51,Illegal Parking,11215,BROOKLYN,986874.0,179971.0,40.660655,-73.990542,POINT (-73.99054 40.66066)
4,59593633,2023-11-30 01:20:19,Blocked Driveway,11435,JAMAICA,1039186.0,191847.0,40.693082,-73.801896,POINT (-73.80190 40.69308)
5,59587635,2023-11-30 01:20:11,Noise - Residential,11385,RIDGEWOOD,1009171.0,196398.0,40.705709,-73.910116,POINT (-73.91012 40.70571)
6,59584425,2023-11-30 01:19:19,Noise - Helicopter,11201,BROOKLYN,985540.0,189996.0,40.688172,-73.995349,POINT (-73.99535 40.68817)
7,59593811,2023-11-30 01:18:33,Noise - Residential,11106,ASTORIA,1004421.0,217880.0,40.764684,-73.927184,POINT (-73.92718 40.76468)
8,59582927,2023-11-30 01:17:56,Noise - Vehicle,11201,BROOKLYN,988286.0,190208.0,40.688753,-73.985447,POINT (-73.98545 40.68875)
9,59584487,2023-11-30 01:17:13,Noise - Residential,10030,NEW YORK,1001116.0,238303.0,40.820746,-73.939063,POINT (-73.93906 40.82075)
10,59593743,2023-11-30 01:16:22,Illegal Parking,11220,BROOKLYN,979769.0,172935.0,40.641342,-74.016146,POINT (-74.01615 40.64134)


In [7]:
# NYC Tree Cleaning & Filtering
# Columns to use
use_tree = [
    'tree_id',
    'status',
    'health',
    'spc_common',
    'zipcode',
    'zip_city',
    'latitude',
    'longitude',
    'x_sp',
    'y_sp',
]
# Columns to rename
rename_tree = {
    'tree_id': 'tree_id',
    'status': 'status',
    'health': 'health',
    'spc_common': 'species',
    'zipcode': 'zipcode',
    'zip_city': 'city',
    'latitude': 'latitude',
    'longitude': 'longitude',
    'x_sp': 'x_coord',
    'y_sp': 'y_coord',
}
# Column types
type_tree = {
    'tree_id': 'int',
    'status': 'str',
    'health': 'str',
    'species': 'str',
    'zipcode': 'int',
    'city': 'str',
    'latitude': 'float',
    'longitude': 'float',
    'x_coord': 'float',
    'y_coord': 'float',
}

df_tree_f = clean_filter(df_tree, use_tree, rename_tree, type_tree)

# Converting latitude and longitude into Geometry data
df_tree_f['geometry'] = df_tree_f.apply(lambda x: Point(float(x['longitude']), float(x['latitude'])), axis=1)

# Converting DataFrame to GeoDataFrame
gdf_tree_f = gpd.GeoDataFrame(df_tree_f, geometry='geometry')

# Setting the coordinate reference system
gdf_tree_f.set_crs(epsg=4326, inplace=True)
gdf_tree_f.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   tree_id    20 non-null     int64   
 1   status     20 non-null     object  
 2   health     20 non-null     object  
 3   species    20 non-null     object  
 4   zipcode    20 non-null     int64   
 5   city       20 non-null     object  
 6   latitude   20 non-null     float64 
 7   longitude  20 non-null     float64 
 8   x_coord    20 non-null     float64 
 9   y_coord    20 non-null     float64 
 10  geometry   20 non-null     geometry
dtypes: float64(4), geometry(1), int64(2), object(4)
memory usage: 1.8+ KB


In [8]:
# Zillow Rent Cleaning & Filtering
# Columns to remove
remove_rent = [
    'RegionID',
    'SizeRank',
    'RegionType',
    'StateName',
    'Metro',
    'CountyName',
]
# Columns to rename
rename_rent = {
    'RegionName': 'zipcode',
    'State': 'state',
    'City': 'city',
}
# Column types
type_rent = {
    'zipcode': 'int',
    'state': 'str',
    'city': 'str'
}

# Removing and renaming columns
df_rent_f = df_rent.drop(columns=remove_rent, axis=1)
df_rent_f = df_rent_f.rename(columns=rename_rent)
# Confirming Data type
for column, dtype in type_rent.items():
    df_rent_f[column] = df_rent_f[column].astype(dtype, errors='raise')
for column in df_rent_f.iloc[:, 3:]:
    df_rent_f[column] = df_rent_f[column].astype('float', errors='raise')
# Discarding blank data
df_rent_f.dropna(inplace=True)
df_rent_f.head()

Unnamed: 0,zipcode,state,city,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,2015-06-30,2015-07-31,...,2022-12-31,2023-01-31,2023-02-28,2023-03-31,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30
0,77494,TX,Katy,1606.206406,1612.779844,1622.201575,1630.392427,1632.4115,1636.206864,1644.894632,...,1994.653463,2027.438438,2042.237444,2049.325559,2016.531345,2023.438976,2031.558202,2046.144009,2053.486247,2055.771355
1,77449,TX,Katy,1257.81466,1255.268025,1262.170452,1274.955754,1285.526052,1295.665673,1296.650395,...,1749.6979,1738.217986,1747.30584,1758.407295,1758.891075,1762.980879,1771.751591,1779.338402,1795.384582,1799.63114
8,77433,TX,Cypress,1332.384333,1328.107408,1334.32529,1342.507107,1343.204774,1349.345048,1357.258039,...,1881.20455,1885.695935,1884.894986,1880.532012,1870.035369,1863.111029,1892.511066,1922.759295,1945.581823,1975.672556
15,11226,NY,New York,1944.609891,1971.608676,2044.189151,2061.734126,2076.492259,2057.344849,2075.751863,...,2695.164727,2680.6837,2676.791284,2697.414702,2706.080701,2726.381017,2753.966159,2770.403005,2785.320137,2762.435713
17,30044,GA,Lawrenceville,1157.569878,1168.554459,1173.937503,1176.423247,1180.259047,1183.618022,1193.861072,...,2125.698496,2113.96425,2170.556028,2160.008767,2191.894968,2181.78056,2161.711011,2117.99072,2119.319392,2055.476496


In [9]:
# Zipcode Cleaning & Filtering
# Columns to use
use_gdf = [
    'ZIPCODE',
    'PO_NAME',
    'STATE',
    'COUNTY',
    'geometry',
]
# Columns to rename
rename_gdf = {
    'ZIPCODE': 'zipcode',
    'PO_NAME': 'city',
    'STATE': 'state',
    'COUNTY': 'county',
    'geometry': 'geometry',
}
# Column types
type_gdf = {
    'zipcode': 'int',
    'city': 'str',
    'state': 'str',
    'county': 'str',
    'geometry': 'geometry',
}

gdf_zipcode_f = clean_filter(gdf_zipcode, use_gdf, rename_gdf, type_gdf)
gdf_zipcode_f.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 263 entries, 0 to 262
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   zipcode   263 non-null    int64   
 1   city      263 non-null    object  
 2   state     263 non-null    object  
 3   county    263 non-null    object  
 4   geometry  263 non-null    geometry
dtypes: geometry(1), int64(1), object(3)
memory usage: 10.4+ KB


## Part 2. Storing Data

In [23]:
def setup_new_postgis_database(user_name, db_name):
    """Dropping Database if already exists"""
    os.system(f"dropdb -U {user_name} --if-exists {db_name}")
    """Setting up a new PostGis Database"""
    os.system(f"createdb -U {user_name} {db_name}")
    os.system(f"psql -U {user_name} --dbname {db_name} -c 'CREATE EXTENSION postgis;'")

In [24]:
setup_new_postgis_database(db_username, db_name)

CREATE EXTENSION


### Creating Tables

In [25]:
engine = create_engine(db_url)

In [26]:
# Defining the SQL statements to create 311, tree, rent, and zipcode tables
nyc_311_schema = """
CREATE TABLE IF NOT EXISTS nyc_311 
(
    complaint_id INTEGER PRIMARY KEY,
    date DATE,
    complaint_type TEXT,
    zipcode INTEGER,
    city TEXT,
    x_coord INTEGER,
    y_coord INTEGER,
    latitude FLOAT,
    longitude FLOAT,
    geometry GEOMETRY(Point, 4326)
);

CREATE INDEX IF NOT EXISTS idx_nyc_311_geom ON nyc_311 USING GIST (geometry);
"""

nyc_tree_schema = """
CREATE TABLE IF NOT EXISTS nyc_tree
(
    tree_id INTEGER PRIMARY KEY,
    status TEXT,
    health TEXT,
    species TEXT,
    zipcode INTEGER,
    city TEXT,
    latitude FLOAT,
    longitude FLOAT,
    x_coord FLOAT,
    y_coord FLOAT,
    geometry GEOMETRY(Point, 4326)
);

CREATE INDEX IF NOT EXISTS idx_nyc_tree_geom ON nyc_tree USING GIST (geometry);
"""

nyc_rent_schema = """
CREATE TABLE IF NOT EXISTS nyc_rent
(
    zipcode INTEGER PRIMARY KEY,
    state TEXT,
    city TEXT
)
"""

nyc_zipcode_schema = """
CREATE TABLE IF NOT EXISTS nyc_zipcode
(
    zipcode INTEGER PRIMARY KEY,
    city TEXT,
    state TEXT,
    county TEXT,
    geometry GEOMETRY(Polygon, 4326)
);

CREATE INDEX IF NOT EXISTS idx_nyc_zipcode_geom ON nyc_zipcode USING GIST (geometry);
"""

In [27]:
# Executing the schema files to create tables
def create_table(engine, table_schema):
    """Creating a table using the SQL schema"""
    try:
        with engine.connect() as connection:
            connection.execute(text(table_schema))
        print("Table Created")
    except Exception as e:
        print(f"Error: {e}")

In [28]:
# Creating each table
create_table(engine, nyc_311_schema)
create_table(engine, nyc_tree_schema)
create_table(engine, nyc_rent_schema)
create_table(engine, nyc_zipcode_schema)

Table Created
Table Created
Table Created
Table Created


### Adding Data to Database

In [29]:
def write_dataframes_to_table(tablename_to_dataframe, engine):
    """Adding data to SQL table"""
    for table_name, df in tablename_to_dataframe.items():
        # Checking if the dataframe is a GeoDataframe
        if isinstance(df, gpd.GeoDataFrame):
            # Converting Shapely geometries to WKTElement
            df['geometry'] = df['geometry'].apply(
                lambda x: WKTElement(x.wkt, srid=4326) if isinstance(x, BaseGeometry) else x
            )
            geometry_type = 'GEOMETRY'  # Default type
            # GeoDataframe to SQL
            try:
                df.to_sql(table_name, con=engine, if_exists='replace', index=False,
                          dtype={'geometry': Geometry(geometry_type, srid=4326)})
                print(f"Data in {table_name}")
            except Exception as e:
                print(f"{table_name} Error: {e}")
        else:
            # Dataframe to SQL
            try:
                df.to_sql(table_name, con=engine, if_exists='replace', index=False)
                print(f"Data in {table_name}")
            except Exception as e:
                print(f"{table_name} Error: {e}")

In [30]:
gdf_zipcode_sql = gdf_zipcode_f.copy()
gdf_311_sql = gdf_311_f.copy()
gdf_tree_sql = gdf_tree_f.copy()
df_rent_sql = df_rent_f.copy()
# Dataframe to Table mapping
tablename_to_dataframe = {
    "nyc_zipcodes": gdf_zipcode_sql,
    "311_complaints": gdf_311_sql,
    "nyc_trees": gdf_tree_sql,
    "nyc_rents": df_rent_sql,
}

In [31]:
write_dataframes_to_table(tablename_to_dataframe, engine)

  df['geometry'] = df['geometry'].apply(
  df['geometry'] = df['geometry'].apply(
  df['geometry'] = df['geometry'].apply(


Data in nyc_zipcodes
Data in 311_complaints
Data in nyc_trees
Data in nyc_rents


## Part 3: Understanding the Data

### Query 1

In [32]:
# Helper function to write the queries to file
def write_query_to_file(query, outfile):
    raise NotImplementedError()

In [36]:
QUERY_1_FILENAME = QUERY_DIR

QUERY_1 = """
SELECT zipcode, COUNT(complaint_id) as ttl_complaints
FROM 311_complaints
WHERE DATE(date) >= "2022-10-01" and DATE(date) >= "2023-09-30"
GROUP BY zipcode
ORDER BY ttl_complaints DESC;
"""

QUERY_2 = """
SELECT zipcode, COUNT(tree_id) as ttl_trees
FROM nyc_trees
GROUP BY zipcode
ORDER BY ttl_trees DESC
LIMIT 10
"""

QUERY_3 = """
SELECT r.zipcode, TO_CHAR(ROUND(AVG(r."2023-08-31"), 2), 'FM999,999.99') AS average_rent
FROM nyc_rents r JOIN nyc_trees t ON r.zipcode = t.zipcode
WHERE r.state = "NY"
GROUP BY r.zipcode
ORDER BY t.count(t.tree_id) DESC 
LIMIT 10;
"""

QUERY_4 = """
WITH RentAverages AS (
    SELECT r.zipcode
        , TO_CHAR(ROUND(AVG(r."2023-01-31"),2), 'FM999,999.99') AS average_rent
        , COUNT(t.tree_id) as ttl_trees 
        , COUNT(c.complaint_id) as ttl_complaints
        , ROW_NUMBER() OVER (ORDER BY AVG(r."2023-01-31") ASC) AS low_rent_rank,
        , ROW_NUMBER() OVER (ORDER BY AVG(r."2023-01-31") DESC) AS high_rent_rank
    WHERE r.state = "NY"
    FROM nyc_rents r
        JOIN nyc_trees t ON r.zipcode = t.zipcode
        JOIN 311_complaints c ON r.zipcode = c.zipcode
    GROUP BY r.zipcode
    ORDER BY average_rent
)
SELECT zipcode
  , average_rent
  , ttl_trees
  , ttl_complaints
FROM RentAverages
WHERE low_rent_rank <= 5 OR high_rent_rank <= 5
ORDER BY high_rent_rank, low_rent_rank;
"""

QUERY_5 = """
SELECT z.zipcode, COUNT(t.tree_id) as ttl_trees
FROM nyc_trees t JOIN nyc_zipcodes z ON ST_Within(t.geometry, z.geometry)
GROUP BY z.zipcode
ORDER BY ttl_trees DESC
LIMIT 10;
"""

QUERY_6 = """

"""

In [34]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_1))
    for row in result:
        print(row)

NameError: name 'db' is not defined

In [None]:
write_query_to_file(QUERY_1, QUERY_1_FILENAME)

## Part 4: Visualizing the Data

### Visualization 1

In [None]:
# use a more descriptive name for your function
def plot_visual_1(dataframe):
    figure, axes = plt.subplots(figsize=(20, 10))
    
    values = "..."  # use the dataframe to pull out values needed to plot
    
    # you may want to use matplotlib to plot your visualizations;
    # there are also many other plot types (other 
    # than axes.plot) you can use
    axes.plot(values, "...")
    # there are other methods to use to label your axes, to style 
    # and set up axes labels, etc
    axes.set_title("Some Descriptive Title")
    
    plt.show()

In [None]:
def get_data_for_visual_1():
    # Query your database for the data needed.
    # You can put the data queried into a pandas/geopandas dataframe, if you wish
    raise NotImplementedError()

In [None]:
some_dataframe = get_data_for_visual_1()
plot_visual_1(some_dataframe)