 ## An introduction to geopandas and creating geodataframes

In [None]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [None]:
# Point the notebook at the folder that holds the data files; every later cell
# loads its files relative to this folder.
# Set the DATA_DIR environment variable to use your own folder without editing the code
# (for the binder version of this notebook linked from GitHub, use "data/").

DATA_DIR = os.environ.get("DATA_DIR", "C:/Users/fritzdi/DataScience/data")  # fallback: original author's path

# Fail early with a hint instead of a bare chdir error when the folder is missing
if not os.path.isdir(DATA_DIR):
    raise FileNotFoundError(
        "Data directory not found: {!r}. Set the DATA_DIR environment variable "
        "or edit the fallback path in this cell.".format(DATA_DIR)
    )

os.chdir(DATA_DIR)

cwd = os.getcwd()
print(cwd)

In [None]:
# Start with plain pandas: load the listings .csv into a DataFrame,
# then preview its first three rows

df = pd.read_csv("DenverAirbnb-summarylistings.csv")
df.head(n=3)

In [None]:
# Summarize the column headers as a plain Python list

df.columns.tolist()

In [None]:
# Now, we'll use geopandas to geo-enable our .csv since it has lat & lon data!
# points_from_xy takes x (longitude) first, then y (latitude).
# The coordinates are assumed to be WGS84 degrees, so we declare the CRS as EPSG:4326;
# without a CRS the GeoDataFrame cannot be reprojected with to_crs().

gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.longitude, df.latitude),
    crs="EPSG:4326",
)

In [None]:
# The GeoDataFrame has the original columns plus a geometry column at the end:

gdf.columns.tolist()

In [None]:
# Preview the first rows of the GeoDataFrame, including its geometry column
gdf.head()

In [None]:
# Show the class of gdf (it was built with gpd.GeoDataFrame above)
type(gdf)

In [None]:
# GeoDataFrame.plot() is built on matplotlib.
# Its "kind" argument defaults to "geo", which automagically draws the geometries as a map;
# the default is spelled out here to make it visible.

gdf.plot(kind="geo")

In [None]:
# Let's create a map plot with a base reference layer
# And yes, geopandas can directly read shapefiles!

denver = gpd.read_file("county_boundary_lines/county_boundary_lines.shp")

# Draw both layers on one shared axes: county boundary lines first, listings on top
fig, base = plt.subplots()
denver.plot(ax=base)
gdf.plot(
    ax=base,
    marker="*",
    color="green",
    markersize=3,
)
plt.show()

In [None]:
# Changing the "kind" argument gives other plot types for exploring the attributes,
# here a scatter plot of price against number of reviews:

gdf.plot(
    kind="scatter",
    x="price",
    y="number_of_reviews",
)

In [None]:
# Read the 2010 census tracts shapefile with geopandas
census = gpd.read_file("census_tracts_2010/census_tracts_2010.shp")

In [None]:
# Preview the first three rows of the census tracts data
census.head(3)

In [None]:
# Notice how there are so many columns (145!) that we aren't shown all of them, but we can list them out if we want:

# list(census.columns)

In [None]:
# Pass a numerical attribute as the "column" argument to make a choropleth map

census.plot(
    column="PCT_BLACK",
    legend=True,
)

In [None]:
# The tree_canopy data is large and not loaded on GitHub, but you can download the zip and work with it locally.
# You can skip the cells that use this data - the examples that follow on other layers are similar.

trees = gpd.read_file("tree_canopy_assessment_2013___land_use/tree_canopy_assessment_2013___land_use.shp")

In [None]:
# Choropleth map of the tree canopy data, colored by the AREA_SQKM attribute
trees.plot(
    column="AREA_SQKM",
    legend=True,
)

In [None]:
# List the column labels of the tree canopy data
trees.columns

In [None]:
# Print every column (except the last one) that has fewer than 10 unique values.
# The only field we have that qualifies is "PROJECT_LU" - all the others are numerical

for item in trees.columns[:-1]:  # skips the last column (presumably geometry - confirm)
    uniquelist = trees[item].unique()  # computed once and reused for the check and the printout
    # len() works whether unique() returns a NumPy array or a pandas extension array
    if len(uniquelist) < 10:
        print("Column name {}".format(item))
        print("Unique values {}".format(uniquelist))
        print("___________________________________")

In [None]:
# Here is a file on zoning for urban gardening; it has more fields with a small set of
# unique values that we may want to investigate:

urbanzone = gpd.read_file("zoning_for_urban_gardens/zoning_for_urban_gardens.shp")

In [None]:
# List the column labels of the urban garden zoning data
urbanzone.columns

In [None]:
# Map the zoning data with the default plot settings
urbanzone.plot()

In [None]:
# Print every column (except the last one) of the zoning data that has fewer than 12 unique values
for item in urbanzone.columns[:-1]:  # skips the last column (presumably geometry - confirm)
    uniquelist = urbanzone[item].unique()  # computed once and reused for the check and the printout
    # len() works whether unique() returns a NumPy array or a pandas extension array
    if len(uniquelist) < 12:
        print("Column name {}".format(item))
        print("Unique values {}".format(uniquelist))
        print("___________________________________")

In [None]:
# Overall bounding box of the tree canopy layer.
# If you don't have the trees data: its lat/lon bounds cover a further extent than urbanzone's (shown next).
trees.total_bounds

In [None]:
# Overall bounding box of the urban garden zoning layer
urbanzone.total_bounds

In [None]:
# Check which coordinate reference system (CRS) the zoning layer uses.
# CRSs are important: they define how the coordinates map to locations on the Earth.

urbanzone.crs

In [None]:
# Filter the zoning data down to the rows whose NBHD_CONTE value is 'URBAN EDGE':

urbanzoneedge = urbanzone.loc[urbanzone["NBHD_CONTE"].eq("URBAN EDGE")]

In [None]:
# Map only the filtered 'URBAN EDGE' rows
urbanzoneedge.plot()

There are many more geospatial functions you can perform with geopandas and all of its dependencies!

Use the documentation to explore: https://geopandas.org/docs.html