In [6]:
# Import the dependencies:
import pandas as pd
import hvplot.pandas
from pathlib import Path

In [7]:
# LOCATION-BASED DATA IN FINTECH
# In the FinTech industry, location often plays a critical role in the understanding of financial and economic data.
# For example, FinTech lenders use mapping technologies to define geographic regions.
# Specifically, they define geographic regions where demographics and economic factors indicate both the need for loans and the best opportunities for repayment.
# Ride-share car services use similar location-based technologies to identify the areas where they'll most likely encounter individuals in need of a ride.
# Understanding how potential opportunities, resources, and clients are geographically distributed - from the neighborhood level to the entire globe - is key to establishing a successful business.
# Technology allows businesses to target markets well beyond their immediate locations.
# Access to almost infinite amounts of data means that businesses can scour the globe for opportunities.
# With location-based technologies, firms can visualize the distribution of data and pinpoint the locations where the best business opportunities exist.

In [8]:
# UNDERSTANDING GEOSPATIAL DATA
# GEOSPATIAL DATA is another term for location-based data.
# Geospatial data typically includes coordinates (latitude/longitude), addresses, and zip codes.
# It also typically includes information like cities, counties, states, provinces, or countries/regions.
# By basing visualizations on geospatial data, we can observe how the data is distributed across a specified location.

In [13]:
# PLOTTING GEOGRAPHIC DATA WITH HVPLOT
# Some hvPlot types support using geospatial data values to create interactive map visualizations.
# The latitude and longitude are the two main data values for geospatial visualizations.
# Most options anchor around these two values.
# They provide the most accurate location data that's possible, which is invaluable when we aggregate information.

# Load `population_counts.csv` into the `population_df` DataFrame:
population_df = pd.read_csv(Path('population_counts.csv'))

# Preview the first and last rows of `population_df` (`head()`, then `tail()`):
display(population_df.head(), population_df.tail())

Unnamed: 0,Year,StateAbbr,StateDesc,CityName,PopulationCount,Latitude,Longitude
0,2016,AL,Alabama,Birmingham,212237,33.527566,-86.798817
1,2016,AL,Alabama,Birmingham,212237,33.527566,-86.798817
2,2016,AL,Alabama,Birmingham,3042,33.579433,-86.722832
3,2016,AL,Alabama,Birmingham,2735,33.542821,-86.752434
4,2016,AL,Alabama,Birmingham,3338,33.563245,-86.764047


Unnamed: 0,Year,StateAbbr,StateDesc,CityName,PopulationCount,Latitude,Longitude
810042,2016,WY,Wyoming,Cheyenne,3961,41.158506,-104.777632
810043,2016,WY,Wyoming,Cheyenne,1913,41.171776,-104.788212
810044,2016,WY,Wyoming,Cheyenne,3312,41.160374,-104.756561
810045,2016,WY,Wyoming,Cheyenne,4518,41.150634,-104.755676
810046,2016,WY,Wyoming,Cheyenne,214,41.133617,-104.719054


In [16]:
# Now we can do some data cleaning and preparation before we plot our geospatial data on a map.
# We'll select the data for New York in 2016 and drop any duplicate rows.

# Select the rows whose `StateDesc` is 'New York':
ny_population_data = population_df[population_df['StateDesc'] == 'New York']

# Keep only the rows for the year 2016, then remove duplicate rows.
# Both steps derive from `ny_population_data`, so re-running this cell always yields the same result.
new_york_population_data = (
    ny_population_data[ny_population_data['Year'] == 2016]
    .drop_duplicates()
)

# Review the first and last five rows of the DataFrame:
display(new_york_population_data.head())
display(new_york_population_data.tail())

Unnamed: 0,Year,StateAbbr,StateDesc,CityName,PopulationCount,Latitude,Longitude
483854,2016,NY,New York,Albany,2393,42.660364,-73.765214
483858,2016,NY,New York,Albany,97856,42.666397,-73.798683
483860,2016,NY,New York,Albany,2139,42.668922,-73.736313
483861,2016,NY,New York,Albany,6046,42.664413,-73.751504
483862,2016,NY,New York,Albany,5888,42.680903,-73.782682


Unnamed: 0,Year,StateAbbr,StateDesc,CityName,PopulationCount,Latitude,Longitude
552554,2016,NY,New York,Yonkers,1687,40.932726,-73.854729
552555,2016,NY,New York,Yonkers,3356,40.926716,-73.844803
552556,2016,NY,New York,Yonkers,5397,40.91968,-73.850542
552557,2016,NY,New York,Yonkers,2689,40.923546,-73.855661
552558,2016,NY,New York,Yonkers,2947,40.927029,-73.861232


In [17]:
# Finally, we can create our first geospatial visualization. 
# For this example, we'll use `points` from hvPlot. With `points`, we can plot the location of each data point on a map by supplying the name of the DataFrame being used, followed by specifying the DataFrame columns that specify longitude and latitude information.
# In the case of our `new_york_population_data` DataFrame, these columns are named `Longitude` and `Latitude`.
# Setting the `geo` parameter to `True` enables GeoViews within the hvPlot `points` function.
# We can also control other aspects of the plot, such as the size and color of the data points, as well as the zoom setting and the size of the plot itself.
# In this example, we'll let the point `size` vary by `PopulationCount`, with a `scale` of 0.04.
# This will give us bigger circles for bigger populations and smaller circles for smaller populations.
# The scale factor keeps the point size appropriate for the size of the map plot. 
# We'll also assign `CityName` to the color.
# This will give us a differently colored circle for each city name in the data. 
# Finally, we set the `frame_width` to 700 and the `frame_height` to 500.

# Draw the New York data points on an interactive map using hvPlot with GeoViews enabled (`geo=True`).
# Point size follows `PopulationCount` (scaled by 0.04) and point color follows `CityName`.
new_york_population_data.hvplot.points(
    x='Longitude',
    y='Latitude',
    geo=True,
    tiles='OSM',
    color='CityName',
    size='PopulationCount',
    scale=0.04,
    frame_width=700,
    frame_height=500,
)

In [18]:
# USE CASE: GEOSPATIAL DATA IN REAL-ESTATE ANALYSIS
# Let's explore how to use hvPlot with GeoViews to create dynamic visualizations for real-estate data.
# These visualizations will enhance our analyses by offering insight into the geospatial data that we're working with.
# Let's use the dataset from our previous example to interactively explore and analyze opportunities in the New York real-estate market.

In [20]:
# ANALYZE THE GEOSPATIAL DATA
# Let's say you want to identify the three or four most densely populated cities in New York.
# We want to launch our company's long-term housing rental initiative and corresponding ad campaign in these cities.
# We have access to the 2016 population data for the state of New York that's broken down by city and by longitude and latitude.
# With this information, we can determine the relative population of each city in New York and how the population is distributed across the state.
# The following shows the first and last five rows of the `new_york_population_data` DataFrame:
# Show the head and then the tail of `new_york_population_data`:
display(new_york_population_data.head(), new_york_population_data.tail())

# Notice that the DataFrame lists the coordinates for each city's latitude and longitude.
# With this information, hvPlot can render the map with incredible accuracy.
# Additionally, by aggregating these latitude and longitude points by city name, hvPlot can render the scatter points by city name and color.
# When we plot the DataFrame as a map, we can notice that New York has the largest scatter plot circle on the map.
# This indicates that New York is more densely populated than any other city in the state.
# To confirm the sizes of the relative circles, we can zoom in on specific areas of the state by using hvPlot's "Box Zoom" tool, which is the magnifying glass icon.
# Additionally, we can confirm the population density values by hovering our mouse over any point to see detailed information about that datapoint.
# With the "Pan" tool, an icon with arrows pointing in four directions, we can navigate from one part of the map to another.

# Now let's consider how to use this information for the company's ad campaign. 
# The map shows that the four largest cities in New York are New York, Buffalo, Rochester, and Yonkers.
# Yonkers is close to New York in the south, whereas Buffalo and Rochester are close to each other but much farther north.
# Since there is an even split of 2 and 2 between the largest cities and their proximity to each other, we can gear a campaign toward the specific interests, industries, and economic conditions of the state region (north or south).
# We'd find it difficult to think of campaign ideas for these markets with just location-based tabular data alone.
# We need the map and the plot to understand the relationships among the most densely populated cities.

Unnamed: 0,Year,StateAbbr,StateDesc,CityName,PopulationCount,Latitude,Longitude
483854,2016,NY,New York,Albany,2393,42.660364,-73.765214
483858,2016,NY,New York,Albany,97856,42.666397,-73.798683
483860,2016,NY,New York,Albany,2139,42.668922,-73.736313
483861,2016,NY,New York,Albany,6046,42.664413,-73.751504
483862,2016,NY,New York,Albany,5888,42.680903,-73.782682


Unnamed: 0,Year,StateAbbr,StateDesc,CityName,PopulationCount,Latitude,Longitude
552554,2016,NY,New York,Yonkers,1687,40.932726,-73.854729
552555,2016,NY,New York,Yonkers,3356,40.926716,-73.844803
552556,2016,NY,New York,Yonkers,5397,40.91968,-73.850542
552557,2016,NY,New York,Yonkers,2689,40.923546,-73.855661
552558,2016,NY,New York,Yonkers,2947,40.927029,-73.861232


In [None]:
# ON THE JOB
# The real-estate market is a financial sector that depends on geospatial data for analysis.
# Specifically, real estate focuses on data points like latitude, longitude, zip code, and address.
# Real estate investors use this information to make decisions about the property types and locations to target for investment.
# We can use geospatial data in various scenarios that relate to real estate analysis including the following:
    # 1. Identifying investable locations based on economic statistics, like cost of living.
    # 2. Identifying locations for buying properties at a discount, based on the available foreclosure information.
    # 3. Identifying locations for accumulating a portfolio of rental properties from active housing markets.