In [1]:
import warnings
warnings.filterwarnings('ignore')

import ipyleaflet
from ipyleaflet import Map, GeoData, basemaps, WidgetControl, GeoJSON,  \
                       LayersControl, Icon, Marker, basemap_to_tiles,    \
                       Choropleth, MarkerCluster, Heatmap, SearchControl, \
                       FullScreenControl

import IPython.display
from IPython.display import Image, display, display_html

import ipywidgets as ipw
from ipywidgets import Text, HTML, link, FloatSlider

import branca as br
from branca.colormap import linear

import geopandas as gpd
from geopandas import read_file

import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import folium as fol
import mapclassify as mc
import pandas as pd
import numpy as np
import os
import requests

# Keeping plots embedded within our Jupyter Notebook
%matplotlib inline

# Applying the jupyterthemes dark style to matplotlib figures.
# The submodule is imported explicitly: attribute access on the package
# returned by `__import__` only works if something else already loaded `jtplot`.
from jupyterthemes import jtplot
jtplot.style()

In [2]:
# Smoke test: draw the Asian countries from GeoPandas' bundled
# 'naturalearth_lowres' dataset on a light CartoDB basemap.
# NOTE(review): `gpd.datasets` is deprecated in newer GeoPandas releases --
# confirm the installed version still ships it.
countries = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Initial view handed to the Map widget
center = [30.546647, 79.373178]
zoom = 3

asia = countries.loc[countries['continent'] == 'Asia']

asia_map = Map(basemap=basemaps.CartoDB.Positron, center=center, zoom=zoom)

# Resting and hover appearance of the country polygons
country_style = dict(
    color='black',
    fillColor='#E0D071',
    opacity=0.03,
    weight=1.9,
    dashArray='2',
    fillOpacity=0.6,
)
country_hover_style = dict(fillColor='#b08a3e', fillOpacity=0.8)

geodata = GeoData(
    geo_dataframe=asia,
    style=country_style,
    hover_style=country_hover_style,
    name='Countries',
)
asia_map.add_layer(geodata)
asia_map

Map(center=[30.546647, 79.373178], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title',…

In [3]:
# Rows of the countries table whose continent is 'North America'
is_north_america = countries['continent'].eq('North America')
countries.loc[is_north_america]

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
3,37589262.0,North America,Canada,CAN,1736425,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,328239523.0,North America,United States of America,USA,21433226,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
16,11263077.0,North America,Haiti,HTI,14332,"POLYGON ((-71.71236 19.71446, -71.62487 19.169..."
17,10738958.0,North America,Dominican Rep.,DOM,88941,"POLYGON ((-71.70830 18.04500, -71.68774 18.316..."
19,389482.0,North America,Bahamas,BHS,13578,"MULTIPOLYGON (((-78.98000 26.79000, -78.51000 ..."
22,56225.0,North America,Greenland,GRL,3051,"POLYGON ((-46.76379 82.62796, -43.40644 83.225..."
27,127575529.0,North America,Mexico,MEX,1268870,"POLYGON ((-117.12776 32.53534, -115.99135 32.6..."
33,4246439.0,North America,Panama,PAN,66800,"POLYGON ((-77.35336 8.67050, -77.47472 8.52429..."
34,5047561.0,North America,Costa Rica,CRI,61801,"POLYGON ((-82.54620 9.56613, -82.93289 9.47681..."
35,6545502.0,North America,Nicaragua,NIC,12520,"POLYGON ((-83.65561 10.93876, -83.89505 10.726..."


In [43]:
# Peek at five random rows of GeoPandas' bundled 'naturalearth_lowres' dataset
naturalearth_path = gpd.datasets.get_path('naturalearth_lowres')
test_usa = gpd.read_file(naturalearth_path)
test_usa.sample(n=5)

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
164,6777452.0,Africa,Libya,LBY,52091,"POLYGON ((25.00000 22.00000, 25.00000 20.00304..."
148,31949777.0,Asia,Malaysia,MYS,364681,"MULTIPOLYGON (((100.08576 6.46449, 100.25960 6..."
109,2957731.0,Asia,Armenia,ARM,13672,"POLYGON ((46.50572 38.77061, 46.14362 38.74120..."
84,9770529.0,Asia,United Arab Emirates,ARE,421142,"POLYGON ((51.57952 24.24550, 51.75744 24.29407..."
26,2125268.0,Africa,Lesotho,LSO,2376,"POLYGON ((28.97826 -28.95560, 29.32517 -29.257..."


In [5]:
# Loading the congressional-district boundaries shapefile (cb_2020_us_cd116_20m).
# The base folder defaults to the original local location but can be
# redirected with the RENTAL_DATA_DIR environment variable, so the notebook
# is not tied to one machine's directory layout.
RENTAL_DATA_DIR = os.environ.get(
    'RENTAL_DATA_DIR',
    'C:/Users/remar/OneDrive/Desktop/Code/SynthWriter/Current Market Rental Data Analysis'
)
cds = gpd.read_file(
    RENTAL_DATA_DIR + '/geo_shape_files/congressional_districts/cb_2020_us_cd116_20m.shp'
)
type(cds)

geopandas.geodataframe.GeoDataFrame

In [6]:
# Column summary followed by ten random districts.
# `info()` prints its report and returns None, so it is called as a plain
# statement; handing its return value to `display()` renders a stray "None".
cds.info()
display(cds.sample(10))

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 437 entries, 0 to 436
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   STATEFP   437 non-null    object  
 1   CD116FP   437 non-null    object  
 2   AFFGEOID  437 non-null    object  
 3   GEOID     437 non-null    object  
 4   NAMELSAD  437 non-null    object  
 5   LSAD      437 non-null    object  
 6   CDSESSN   437 non-null    object  
 7   ALAND     437 non-null    int64   
 8   AWATER    437 non-null    int64   
 9   geometry  437 non-null    geometry
dtypes: geometry(1), int64(2), object(7)
memory usage: 34.3+ KB


None

Unnamed: 0,STATEFP,CD116FP,AFFGEOID,GEOID,NAMELSAD,LSAD,CDSESSN,ALAND,AWATER,geometry
3,36,1,5001600US3601,3601,Congressional District 1,C2,116,1684046571,3359943463,"MULTIPOLYGON (((-72.01893 41.27411, -71.92680 ..."
328,12,2,5001600US1202,1202,Congressional District 2,C2,116,28500816318,9297236174,"POLYGON ((-85.99274 30.38932, -85.85977 30.490..."
141,51,4,5001600US5104,5104,Congressional District 4,C2,116,9433217400,296493194,"POLYGON ((-77.90025 37.14388, -77.79593 37.192..."
111,25,8,5001600US2508,2508,Congressional District 8,C2,116,844339683,448445056,"POLYGON ((-71.22917 42.26415, -71.18821 42.280..."
6,48,35,5001600US4835,4835,Congressional District 35,C2,116,1540484180,18890415,"POLYGON ((-98.52602 29.46636, -98.49180 29.456..."
258,5,3,5001600US0503,503,Congressional District 3,C2,116,13985796400,374570670,"POLYGON ((-94.61792 36.49941, -94.36120 36.499..."
296,39,16,5001600US3916,3916,Congressional District 16,C2,116,3121705125,31215207,"POLYGON ((-82.12933 40.99181, -81.97738 40.989..."
105,36,7,5001600US3607,3607,Congressional District 7,C2,116,41802690,2679594,"POLYGON ((-74.03093 40.72279, -74.02349 40.737..."
365,17,18,5001600US1718,1718,Congressional District 18,C2,116,27234848169,383625124,"POLYGON ((-91.50626 40.20016, -91.49696 40.248..."
67,26,11,5001600US2611,2611,Congressional District 11,C2,116,1085486513,55928940,"POLYGON ((-83.68278 42.69558, -83.21403 42.709..."


In [44]:
# Testing additional congressional districts dataset
center = [37.546647, -90.373178]
zoom = 4

# Subset for STATEFP '12'. It is NOT what gets drawn below -- the layer uses
# the full `cds` frame -- the name is kept so any other reference to it still
# resolves.
congress_districs = cds[cds['STATEFP'] == '12']

usa_map = Map(
    basemap=basemaps.CartoDB.Positron,
    center=center,
    zoom=zoom
)
geodata = GeoData(
    geo_dataframe=cds,
    style={
        'color': 'black',
        'fillColor': '#E0D071',
        'opacity': 0.03,
        'weight': 1.9,
        'dashArray': '2',
        'fillOpacity': 0.6
    },
    hover_style={'fillColor': '#b08a3e', 'fillOpacity': 0.8},
    # Layer label matches the data shown (districts, not countries)
    name='Congressional Districts'
)
usa_map.add_layer(geodata)
usa_map

Map(center=[37.546647, -90.373178], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title'…

In [12]:
# Reading data on counties (with geodataframe polygons) and zip codes (from raw csv file)
df_counties = gpd.read_file('https://www2.census.gov/geo/tiger/GENZ2020/shp/cb_2020_us_county_20m.zip')

geo_data_url = 'https://raw.githubusercontent.com/scpike/us-state-county-zip/master/geo-data.csv'

# Examining datatypes.
# The message is a single-line literal: continuing a string literal with a
# backslash embeds the next source line's indentation in the printed text.
print(
    ' df_counties datatype = {}\n geo_data_url datatype = {}'.format(
        type(df_counties),
        type(geo_data_url)
    )
)

 df_counties datatype = <class 'geopandas.geodataframe.GeoDataFrame'>     
 geo_data_url datatype = <class 'str'>


In [13]:
# County boundaries for a single state, selected by the abbreviation stored in
# the STUSPS column. Change `state_code` to target another state (the frame
# keeps its `df_all_ga_counties` name because later cells refer to it).
state_code = 'GA'

# `.copy()` yields an independent frame instead of a slice of `df_counties`,
# so later column changes never act on a view of the national table.
df_all_ga_counties = df_counties.loc[df_counties['STUSPS'] == state_code].copy()

# Five random rows of the national table for reference
df_counties.sample(5)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry
1789,39,167,1074095,0500000US39167,39167,Washington,Washington County,OH,Ohio,6,1636805226,20784952,"POLYGON ((-81.84486 39.45022, -81.82316 39.494..."
1297,29,151,758530,0500000US29151,29151,Osage,Osage County,MO,Missouri,6,1571034361,17734586,"POLYGON ((-92.19448 38.33554, -92.11498 38.401..."
847,51,775,1789074,0500000US51775,51775,Salem,Salem city,VA,Virginia,25,37593968,275156,"POLYGON ((-80.12049 37.28705, -80.02223 37.308..."
397,21,79,516886,0500000US21079,21079,Garrard,Garrard County,KY,Kentucky,6,595979380,9931261,"POLYGON ((-84.68934 37.72727, -84.71693 37.815..."
2970,13,173,348102,0500000US13173,13173,Lanier,Lanier County,GA,Georgia,6,509030778,8418857,"POLYGON ((-83.19797 31.02540, -83.16507 31.147..."


In [14]:
# Downloading the state / county / zip lookup table found at `geo_data_url`.
# Zip codes are identifiers, not numbers: reading the column as text keeps any
# leading zeros and avoids a mixed int/str column from pandas' dtype inference.
df_geo = pd.read_csv(geo_data_url, dtype={'zipcode': str})
print(' df_geo datatype = {}'.format(type(df_geo)))

 df_geo datatype = <class 'pandas.core.frame.DataFrame'>


In [15]:
# Distinct values per column for every county named 'Shelby', in both tables
shelby_boundaries = df_counties.loc[df_counties['NAME'].eq('Shelby')]
shelby_zip_rows = df_geo.loc[df_geo['county'].eq('Shelby')]

display(shelby_boundaries.nunique(), shelby_zip_rows.nunique())

STATEFP       9
COUNTYFP      9
COUNTYNS      9
AFFGEOID      9
GEOID         9
NAME          1
NAMELSAD      1
STUSPS        9
STATE_NAME    9
LSAD          1
ALAND         9
AWATER        9
geometry      9
dtype: int64

state_fips      9
state           9
state_abbr      9
zipcode       113
county          1
city           82
dtype: int64

In [16]:
# Distinct STATE_NAME values present in the county boundaries table
pd.unique(df_counties['STATE_NAME'])

array(['Arkansas', 'Colorado', 'Michigan', 'Mississippi', 'North Dakota',
       'Ohio', 'Kentucky', 'Alaska', 'Texas', 'Oklahoma', 'Missouri',
       'Nebraska', 'Wisconsin', 'Indiana', 'Minnesota', 'Iowa',
       'Illinois', 'South Dakota', 'Florida', 'Kansas', 'North Carolina',
       'South Carolina', 'Utah', 'Alabama', 'New Jersey', 'Pennsylvania',
       'Tennessee', 'Montana', 'Louisiana', 'Puerto Rico', 'Virginia',
       'California', 'Idaho', 'New York', 'Arizona', 'Georgia',
       'West Virginia', 'Rhode Island', 'New Mexico', 'Oregon', 'Nevada',
       'New Hampshire', 'Maine', 'Wyoming', 'Washington', 'Vermont',
       'District of Columbia', 'Maryland', 'Massachusetts', 'Hawaii',
       'Connecticut', 'Delaware'], dtype=object)

In [17]:
# Column overview of the national county table, then one random row.
# A cell renders only printed text and its final expression, so the cell is
# limited to the two statements that actually produce output.
df_counties.info()
df_counties.sample()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3221 entries, 0 to 3220
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   STATEFP     3221 non-null   object  
 1   COUNTYFP    3221 non-null   object  
 2   COUNTYNS    3221 non-null   object  
 3   AFFGEOID    3221 non-null   object  
 4   GEOID       3221 non-null   object  
 5   NAME        3221 non-null   object  
 6   NAMELSAD    3221 non-null   object  
 7   STUSPS      3221 non-null   object  
 8   STATE_NAME  3221 non-null   object  
 9   LSAD        3221 non-null   object  
 10  ALAND       3221 non-null   int64   
 11  AWATER      3221 non-null   int64   
 12  geometry    3221 non-null   geometry
dtypes: geometry(1), int64(2), object(10)
memory usage: 327.3+ KB


Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry
1961,35,3,929108,0500000US35003,35003,Catron,Catron County,NM,New Mexico,6,17933561654,14193499,"POLYGON ((-109.04730 33.40978, -109.04661 33.7..."


In [18]:
# Five random rows of the Georgia county subset
df_all_ga_counties.sample(n=5)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry
1537,13,33,347944,0500000US13033,13033,Burke,Burke County,GA,Georgia,6,2142015889,20578826,"POLYGON ((-82.27235 32.93780, -82.29118 33.062..."
1879,13,313,353513,0500000US13313,13313,Whitfield,Whitfield County,GA,Georgia,6,752248663,1579031,"POLYGON ((-85.14479 34.76764, -85.06165 34.819..."
2894,13,7,342832,0500000US13007,13007,Baker,Baker County,GA,Georgia,6,885665381,18598653,"POLYGON ((-84.62758 31.33212, -84.63758 31.433..."
2508,13,309,351278,0500000US13309,13309,Wheeler,Wheeler County,GA,Georgia,6,765314383,12377032,"POLYGON ((-82.92786 32.13527, -82.87486 32.180..."
2475,13,151,1671894,0500000US13151,13151,Henry,Henry County,GA,Georgia,6,825449018,20273034,"POLYGON ((-84.35382 33.39729, -84.29468 33.435..."


In [19]:
# Initial view handed to the Map widget
center = [37.546647, -90.373178]
zoom = 4

# Creating geodata using IpyLeaflet + Carto
# (use basemaps.CartoDB.DarkMatter instead for a dark-themed basemap)
usa_map = Map(basemap=basemaps.CartoDB.Positron, center=center, zoom=zoom)

# Adding dataframe to be mapped: only the row(s) named 'Gwinnett'.
# No `explore()` call is made here -- a non-final expression in a cell is
# never rendered, so it would only build a folium map and throw it away.
# NOTE: this filter needs the 'NAME' column, so the cell must run before the
# column is renamed to 'county'.
geodata = GeoData(
    geo_dataframe=df_all_ga_counties.loc[df_all_ga_counties['NAME'] == 'Gwinnett'],
    style={
        'color': 'white',
        'fillColor': '#ff00ff',
        'opacity': 0.03,
        'weight': 1.9,
        'dashArray': '2',
        'fillOpacity': 0.2
    },
    hover_style={'fillColor': '#ff00ff', 'fillOpacity': 0.1},
    name='df_counties'
)
usa_map.add_layer(geodata)
usa_map

Map(center=[37.546647, -90.373178], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title'…

In [20]:
# Boundary record(s) whose NAME is 'Fulton'
is_fulton = df_all_ga_counties['NAME'].eq('Fulton')
df_all_ga_counties.loc[is_fulton]

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry
2503,13,121,1694833,0500000US13121,13121,Fulton,Fulton County,GA,Georgia,6,1364133162,19666288,"POLYGON ((-84.81587 33.52025, -84.80893 33.574..."


In [21]:
# Loading the table of Georgia counties grouped by region (one column per region).
# The base folder defaults to the original local location but can be
# redirected with the RENTAL_DATA_DIR environment variable, so the notebook
# is not tied to one machine's directory layout.
RENTAL_DATA_DIR = os.environ.get(
    'RENTAL_DATA_DIR',
    'C:/Users/remar/OneDrive/Desktop/Code/SynthWriter/Current Market Rental Data Analysis'
)
df_ga_counties_by_region = pd.read_csv(RENTAL_DATA_DIR + '/GA_counties_by_region.csv')
df_ga_counties_by_region

Unnamed: 0,Region One,Region Two,Region Three,Region Four,Region Five,Region Six,Region Seven,Region Eight,Region Nine,Region Ten,Region Eleven,Region Twelve
0,Bartow,Banks,Butts,Carroll,Barrow,Baldwin,Burke,Chattahoochee,Appling,Baker,Atkinson,Bryan
1,Catoosa,Dawson,Clayton,Coweta,Clarke,Bibb,Columbia,Clay,Bleckley,Calhoun,Bacon,Bulloch
2,Chattooga,Forsyth,Cobb,Douglas,Elbert,Crawford,Glascosk,Dooly,Candler,Decatur,Ben Hill,Camden
3,Cherokee,Franklyn,DeKalb,Heard,Greene,Houston,Hancock,Harris,Dodge,Dougherty,Berrien,Chatham
4,Dade,Haversham,Fayette,Lamar,Jackson,Jones,Jefferson,Macon,Emanuel,Early,Brantley,Effingham
5,Fannin,Hall,Fulton,Meriwether,Jasper,Monroe,Jenkins,Marion,Jeff Davis,Grady,Brooks,Evans
6,Floyd,Hart,Gwinnett,Pike,Madison,Peach,Lincoln,Muscogee,Johnson,Lee,Charlton,Glynn
7,Gilmer,Lumpkin,Henry,Troup,Morgan,Pulaski,McDuffie,Quitman,Laurens,Miller,Clinch,Liberty
8,Gordon,Rabun,Rockdale,Upson,Newton,Putnam,Richmond,Randolph,Montgomery,Mitchell,Coffee,Long
9,Haralson,Stephens,Spalding,,Oconee,Twiggs,Screven,Schley,Tattnall,Seminole,Cook,McIntosh


In [22]:
# County names to analyse: every non-null entry of 'Region Three' plus the
# listed peripheral counties, combined into one alphabetically sorted list.
atl_counties = df_ga_counties_by_region['Region Three'].dropna().tolist()
peripheral_counties = [
    'Bartow', 'Paulding', 'Carroll', 'Coweta', 'Newton', 'Walton',
    'Barrow', 'Forsyth', 'Hall', 'Douglas', 'Cherokee',
]

list_test = sorted(atl_counties + peripheral_counties)
list_test

['Barrow',
 'Bartow',
 'Butts',
 'Carroll',
 'Cherokee',
 'Clayton',
 'Cobb',
 'Coweta',
 'DeKalb',
 'Douglas',
 'Fayette',
 'Forsyth',
 'Fulton',
 'Gwinnett',
 'Hall',
 'Henry',
 'Newton',
 'Paulding',
 'Rockdale',
 'Spalding',
 'Walton']

In [23]:
# Sourcing zip code county data from public Github repo.
# `zipcode` is read as text: zip codes are identifiers, so this keeps any
# leading zeros and avoids a mixed int/str column from dtype inference.
geo_data_url = 'https://raw.githubusercontent.com/scpike/us-state-county-zip/master/geo-data.csv'
df_geo = pd.read_csv(geo_data_url, dtype={'zipcode': str})

In [24]:
# Filtering the zip lookup to Georgia rows whose county appears in `list_test`.
# `.copy()` makes the result an independent frame, so columns added to it
# later are not written into a slice of `df_geo`.
df_atl_counties = df_geo.loc[
    (df_geo['state_abbr'] == 'GA') & (df_geo['county'].isin(list_test))
].copy()

In [25]:
# Georgia zip rows for the counties in `list_test`
in_georgia = df_geo['state_abbr'].eq('GA')
in_selected_counties = df_geo['county'].isin(list_test)
df_geo.loc[in_georgia & in_selected_counties]

Unnamed: 0,state_fips,state,state_abbr,zipcode,county,city
5504,13,Georgia,GA,30002,DeKalb,Avondale estates
5505,13,Georgia,GA,30004,Fulton,Zcta 30004
5506,13,Georgia,GA,30005,Fulton,Zcta 30005
5507,13,Georgia,GA,30008,Cobb,Zcta 30008
5508,13,Georgia,GA,30011,Barrow,Zcta 30011
...,...,...,...,...,...,...
5834,13,Georgia,GA,30641,Walton,Good hope
5840,13,Georgia,GA,30655,Walton,Monroe
5841,13,Georgia,GA,30656,Walton,Zcta 30656
5847,13,Georgia,GA,30666,Barrow,Statham


In [26]:
# Aligning the join key: the boundaries table calls the column 'NAME', the zip
# table calls it 'county'. The renamed frame is rebound to the same name
# instead of using `inplace=True`, so no derived frame is mutated in place.
df_all_ga_counties = df_all_ga_counties.rename(columns={'NAME': 'county'})

# Side-by-side look at both tables and their shapes
display(
    df_all_ga_counties.loc[df_all_ga_counties['county'] != 'Cobb'].sample(5),
    df_atl_counties.loc[df_atl_counties['county'] == 'Cobb'].sample(5),
    df_all_ga_counties.shape,
    df_atl_counties.shape
)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,county,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry
3033,13,305,350608,0500000US13305,13305,Wayne,Wayne County,GA,Georgia,6,1662287070,17768149,"POLYGON ((-82.13301 31.77340, -82.08590 31.823..."
2254,13,221,351262,0500000US13221,13221,Oglethorpe,Oglethorpe County,GA,Georgia,6,1137165335,7708032,"POLYGON ((-83.30662 33.81144, -83.27593 33.847..."
1035,13,247,357592,0500000US13247,13247,Rockdale,Rockdale County,GA,Georgia,6,336226920,5900974,"POLYGON ((-84.18414 33.64616, -84.11579 33.614..."
818,13,147,1687995,0500000US13147,13147,Hart,Hart County,GA,Georgia,6,601970613,63521436,"POLYGON ((-83.09409 34.41607, -83.05057 34.495..."
482,13,161,1687999,0500000US13161,13161,Jeff Davis,Jeff Davis County,GA,Georgia,6,857075892,11600966,"POLYGON ((-82.83637 31.81615, -82.65826 31.923..."


Unnamed: 0,state_fips,state,state_abbr,zipcode,county,city
5582,13,Georgia,GA,30127,Cobb,Zcta 30127
5537,13,Georgia,GA,30060,Cobb,Marietta
5507,13,Georgia,GA,30008,Cobb,Zcta 30008
5541,13,Georgia,GA,30067,Cobb,Marietta
5551,13,Georgia,GA,30082,Cobb,Smyrna


(159, 13)

(187, 6)

In [27]:
# Five random rows of the filtered zip table
df_atl_counties.sample(n=5)

Unnamed: 0,state_fips,state,state_abbr,zipcode,county,city
5753,13,Georgia,GA,30504,Hall,Gainesville
5684,13,Georgia,GA,30318,Fulton,Atlanta
5701,13,Georgia,GA,30345,DeKalb,Atlanta
5641,13,Georgia,GA,30252,Henry,Zcta 30252
5665,13,Georgia,GA,30294,DeKalb,Zcta 30294


In [28]:
# Checking if zip code is valid, i.e. made up of numeric characters only.
# The check is vectorised (no row-wise apply); `astype(str)` lets a value that
# was parsed as a number still be tested instead of raising. `assign` builds a
# new frame rather than writing a column into a slice.
zip_is_numeric = df_atl_counties['zipcode'].astype(str).str.isnumeric()
df_atl_counties = df_atl_counties.assign(valid_zip_code=zip_is_numeric)

print(df_atl_counties.groupby(['valid_zip_code', 'county'])['zipcode'].count().sort_index())

# Invalid zip codes are selected once and reused for every figure in the report
invalid_zips = df_atl_counties.loc[~df_atl_counties['valid_zip_code'], 'zipcode']
invalid_unique = invalid_zips.nunique()
total_unique = df_atl_counties['zipcode'].nunique()
# Guard the ratio so an empty selection reports 0% instead of dividing by zero
invalid_pct = (invalid_unique / total_unique) * 100 if total_unique else 0.0

print(
    '\n{:0,} invalid zip codes found out of {:0,} zip codes total ({:.2f}%): {}'.format(
        invalid_unique,
        total_unique,
        invalid_pct,
        invalid_zips.unique()
    )
)

valid_zip_code  county  
False           Bartow       1
                Butts        1
                Cobb         2
True            Barrow       4
                Bartow       8
                Butts        3
                Carroll      8
                Cherokee     7
                Clayton      9
                Cobb        17
                Coweta       7
                DeKalb      28
                Douglas      4
                Fayette      5
                Forsyth      2
                Fulton      33
                Gwinnett    15
                Hall        10
                Henry        5
                Newton       5
                Paulding     3
                Rockdale     3
                Spalding     2
                Walton       5
Name: zipcode, dtype: int64

4 invalid zip codes found out of 187 zip codes total (2.14%): ['300HH' '301HH' '302HH' '303HH']


In [29]:
# Keeping only the rows flagged as having a valid zip code
has_valid_zip = df_atl_counties['valid_zip_code'].eq(True)
df_atl_counties_valid_zips = df_atl_counties.loc[has_valid_zip]

In [46]:
# Merging dataframes: attach every valid zip code (and its city) to the
# boundary record of its county.
test_merge = pd.merge(
    df_all_ga_counties,
    df_atl_counties_valid_zips[['zipcode', 'city', 'county']],
    how='inner',             # keep only counties present in both tables
    on='county',             # shared county-name column
    validate='one_to_many'   # a county must appear once on the boundary side;
                             # fail loudly rather than silently multiplying rows
)

test_merge.sample(3)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,county,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry,zipcode,city
175,13,89,1687424,0500000US13089,13089,DeKalb,DeKalb County,GA,Georgia,6,693443933,8858826,"POLYGON ((-84.34809 33.84529, -84.34830 33.857...",30340,Doraville
15,13,63,1672399,0500000US13063,13063,Clayton,Clayton County,GA,Georgia,6,366879097,6962586,"POLYGON ((-84.45856 33.59444, -84.45805 33.629...",30274,Riverdale
30,13,217,1673547,0500000US13217,13217,Newton,Newton County,GA,Georgia,6,709148422,13974069,"POLYGON ((-84.04449 33.52578, -84.00328 33.554...",30014,Zcta 30014


In [34]:
# Post-merge checks: number of rows whose zip code contains 'HH', total row
# count, the counties that made it through, and five random rows.
hh_rows = test_merge.loc[test_merge['zipcode'].str.contains('HH')]

display(
    len(hh_rows),
    len(test_merge),
    test_merge['county'].unique(),
    test_merge.sample(5)
)

0

183

array(['Hall', 'Clayton', 'Coweta', 'Barrow', 'Newton', 'Fayette',
       'Rockdale', 'Douglas', 'Cobb', 'Cherokee', 'Spalding', 'Bartow',
       'Butts', 'Walton', 'Gwinnett', 'Carroll', 'Henry', 'Fulton',
       'Forsyth', 'DeKalb', 'Paulding'], dtype=object)

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,county,NAMELSAD,STUSPS,STATE_NAME,LSAD,ALAND,AWATER,geometry,zipcode,city
21,13,77,326666,0500000US13077,13077,Coweta,Coweta County,GA,Georgia,6,1142364907,12680737,"POLYGON ((-85.01536 33.42551, -84.93538 33.437...",30263,Raymond
97,13,135,1688166,0500000US13135,13135,Gwinnett,Gwinnett County,GA,Georgia,6,1115653877,15611813,"POLYGON ((-84.26228 33.98741, -84.18202 33.997...",30071,Norcross
55,13,67,1686112,0500000US13067,13067,Cobb,Cobb County,GA,Georgia,6,880026860,12347127,"POLYGON ((-84.73784 34.07940, -84.65924 34.078...",30082,Smyrna
164,13,89,1687424,0500000US13089,13089,DeKalb,DeKalb County,GA,Georgia,6,693443933,8858826,"POLYGON ((-84.34809 33.84529, -84.34830 33.857...",30084,Tucker
26,13,13,356976,0500000US13013,13013,Barrow,Barrow County,GA,Georgia,6,417082919,4909580,"POLYGON ((-83.81768 34.12749, -83.76753 34.066...",30011,Zcta 30011


In [35]:
# Interactive map of the merged county / zip rows, coloured by NAMELSAD.
test_merge.explore(
    column='NAMELSAD',
    tooltip=['county', 'GEOID', 'COUNTYFP'],
    popup=True,
    tiles='CartoDB positron',
    highlight=True,
    cmap='RdYlGn',
    # `control_scale` is an argument of explore() itself. Inside `style_kwds`
    # it is treated as a path style key and has no effect, so it is passed
    # here. (`vmin` / `vmax` were None, which is already the default.)
    control_scale=False,
    # Path styling only: stroke opacity, stroke weight, fill opacity
    style_kwds=dict(
        opacity=0.02,
        weight=1.9,
        fillOpacity=0.04
    ),
    legend=False,
    legend_kwds=dict(
        caption='County',
        scale=True,
        colorbar=True
    )
)

In [36]:
# Interactive map of the Georgia county subset, coloured by NAMELSAD;
# hovering shows the county name, clicking shows COUNTYFP / GEOID / LSAD.
df_all_ga_counties.explore(
    column='NAMELSAD',
    cmap='cool',
    tiles='CartoDB positron',
    tooltip='county',
    popup=['COUNTYFP', 'GEOID', 'LSAD'],
    highlight=True,
    style_kwds={'opacity': 0.05, 'weight': 1.9, 'fillOpacity': 0.2},
    legend=False,
)