In [3]:
# `from __future__` imports have to be the first statement of a module; keeping it
# at the top lets this cell also run unchanged when the notebook is exported as a
# script. (On Python 3 print_function is already the default behaviour.)
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display, HTML
from ipywidgets import interact, interactive, fixed, interact_manual

# pandas display options: show up to 25 columns / 250 rows and format floats
# with thousands separators and two decimals.
pd.set_option("display.max_columns",25)
pd.set_option("display.max_rows",250)
pd.options.display.float_format = '{:,.2f}'.format

# Inject CSS that widens the notebook container, menubar and toolbar.
display(HTML(data="""
<style>
    div#notebook-container    { width: 90%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

In [4]:
# Absolute, machine-specific folder holding the lesson data; change it to match your own setup.
path = 'C:/Users/marcusdeckert/Box/marcusdeckert/Presentation Work/CARFAX Academy/data/'
# Read the Feather file into the DataFrame used by the rest of the notebook.
data5 = pd.read_feather(path + 'data5.ftr')

In [5]:
# Show every column name of data5 as a plain Python list
list(data5.columns)

['pol_eff_year',
 'ep_bi',
 'ep_col',
 'ee_bi',
 'ee_col',
 'incloss_bi',
 'incloss_col',
 'cc_bi',
 'cc_col',
 'zip',
 'pol_id',
 'vin_id',
 'credit',
 'commute_flag',
 'veh_count_box',
 'driver_count_box',
 'veh_age_box',
 'coll_symb_ntile',
 'limit_bi',
 'ded_coll',
 'DrvAge_box',
 'male',
 'single',
 'widowed',
 'Date',
 'Modeled_Annual_Mileage',
 'Estimated_Current_Mileage',
 'Annual_Mileage_Estimate',
 'Number_of_Titling_Transactions',
 'Lien_Holder',
 'current_ownership_personal',
 'current_ownership_lease',
 'current_ownership_corp_govt',
 'LOO_years',
 'personal_use_flag',
 'rental_use_flag',
 'comm_use_flag',
 'fleet_use_flag',
 'corp_use_flag',
 'lease_flag',
 'curr_owner_odo_cnt',
 'all_owner_odo_cnt',
 'Severe_Problem_flag',
 'Branded_Title_flag',
 'Branded_Title_Loss_flag',
 'Severe_Accident_flag',
 'Other_Severe_Problem_flag',
 'Failed_Emissions_flag',
 'Nonsevere_Accident_flag',
 'Damage_flag',
 'Collision_Repair_Facility_flag',
 'Potential_Damage_flag',
 'Odometer_Prob

In [7]:
### Let's make a function which will output the weighted average values by zip code for the given field
def wtd_avg_val_by_zip(data,field):
    """Return the exposure-weighted average of `field` for every zip code.

    The weight is the `ee_bi` column: for each zip the result is
    sum(value * ee_bi) / sum(ee_bi), taken over the distinct values of `field`.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'zip', 'ee_bi' and `field`.
    field : str
        Name of the column to average. It has to be numeric, because its values
        are multiplied by the exposure.

    Returns
    -------
    pandas.DataFrame
        One row per zip with the columns 'zip' and 'wtd_avg_val'. The frame is an
        independent copy, so callers may add or overwrite columns on it freely.

    Notes
    -----
    Rows with a missing 'zip' or `field` are left out by the groupby (pandas
    drops NaN group keys by default). A zip whose total `ee_bi` is 0 yields
    NaN or inf rather than raising.
    """
    # Total exposure for each (zip, field value) combination.
    exposure = data.groupby(['zip',field]).agg({'ee_bi': 'sum'}).reset_index()

    # Sum-product term: field value weighted by its exposure.
    exposure['sp'] = exposure[field]*exposure['ee_bi']

    # Collapse to one row per zip and divide the sum-product by the total weight.
    by_zip = exposure.groupby(['zip']).agg({'sp': 'sum', 'ee_bi': 'sum'}).reset_index()
    by_zip['wtd_avg_val'] = by_zip['sp']/by_zip['ee_bi']

    # .copy() so that callers assigning to the result (e.g. casting 'zip' to str)
    # do not trigger SettingWithCopyWarning.
    return by_zip[['zip','wtd_avg_val']].copy()

### Plotly vs Folium

In [None]:
# Plotly gives hover info
# Plotly is a little bit easier to get started with
# Plotly allows user to save map in current zoom as picture with mouse click

In [None]:
# Folium has background cities, streets, land features
# Folium seems to render, scroll, and zoom faster
# Folium has code to save as HTML
# Folium out of the box does not come with hover info (we will add it ourselves next lesson)

In [8]:
# Example: ee_bi-weighted average of the 'credit' column for each zip code
wtd_avg_val_by_zip(data5,'credit')

Unnamed: 0,zip,wtd_avg_val
0,43001,1.44
1,43003,3.05
2,43004,2.76
3,43006,6.23
4,43008,4.50
...,...,...
1056,45891,7.42
1057,45894,4.29
1058,45895,1.00
1059,45896,4.83


### Plotly Express Map

In [1]:
### Part 1 - basic plotly express ###
import plotly.express as px
from urllib.request import urlopen  # imported but not used in this cell
import json

# Machine-specific folder holding the GeoJSON file; note this rebinds the `path`
# variable that was set in the data-loading cell.
path = 'C:/Users/marcusdeckert/Box/marcusdeckert/Presentation Work/CARFAX Academy/Lesson 4 stuff/'

# Column of data5 to map.
field = 'credit'
# Load the zip-code shapes (GeoJSON) from the local file.
with open(path + 'oh_ohio_zip_codes_geo.min.json') as f:
    zips_json = json.load(f)
    
# Choropleth: each row's 'zip' is matched to the GeoJSON property named by
# featureidkey, and the shape is colored by 'wtd_avg_val'.
# fitbounds = 'locations' zooms the map to the shapes that are plotted.
fig = px.choropleth(wtd_avg_val_by_zip(data5,field), 
                    geojson = zips_json, 
                    locations='zip', 
                    color='wtd_avg_val',
                    color_continuous_scale="reds", 
                    featureidkey = 'properties.ZCTA5CE10', 
                    scope = 'usa',
                    fitbounds = 'locations'
                          )

### Part 2 - adding title

# title_x = 0.5 centers the title horizontally.
fig.update_layout(
        title_text = 'Weighted Avg ' + field + ' by Zip Code',
        title_x=0.5,
    )

### Part 3 - manually adding city markers

# City coordinates are downloaded from the plotly datasets repository (needs network access).
cities = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')
# The names below carry a trailing space - presumably that is how they appear in the CSV; verify if a city is missing.
ohio_cities = cities.loc[cities['name'].isin(['Columbus ','Cleveland ','Cincinnati ','Toledo ','Akron '])]
# Keep only the columns needed for plotting and drop repeated rows.
ohio_cities = ohio_cities[(['name','lat','lon'])].drop_duplicates()

import plotly.graph_objects as go

# Overlay a labelled black marker for each selected city on top of the choropleth.
fig.add_trace(go.Scattergeo(
    lat=ohio_cities['lat'],
    lon=ohio_cities['lon'],
    mode="markers+text",
    marker={
        "color": "Black",
        "line": {
            "width": 1
        },
        "size": 8
    },
    text=ohio_cities['name'],
    textfont={
        "color": "Black",
        "family": "Arial Black",
        "size": 14
    },
    textposition="top center"
))

fig.show()

### state zip code geojson files; may not be totally up to date
# https://github.com/OpenDataDE/State-zip-code-GeoJSON

### color scales for plotly express
# https://plotly.com/python/builtin-colorscales/#named-builtin-continuous-color-scales

### GeoJSON structure

In [12]:
# The loaded GeoJSON is a dictionary with a fixed structure; list its top-level keys
list(zips_json)

['type', 'features']

In [13]:
# The value stored under the top-level 'type' key of the GeoJSON dictionary
zips_json['type']

'FeatureCollection'

In [14]:
# Grab the first feature of the json. The outer bracket in the output comes from the
# [0:1] slice (slicing a list always returns a list); the feature itself is the
# dictionary inside it, with 'type', 'properties' and 'geometry' keys.
zips_json['features'][0:1]

[{'type': 'Feature',
  'properties': {'STATEFP10': '39',
   'ZCTA5CE10': '45830',
   'GEOID10': '3945830',
   'CLASSFP10': 'B5',
   'MTFCC10': 'G6350',
   'FUNCSTAT10': 'S',
   'ALAND10': 245664720,
   'AWATER10': 465444,
   'INTPTLAT10': '+40.9084596',
   'INTPTLON10': '-084.0959329',
   'PARTFLG10': 'N'},
  'geometry': {'type': 'MultiPolygon',
   'coordinates': [[[[-84.051797, 40.846578],
      [-84.053422, 40.846572],
      [-84.058143, 40.846593],
      [-84.062935, 40.846592],
      [-84.065125, 40.846594],
      [-84.066055, 40.846587],
      [-84.066278, 40.846585],
      [-84.067145, 40.846579],
      [-84.068737, 40.846582],
      [-84.071053, 40.846575],
      [-84.071057, 40.846714],
      [-84.07106, 40.847188],
      [-84.071056, 40.848023],
      [-84.071066, 40.848851],
      [-84.071073, 40.849537],
      [-84.071075, 40.849683],
      [-84.07108, 40.850515],
      [-84.071076, 40.851096],
      [-84.071075, 40.85136],
      [-84.071084, 40.852199],
      [-84.071098, 4

In [15]:
# How many zips are there? Counts the entries in 'features'
# (presumably one feature per zip code - see the ZCTA5CE10 property above).
len(zips_json['features'])

1197

In [16]:
# Does any entry of 'features' bundle more than one item (i.e. more than one zip code)?
# A slice such as features[i:i+1] is always a list holding at most one element, so its
# length can never exceed 1; look at the element itself instead. An entry counts as
# "multi item" when it is a list/tuple with more than one member rather than a single
# feature dictionary.
multi_item_counter = 0

for i, feature in enumerate(zips_json['features']):
    if isinstance(feature, (list, tuple)) and len(feature) > 1:
        multi_item_counter += 1
        
    # progress indicator, overwritten in place via the carriage return
    print(str(i) + '   ', end = '\r')

print('       ')
print(multi_item_counter)

0   1   2   3   4   5   6   7   8   9   10   11   12   13   14   15   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68   69   70   71   72   73   74   75   76   77   78   79   80   81   82   83   84   85   86   87   88   89   90   91   92   93   94   95   96   97   98   99   100   101   102   103   104   105   106   107   108   109   110   111   112   113   114   115   116   117   118   119   120   121   122   123   124   125   126   127   128   129   130   131   132   133   134   135   136   137   138   139   140   141   142   143   144   145   146   147   148   149   150   151   152   153   154   155   156   157   158 

### Folium Map

In [2]:
import folium
import json

# Column of data5 to map.
field = 'credit'
# change to string for folium to merge on
# (the ZCTA5CE10 values in the GeoJSON properties are strings)
zip_data = wtd_avg_val_by_zip(data5,field)
zip_data['zip'] = zip_data['zip'].astype('str')

# proceed: load the zip-code shapes (GeoJSON) from the local, machine-specific folder
path = 'C:/Users/marcusdeckert/Box/marcusdeckert/Presentation Work/CARFAX Academy/Lesson 4 stuff/'
with open(path + 'oh_ohio_zip_codes_geo.min.json') as f:
    zips_json = json.load(f)

# declare map object with location and zoom level
# stroke = False to disable borders
# NOTE(review): `stroke` does not appear among folium.Map's documented arguments - confirm it has the intended effect
# prefer_canvas = True can increase performance in some cases so I use it by default
m = folium.Map(location = [40, -83], zoom_start = 8, stroke = False, prefer_canvas=True)

# Choropleth layer: rows of zip_data are joined to the GeoJSON features by matching
# the 'zip' column against each feature's properties.ZCTA5CE10, and shapes are
# colored by 'wtd_avg_val'. Shapes without a value are drawn in gray.
choro = folium.Choropleth(
        geo_data = zips_json,
        data = zip_data,
        columns = ['zip','wtd_avg_val'],
        key_on = 'feature.properties.ZCTA5CE10',
        fill_color = 'YlOrRd',
        nan_fill_color = 'gray',
        fill_opacity = 0.5,
        line_opacity = 0.0,
        legend_name= field,
    )

choro.add_to(m)

# Title: injected as a raw HTML heading into the map's root document
title = 'Weighted Avg ' + field + ' by Zip Code'
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(title)
m.get_root().html.add_child(folium.Element(title_html))

# Last expression of the cell, so the notebook renders the map
m

### Folium documentation
# https://python-visualization.github.io/folium/modules.html

### Folium Map with function

In [22]:
# Folium map as a function
import folium

def folium_map(data,field):
    """Build a folium choropleth of the weighted average of `field` by zip code.

    Parameters
    ----------
    data : pandas.DataFrame
        Passed straight to `wtd_avg_val_by_zip`, so it needs the columns that
        helper requires ('zip', 'ee_bi' and `field`).
    field : str
        Column to average; also used for the legend label and the map title.

    Returns
    -------
    folium.Map
        The map with the choropleth layer and an HTML title attached.

    Notes
    -----
    Reads the GeoJSON file 'oh_ohio_zip_codes_geo.min.json' from the folder named
    by the notebook-level variable `path`, which must be set before calling.
    """
    # Weighted averages per zip, computed with the helper defined earlier in this
    # notebook. The zip is cast to string so folium can match it against the
    # string-valued ZCTA5CE10 property; .assign returns a new frame instead of
    # writing into the helper's result.
    zip_data = wtd_avg_val_by_zip(data,field).assign(
        zip = lambda frame: frame['zip'].astype('str')
    )

    with open(path + 'oh_ohio_zip_codes_geo.min.json') as f:
        zips_json = json.load(f)

    m = folium.Map(location = [40, -83], zoom_start = 8, stroke = False, prefer_canvas=True)

    # The helper always names its value column 'wtd_avg_val', whatever `field` is.
    choro = folium.Choropleth(
            geo_data = zips_json,
            data = zip_data,
            columns = ['zip','wtd_avg_val'],
            key_on = 'feature.properties.ZCTA5CE10',
            fill_color = 'YlOrRd',
            nan_fill_color = 'gray',
            fill_opacity = 0.5,
            line_opacity = 0.0,
            legend_name= field,
        )

    choro.add_to(m)

    # Title
    title = 'Weighted Avg ' + field + ' by Zip Code'
    title_html = '''
                 <h3 align="center" style="font-size:16px"><b>{}</b></h3>
                 '''.format(title)
    m.get_root().html.add_child(folium.Element(title_html))

    return m

In [55]:
# # Add our widget

# fields = [i for i in data5.columns if i not in ['pol_eff_year','ep_bi','ep_col','ee_bi','ee_col','incloss_bi','incloss_col','cc_bi','cc_col','zip','pol_id','vin_id']]
    
# fields

['credit',
 'commute_flag',
 'veh_count_box',
 'driver_count_box',
 'veh_age_box',
 'coll_symb_ntile',
 'limit_bi',
 'ded_coll',
 'DrvAge_box',
 'male',
 'single',
 'widowed',
 'Date',
 'Modeled_Annual_Mileage',
 'Estimated_Current_Mileage',
 'Annual_Mileage_Estimate',
 'Number_of_Titling_Transactions',
 'Lien_Holder',
 'current_ownership_personal',
 'current_ownership_lease',
 'current_ownership_corp_govt',
 'LOO_years',
 'personal_use_flag',
 'rental_use_flag',
 'comm_use_flag',
 'fleet_use_flag',
 'corp_use_flag',
 'lease_flag',
 'curr_owner_odo_cnt',
 'all_owner_odo_cnt',
 'Severe_Problem_flag',
 'Branded_Title_flag',
 'Branded_Title_Loss_flag',
 'Severe_Accident_flag',
 'Other_Severe_Problem_flag',
 'Failed_Emissions_flag',
 'Nonsevere_Accident_flag',
 'Damage_flag',
 'Collision_Repair_Facility_flag',
 'Potential_Damage_flag',
 'Odometer_Problem_flag',
 'Repossession_flag',
 'CPO_flag',
 'wtd_avg_travel_time',
 'diploma_ntile',
 'pop_density_ntile',
 'unemployment_ntile',
 'Modele

In [77]:
# folium_map(data5,'pop_density_ntile')

In [None]:
### Saving your map HTML
# m.save('save_path/save_name.html')

### Lesson 4 Exercise 1
#### Make a new function to create Collision claim frequency by zip
#### Copy and paste the plotly map cell and modify it to create a map of this claim frequency by zip