<a href="https://colab.research.google.com/github/RuthlessActuary2023/CAS-RPM-2025-Python-Workshop/blob/main/2025_CAS_RPM_Python_Lesson_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lesson 4: Visualizing Data - Basic Maps

## Import packages & data

In [None]:
import pandas as pd
import numpy as np

pd.set_option("display.max_columns",25)
pd.set_option("display.max_rows",250)
pd.options.display.float_format = '{:,.2f}'.format

from IPython.display import display, HTML

display(HTML(data="""
<style>
    div#notebook-container    { width: 90%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

import matplotlib.pyplot as plt

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [None]:
# Code to clone the github repo into colab
!pip install gitpython
import git
import os

repo_url = 'https://github.com/RuthlessActuary2023/CAS-RPM-2025-Python-Workshop.git'
repo_dir = '/content/your-repo'

# Clone if it doesn’t already exist
if not os.path.exists(repo_dir):
    git.Repo.clone_from(repo_url, repo_dir)

In [None]:
# Read the workshop dataset (Feather file) from the cloned repo
repo_dir = '/content/your-repo'
data_path = repo_dir + '/data5.ftr'
data5 = pd.read_feather(data_path)

In [None]:
# Column names available in data5
list(data5.columns)

## Function to create weighted average of value by zip code

In [None]:
### Function which outputs the weighted average value by zip code for the given field
def wtd_avg_val_by_zip(data,field):
    """Return the ``ee_bi``-weighted average of ``field`` for every zip code.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'zip'``, ``'ee_bi'`` (the weights) and ``field``.
    field : str
        Name of the numeric column to average.

    Returns
    -------
    pandas.DataFrame
        Columns ``'zip'`` and ``'wtd_avg_val'``, one row per zip code, where
        ``wtd_avg_val = sum(field * ee_bi) / sum(ee_bi)``. Rows whose zip or
        field value is missing are left out of both sums because ``groupby``
        drops missing keys by default.
    """
    # Total weight carried by each distinct (zip, field value) pair.
    weight_by_value = data.groupby(['zip',field]).agg({'ee_bi': 'sum'}).reset_index()

    # Sum-product term: the field value times the weight it carries.
    weight_by_value['sp'] = weight_by_value[field]*weight_by_value['ee_bi']

    # Collapse to one row per zip and divide the sum-product by the total weight.
    totals = weight_by_value.groupby(['zip']).agg({'sp': 'sum', 'ee_bi': 'sum'}).reset_index()
    totals['wtd_avg_val'] = totals['sp']/totals['ee_bi']

    # Return an independent frame so callers can add or overwrite columns freely.
    return totals[['zip','wtd_avg_val']].copy()

## Plotly vs Folium

Plotly gives hover info, is a little bit easier to get started with, and allows the user to save maps in current zoom as a picture with a mouse click

- Folium has background cities, streets, and land features
- Folium may render, scroll, and zoom faster
- Folium has code to save as HTML
- Folium out of the box does not come with hover info (we will add it ourselves next lesson)

## Plotly Express Map

In [None]:
# Preview the weighted average of 'credit' by zip code
wtd_avg_val_by_zip(data5, field='credit')

In [None]:
### Part 1 - basic plotly express ###
import plotly.express as px
from urllib.request import urlopen
import json

repo_dir = '/content/your-repo'
field = 'credit'

# Zip-code boundary GeoJSON from the cloned workshop repo
geojson_path = repo_dir + '/oh_ohio_zip_codes_geo.min.json'
with open(geojson_path) as geojson_file:
    zips_json = json.load(geojson_file)

# One row per zip: 'zip' plus the weighted average 'wtd_avg_val' of `field`
zip_values = wtd_avg_val_by_zip(data5,field)

# Rows are matched to GeoJSON features through each feature's properties.ZCTA5CE10
fig = px.choropleth(
    zip_values,
    geojson=zips_json,
    locations='zip',
    featureidkey='properties.ZCTA5CE10',
    color='wtd_avg_val',
    color_continuous_scale="reds",
    scope='usa',
    fitbounds='locations',
)

### Part 2 - adding title

map_title = 'Weighted Avg ' + field + ' by Zip Code'
fig.update_layout(title_text=map_title, title_x=0.5)

### Part 3 - manually adding city markers

# The names below keep a trailing space - presumably that is how they are
# stored in this dataset (TODO confirm).
# NOTE(review): the filter matches on name only, so same-named cities elsewhere
# in the file would be plotted too - confirm.
city_names = ['Columbus ','Cleveland ','Cincinnati ','Toledo ','Akron ']
cities = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')
is_listed_city = cities['name'].isin(city_names)
ohio_cities = cities.loc[is_listed_city, ['name','lat','lon']].drop_duplicates()

import plotly.graph_objects as go

# Black dots with bold labels drawn on top of the choropleth
marker_style = dict(color="Black", line=dict(width=1), size=8)
label_font = dict(color="Black", family="Arial Black", size=14)
city_layer = go.Scattergeo(
    lat=ohio_cities['lat'],
    lon=ohio_cities['lon'],
    mode="markers+text",
    marker=marker_style,
    text=ohio_cities['name'],
    textfont=label_font,
    textposition="top center",
)
fig.add_trace(city_layer)

fig.show()

### state zip code geojson files; may not be totally up to date
# https://github.com/OpenDataDE/State-zip-code-GeoJSON

### color scales for plotly express
# https://plotly.com/python/builtin-colorscales/#named-builtin-continuous-color-scales

## GeoJSON structure

In [None]:
# The loaded GeoJSON is a dictionary; list its top-level keys
list(zips_json)

In [None]:
# look up the top-level 'type' entry of the loaded GeoJSON dictionary
zips_json['type']

In [None]:
# Show the first feature. Slicing returns a one-element list, which is why the
# output is wrapped in square brackets.
zips_json['features'][:1]

In [None]:
# how many zips are there?
# (counts the entries of the 'features' list; presumably one entry per zip code - TODO confirm)
len(zips_json['features'])

In [None]:
# why are the features lists?  are there multiple zip codes per feature?
# The list seen above comes from slicing with [0:1]. A one-element slice can never
# contain more than one item, so look at each entry of 'features' itself and count
# the ones that are a collection of several items rather than a single feature.
multi_item_counter = 0

for i, feature in enumerate(zips_json['features']):
    if isinstance(feature, (list, tuple)) and len(feature) > 1:
        multi_item_counter += 1

    # progress indicator, overwritten in place via the carriage return
    print(str(i) + '   ', end = '\r')

# blank out the progress indicator, then show the count
print('       ')
print(multi_item_counter)

## Folium Map

In [None]:
import folium
import json

field = 'credit'

# change to string for folium to merge on
zip_data = wtd_avg_val_by_zip(data5,field).assign(
    zip=lambda frame: frame['zip'].astype('str')
)

# proceed
repo_dir = '/content/your-repo'
geojson_path = repo_dir + '/oh_ohio_zip_codes_geo.min.json'
with open(geojson_path) as geojson_file:
    zips_json = json.load(geojson_file)

# declare map object with location and zoom level
# stroke = False to disable borders
# prefer_canvas = True can increase performance in some cases so I use it by default
map_options = dict(location=[40, -83], zoom_start=8, stroke=False, prefer_canvas=True)
m = folium.Map(**map_options)

# Colour each zip area by its weighted average; key_on names the GeoJSON
# property that is matched against the 'zip' column
choropleth_options = dict(
    geo_data=zips_json,
    data=zip_data,
    columns=['zip','wtd_avg_val'],
    key_on='feature.properties.ZCTA5CE10',
    fill_color='YlOrRd',
    nan_fill_color='gray',
    fill_opacity=0.5,
    line_opacity=0.0,
    legend_name=field,
)
choro = folium.Choropleth(**choropleth_options)
choro.add_to(m)

# Title
title = 'Weighted Avg ' + field + ' by Zip Code'
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(title)
title_element = folium.Element(title_html)
m.get_root().html.add_child(title_element)

m

### Folium documentation
# https://python-visualization.github.io/folium/modules.html

### Folium Map with function

### Define Function

In [None]:
# Folium map as a function
import folium

# Load the zip-code GeoJSON once; folium_map reads it from the notebook namespace
repo_dir = '/content/your-repo'
geojson_path = repo_dir + '/oh_ohio_zip_codes_geo.min.json'
with open(geojson_path) as geojson_file:
    zips_json = json.load(geojson_file)

def folium_map(data,field):
    """Build a folium choropleth of the weighted average of ``field`` by zip code.

    Parameters
    ----------
    data : pandas.DataFrame
        Passed straight to ``wtd_avg_val_by_zip``.
    field : str
        Column to average; also used as the legend name and in the map title.

    Returns
    -------
    folium.Map
        Map with the choropleth layer and a centred HTML title added. The zip
        boundaries come from the module-level ``zips_json``.
    """
    # The zip key is cast to str for folium to merge on
    zip_values = wtd_avg_val_by_zip(data,field).assign(
        zip=lambda frame: frame['zip'].astype('str')
    )

    map_options = dict(location=[40, -83], zoom_start=8, stroke=False, prefer_canvas=True)
    m = folium.Map(**map_options)

    # key_on names the GeoJSON property that is matched against the 'zip' column
    choropleth_options = dict(
        geo_data=zips_json,
        data=zip_values,
        columns=['zip','wtd_avg_val'],
        key_on='feature.properties.ZCTA5CE10',
        fill_color='YlOrRd',
        nan_fill_color='gray',
        fill_opacity=0.5,
        line_opacity=0.0,
        legend_name=field,
    )
    folium.Choropleth(**choropleth_options).add_to(m)

    # Title
    title = 'Weighted Avg ' + field + ' by Zip Code'
    title_html = '''
                 <h3 align="center" style="font-size:16px"><b>{}</b></h3>
                 '''.format(title)
    title_element = folium.Element(title_html)
    m.get_root().html.add_child(title_element)

    return m

### Check that function works

In [None]:
# Smoke-test the function with a different field
folium_map(data5, field='pop_density_ntile')

### Turn into a widget

In [None]:
# Future statements must come before any other statement in a unit of code.
# print_function is already the default on Python 3, so this is a no-op there.
from __future__ import print_function
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

# Columns that are not offered as map fields in the dropdown
excluded_columns = ['pol_eff_year','ep_bi','ep_col','ee_bi','ee_col','incloss_bi','incloss_col','cc_bi','cc_col','zip','pol_id','vin_id']
fields = [col for col in data5.columns if col not in excluded_columns]


def f(field):
    """Return the folium map of data5 for the selected field (callback for interact)."""
    return folium_map(data5,field)

# Passing a list makes interact offer its items as choices for `field`;
# the trailing semicolon suppresses the cell's text output
interact(f, field = fields);

In [None]:
### Saving your map HTML
# m.save('save_path/save_name.html')

### Lesson 4 Exercise 1
#### Make a new function to create Collision claim frequency by zip
#### Copy and paste the plotly map cell and modify it to create a map of this claim frequency by zip
#### Turn this code into a widget, as was done with Folium above