Now that we have the databases set, we will start exploring with Plotly and iPython widgets.

In [1]:
import pandas as pd
import numpy as np
from colour import Color

from plotly.offline import init_notebook_mode, download_plotlyjs, plot, iplot
import matplotlib.pyplot as plt

from ipywidgets import interact, interactive, fixed, interact_manual
import plotly.figure_factory as ff
import ipywidgets as widgets

pd.set_option('display.max_columns', None)
init_notebook_mode(connected=True)

In [2]:
records_df = pd.read_csv('data/processed/war_records_cleaned.csv', index_col=0, dtype= {'FIPS':'object'}, infer_datetime_format=['incident_date'])

In [3]:
records_df.head()

Unnamed: 0,draft_type,name,service_branch,rank_rate,pay_grade,occupation_name,birthday,gender,county,country,state_abbr,state_full,marital_status,province,cas_ctry,incident_date,cas_reason,cas_descr,body_recovered,closure_descr,wall,FIPS,lat,lon
0,SELECTED SERVICE,AADLAND GERALD L,ARMY,SP4,E04,INFANTRYMAN,1945-03-29,M,ROBERTS,US,SD,SOUTH DAKOTA,NEVER MARRIED,KONTUM,VS,1968-05-30,KILLED IN ACTION,SMALL ARMS FIRE,Y,BURIED - UNKNOWN DISP,63W 014,46109,45.623397,-96.947551
1,SELECTED SERVICE,AALUND JAMES DOWNING,ARMY,SGT,E05,INFANTRYMAN,1945-05-11,M,HARRIS,US,TX,TEXAS,NEVER MARRIED,PHUOC LONG,VS,1970-02-28,KILLED IN ACTION,ARTILLERY/MORTAR/ROCKET,Y,BURIED - UNKNOWN DISP,13W 066,48201,29.857273,-95.393037
2,ACTIVE - REGULAR,AAMOLD DANIEL LAWRENCE,AIR FORCE,A1C,E03,UNKNOWN,1951-08-23,M,CLAY,US,MN,MINNESOTA,NEVER MARRIED,NINH THUAN,VS,1970-08-20,ACCIDENT,OTHER ACCIDENT,Y,BURIED - UNKNOWN DISP,08W 124,27027,46.898377,-96.494901
3,SELECTED SERVICE,AARDE JAMES RAYMOND,MARINE CORPS,LCPL,E03,RIFLEMAN,1944-06-08,M,KING,US,WA,WASHINGTON,MARRIED,QUANG TRI,VS,1966-12-29,DIED OF WOUNDS,EXPLOSIVE DEVICE,Y,BURIED - UNKNOWN DISP,13E 095,53033,47.493554,-121.832375
4,SELECTED SERVICE,AARON CHARLES EDWARD,ARMY,SP4,E04,INFANTRYMAN,1948-12-12,M,MIDDLESEX,US,MA,MASSACHUSETTS,NEVER MARRIED,BINH THUAN,VS,1970-05-05,KILLED IN ACTION,GRENADE,Y,BURIED - UNKNOWN DISP,11W 098,25017,42.479477,-71.396507


In [4]:
col_to_datetime = ['incident_date', 'birthday']
for col in col_to_datetime:
    records_df[col] = pd.to_datetime(records_df[col])

In [5]:
records_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 58220 entries, 0 to 58219
Data columns (total 24 columns):
draft_type         58220 non-null object
name               58220 non-null object
service_branch     58220 non-null object
rank_rate          58220 non-null object
pay_grade          58220 non-null object
occupation_name    58220 non-null object
birthday           58220 non-null datetime64[ns]
gender             58220 non-null object
county             58220 non-null object
country            58220 non-null object
state_abbr         58220 non-null object
state_full         58220 non-null object
marital_status     58220 non-null object
province           58220 non-null object
cas_ctry           58220 non-null object
incident_date      58220 non-null datetime64[ns]
cas_reason         58220 non-null object
cas_descr          58220 non-null object
body_recovered     58220 non-null object
closure_descr      58220 non-null object
wall               58220 non-null object
FIPS          

In [6]:
records_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 58220 entries, 0 to 58219
Data columns (total 24 columns):
draft_type         58220 non-null object
name               58220 non-null object
service_branch     58220 non-null object
rank_rate          58220 non-null object
pay_grade          58220 non-null object
occupation_name    58220 non-null object
birthday           58220 non-null datetime64[ns]
gender             58220 non-null object
county             58220 non-null object
country            58220 non-null object
state_abbr         58220 non-null object
state_full         58220 non-null object
marital_status     58220 non-null object
province           58220 non-null object
cas_ctry           58220 non-null object
incident_date      58220 non-null datetime64[ns]
cas_reason         58220 non-null object
cas_descr          58220 non-null object
body_recovered     58220 non-null object
closure_descr      58220 non-null object
wall               58220 non-null object
FIPS          

I am planning to use an interactive slider to be able to select what years I want.

1) Get a new df that contains the county (FIPS), by year, with a cum_sum column.  
2) Then geting some interactive sliders working.

Easy.

In [7]:
years = list(range(1956, 1976))

In [8]:
death_by_county_df = pd.DataFrame()

In [9]:
death_by_county_df = pd.DataFrame()

for year in years:
    
    start_date = f"{year}-01-01"
    end_date = f"{year+1}-01-01"
    mask = (records_df.incident_date > start_date) & (records_df.incident_date <= end_date)
    
    place_holder = records_df.loc[mask]
    row_dict = {}
    
    for county_num in place_holder.FIPS.unique():
        try:
            row_dict['FIPS'] = place_holder.loc[place_holder.FIPS == county_num].FIPS.values[0]
            row_dict['county'] = place_holder.loc[place_holder.FIPS == county_num].county.values[0]
            row_dict['state_abbr'] = place_holder.loc[place_holder.FIPS == county_num].state_abbr.values[0]
            row_dict['death_count'] = place_holder.loc[place_holder.FIPS == county_num].shape[0]
            row_dict['year'] = year
#             row_dict['ARMY'] = place_holder.loc[(place_holder.FIPS == county_num) & (place_holder.service_branch == 'ARMY')].shape[0]
#             row_dict['AIR_FORCE'] = place_holder.loc[(place_holder.FIPS == county_num) & (place_holder.service_branch == 'AIR_FORCE')].shape[0]
#             row_dict['MARINES'] = place_holder.loc[(place_holder.FIPS == county_num) & (place_holder.service_branch == 'MARINES')].shape[0]
#             row_dict['NAVY'] = place_holder.loc[(place_holder.FIPS == county_num) & (place_holder.service_branch == 'NAVY')].shape[0]
            death_by_county_df = death_by_county_df.append(row_dict, ignore_index=True)
            
        except:
            pass

In [10]:
death_by_county_df.death_count.cumsum()[-1:]

12334    54371.0
Name: death_count, dtype: float64

In [11]:
death_by_county_df.head()

Unnamed: 0,FIPS,county,death_count,state_abbr,year
0,25021,NORFOLK,1.0,MA,1956.0
1,42021,CAMBRIA,1.0,PA,1957.0
2,31173,THURSTON,1.0,NE,1959.0
3,48099,CORYELL,1.0,TX,1959.0
4,6037,LOS ANGELES,1.0,CA,1960.0


In [12]:
death_by_county_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12335 entries, 0 to 12334
Data columns (total 5 columns):
FIPS           12335 non-null object
county         12335 non-null object
death_count    12335 non-null float64
state_abbr     12335 non-null object
year           12335 non-null float64
dtypes: float64(2), object(3)
memory usage: 481.9+ KB


In [13]:
death_by_county_df.year = pd.to_datetime(death_by_county_df.year, format='%Y')

In [14]:
death_by_county_df.head()

Unnamed: 0,FIPS,county,death_count,state_abbr,year
0,25021,NORFOLK,1.0,MA,1956-01-01
1,42021,CAMBRIA,1.0,PA,1957-01-01
2,31173,THURSTON,1.0,NE,1959-01-01
3,48099,CORYELL,1.0,TX,1959-01-01
4,6037,LOS ANGELES,1.0,CA,1960-01-01


The above code did exclude anyone who didn't have a FIP on their name, so foriegn born. That is why our count isn't whole.

Now all we have left is the slider.

In [15]:
@interact
def Years_of_the_war(x = (1956, 1975, 1)): #Have to put a int to get a slider, but my index is strings. Trying out capabilitles.
    x = str(x)
    return death_by_county_df.loc[death_by_county_df.year == x]

interactive(children=(IntSlider(value=1965, description='x', max=1975, min=1956), Output()), _dom_classes=('wi…

Proof of concept for the slider. All that is left is to find a way to make a proper list of colors to dynamically depeding on the size of the dataframe.

In [16]:
new_df = death_by_county_df.loc[death_by_county_df.year == '1969']
fips = list(new_df.FIPS.values)
values = list(new_df.death_count.values)

In [17]:
red = Color("grey")
colors = list(red.range_to(Color("red"), len(set(values))))
color_hex_list =[]
    
for color in colors:
    color_hex_list.append(color.hex_l)

In [18]:
color_hex_list[:15]

['#808080',
 '#827e7e',
 '#847c7c',
 '#867a7a',
 '#887878',
 '#8a7676',
 '#8c7373',
 '#8f7171',
 '#916f6f',
 '#936d6d',
 '#956b6b',
 '#976969',
 '#996767',
 '#9b6565',
 '#9d6363']

Now I need to make a fucntion that will update a dataframe. That "new" dataframe will feed into a plotly function to create the map. 

In [19]:
def Years_of_the_war(Year = (1956, 1975, 1)): #The variable is the name of the slider.
    Year = str(Year)
    new_df = death_by_county_df.loc[death_by_county_df.year == Year]
    
    fips = list(new_df.FIPS.values)
    values = list(new_df.death_count.values)
    
    blue = Color("blue")
    colors = list(blue.range_to(Color("yellow"),len(set(values)))) #One color for each unique value in my "values" list. Ensures I don't need binning_endpoints
    color_hex_list =[]
    
    for color in colors:
        color_hex_list.append(color.hex_l)
    
    fig = ff.create_choropleth(fips=fips, values=values, scope=['usa'],
                               colorscale=color_hex_list,
                               show_state_data=True,
                               show_hover=True,
                               asp = 2.9,
                               title_text = f"Vietnam Casualties, by county, in {Year}",
                               legend_title = 'Deaths'
    )
    fig.layout.template = None
    return iplot(fig)

In [20]:
interact(Years_of_the_war)

interactive(children=(IntSlider(value=1965, description='Year', max=1975, min=1956), Output()), _dom_classes=(…

<function __main__.Years_of_the_war(Year=(1956, 1975, 1))>

Sweet. I have made a map per year. Now lets add a slider widget to this set up enable to see a span of deaths in years?

In [26]:
range_year_slider = widgets.IntRangeSlider(value = [1958, 1961], min = 1956, max = 1975, step = 1, description="Years")
output1 = widgets.Output()
display(range_year_slider, output1)
 
def Years_of_the_war_slider(year):
    with output1:
        
        start_date = str(year['new'][0])
        end_date = str(year['new'][1])
        
        #print(start_date, end_date)
        
        mask = (death_by_county_df.year > start_date) & (death_by_county_df.year <= end_date)

        new_df = death_by_county_df.loc[mask]

        fips = list(new_df.FIPS.values)
        values = list(new_df.death_count.values)

        blue = Color("cornsilk")
        colors = list(blue.range_to(Color("crimson"),len(set(values)))) #One color for each unique value in my "values" list. Ensures I don't need binning_endpoints
        color_hex_list =[]

        for color in colors:
            color_hex_list.append(color.hex_l)

        fig = ff.create_choropleth(fips=fips, values=values, scope=['usa'],
                                   colorscale=color_hex_list,
                                   show_state_data=True,
                                   show_hover=True,
                                   asp = 2.9,
                                   title_text = f"Vietnam Casualties, by county, from {start_date} to {end_date}",
                                   legend_title = 'Deaths'
        )
        fig.layout.template = None
        return iplot(fig)
range_year_slider.observe(Years_of_the_war_slider, names='value')

IntRangeSlider(value=(1958, 1961), description='Years', max=1975, min=1956)

Output()

In [None]:
buttons = widgets.RadioButtons(
    options=[('All', 'ALL'), ('Army', 'ARMY'), ('Navy', 'NAVY'), ('Marine Corps', 'MARINES'), ('Air Force', 'AIR_FORCE')],
    description='Service Branch:',
    disabled=False
)

In [None]:
int_range = widgets.IntRangeSlider(value = [1958, 1961], min = 1956, max = 1975, step = 1)
output2 = widgets.Output()

display(int_range, output2)

def on_value_change(year):
    with output2:
        print(year['new'])

int_range.observe(on_value_change, names='value')

In [None]:
death_by

In [None]:
def some_random_function(place_holder_variable):
    with output2:
        print(place_holder_variable)
        print(place_holder_variable['new'])

In [None]:
int_range = widgets.IntRangeSlider(value = [1958, 1961], min = 1956, max = 1975, step = 1)
output2 = widgets.Output()
display(int_range, output2)
int_range.observe(some_random_function, names='value')

In [None]:
def what_is_wrong_with_this(place_holder_variable):
#     with output3:
#         print(place_holder_variable)
#         print(place_holder_variable['new'])
    with output2:
        print('FUCKING HELL!')

In [None]:
int_simple_range = widgets.IntSlider(value=5)
output3 = widgets.Output()

display(int_simple_range, output2)
int_simple_range.observe(what_is_wrong_with_this, names='value')

In [None]:
slider.observe()

Step one: Follow these and set up the base map. It is in the mapbox dictionary object where you will specifiy where to center the map https://towardsdatascience.com/how-to-create-interactive-map-plots-with-plotly-7b57e889239a
https://plot.ly/python/mapbox-county-choropleth/


Step two: Layers!
___
https://community.plot.ly/t/create-your-own-choropleth-map-with-custom-shapefiles/2567/20  
DASH WITH CHOLORPLETH MAPS https://github.com/ConnectedSystems/Dash-Choropleth-Example/blob/master/choropleth_example.py  
http://vincepota.com/plotly_choropleth_map.html

In [None]:
#Import geopandas
import geopandas as gpd

Try to get this going https://plot.ly/~empet/14692/mapbox-choropleth-that-works-with-plotly/#/

In [None]:
#Read in the shape file
#shape files found at https://data.humdata.org/dataset/viet-nam-administrative-boundaries-polygon-polyline
vn_df = gpd.read_file('data/geojson/vnm_polbnda_plyl_adm1_2014_pdc/vnm_polbnda_adm1_2014_pdc.shp')

In [None]:
vn_df.head()

In [None]:
vn_df.plot(figsize=(18,18))

In [None]:
list(vn_df.geometry[0])

In [None]:
records_df.groupby('Hor County')

In [None]:
prov = ops_df.location_1

In [None]:
prov.value_counts()

In [None]:
mapbox_key = "pk.eyJ1Ijoic29jam9uIiwiYSI6ImNqenEwZDk4MjBqb3YzaG1tMmsycWEzZXkifQ.Zy16rF6eDSw9pqMA0R-b6Q"
#

In [None]:
from IPython.display import

In [None]:
figure = dict(data = {}, layout = map_layout)
iplot(figure, filename = 'put_your_name_here')

In [None]:
death_df = pd.DataFrame(index=['deaths_in_year'])

In [None]:
years = list(range(1956, 1976))

In [None]:
for year in years:
    
    start_date = f"{year}-01-01"
    end_date = f"{year+1}-01-01"
    mask = (records_df.incident_date > start_date) & (records_df.incident_date <= end_date)
    
    length = records_df.loc[mask].shape[0]
    death_df[f"{year}"] = length

Now that I have death by year, there are just a few more steps that I need to take before I have what I want.

In [None]:
death_df = death_df.T
death_df.head()

Great, we have what we want to far. Now just making a cum_sum column.

In [None]:
death_df['total_deaths'] = death_df.deaths_in_year.cumsum()
death_df.tail()