In [1]:
import pandas as pd
import bqplot
import yt
import us
import ipywidgets
import numpy as np

# Display names for the two measures (not referenced by the cells visible in this notebook).
symbol_s, symbol_t = 'Total Sightings', 'Total Duration Seconds'

In [2]:
from bqplot import ColorScale, LinearScale, LogScale, Axis, Lines, Figure
from ipywidgets import Layout, Dropdown, HBox, VBox, HTML
from traitlets import link
from bqplot.interacts import FastIntervalSelector

## UFO Data

In [3]:
# Location of the UFO sightings CSV.
fn = "/srv/nbgrader/data/ufo-scrubbed-geocoded-time-standardized.csv"

# Column names assigned to the CSV fields, in file order.
names = [
    "date",
    "city",
    "state",
    "country",
    "shape",
    "duration_seconds",
    "duration_reported",
    "description",
    "report_date",
    "latitude",
    "longitude",
]

# Both timestamp columns are parsed into datetimes while reading.
ufo = pd.read_csv(
    fn,
    names=names,
    parse_dates=["date", "report_date"],
)

In [4]:
# Derive the sighting year from the date column and use it as the row index.
ufo = (
    ufo
    .assign(year=ufo['date'].dt.year)
    .set_index('year')
)

In [24]:
# Lookup from two-letter state abbreviation to FIPS code.
abbr_to_fips = us.states.mapping('abbr', 'fips')

# Add an integer fips column. A record gets -1 when its state is missing, is
# not a key of the lookup, or is a key whose value is None (the selection
# callback further down guards against None values, so they are expected).
# to_numeric(errors="coerce") turns those cases into NaN instead of raising
# the TypeError that int(None) would.
ufo["fips"] = (
    pd.to_numeric(
        ufo["state"].astype(str).str.upper().map(abbr_to_fips),
        errors="coerce",
    )
    .fillna(-1)
    .astype("int64")
)

# All-time sightings per FIPS code (count of non-null report dates).
fips_count = ufo.groupby("fips")["report_date"].count()
total_s = fips_count

# All-time sum of duration_seconds per FIPS code.
total_t = ufo.groupby("fips")["duration_seconds"].sum()

# Map tooltip labels, one pair per field selectable in the dropdown.
label_s = ['State', 'Total sightings']
label_t = ['State', 'Total time']

In [25]:
# Aggregates of duration_seconds for every (fips, year) pair:
#   total_sightings -> number of records with a duration in that state and year
#   total_time      -> sum of the durations, in seconds, in that state and year
total_sightings = (
    ufo.groupby(by=["fips", "year"])["duration_seconds"]
    .count()
)
total_time = (
    ufo.groupby(by=["fips", "year"])["duration_seconds"]
    .sum()
)

In [41]:
# Stack the two all-time series as rows, give the rows descriptive names,
# then flip the table so each FIPS code is a row and turn fips into a column.
ufo_vals = (
    pd.DataFrame([total_s, total_t])
    .rename(index={'report_date': 'total_sightings',
                   'duration_seconds': 'total_time'})
    .transpose()
    .reset_index()
)

In [42]:
# Aggregates of duration_seconds per year over all states:
#   total_s_yr -> number of records with a duration in that year
#   total_t_yr -> sum of the durations, in seconds, in that year
total_s_yr = (
    ufo.groupby(by='year')["duration_seconds"]
    .count()
)
total_t_yr = (
    ufo.groupby(by='year')["duration_seconds"]
    .sum()
)

In [53]:
# Preview the all-time sightings count per FIPS code.
total_s.head()

fips
-1    9394
 1     691
 2     354
 4    2689
 5     666
Name: report_date, dtype: int64

In [54]:
# Preview the sightings count per (FIPS code, year) pair.
total_sightings.head()

fips  year
-1    1906    1
      1916    1
      1936    1
      1943    4
      1944    3
Name: duration_seconds, dtype: int64

In [55]:
# Preview the sightings count per year over all states.
total_s_yr.head()

year
1906    1
1910    2
1916    1
1920    1
1925    1
Name: duration_seconds, dtype: int64

## State Information Data

In [58]:
# State facts downloaded from
# https://data.world/aaronhoffman/census-gov-state-quickfacts
# Fields kept: "Population; Census; April 1; 2010", "Geography",
# "Population per square mile; 2010", "Land area in square miles; 2010"
# and "FIPS Code". The data was cleaned and transposed before being
# uploaded to the Jupyter Notebook.
names1 = [
    "population",
    "geo",
    "pop_per_sqmile",
    "area",
    "fips",
]
state_info = pd.read_csv("census_state_quickfacts.csv", names=names1)

In [59]:
# Attach the state_info columns to the per-state totals by matching the fips
# column of ufo_vals against the fips values of state_info. This is a left
# join, so every row of ufo_vals is kept.
ufo_all = ufo_vals.join(
    state_info.set_index(keys='fips'),
    on='fips',
    how='left',
)

In [60]:
# Preview the per-state totals joined with the state_info columns.
ufo_all.head()

Unnamed: 0,fips,total_sightings,total_time,population,geo,pop_per_sqmile,area
0,-1,9394.0,318566100.0,,,,
1,1,691.0,962845.5,4779736.0,Alabama,94.4,50645.33
2,2,354.0,1498068.0,710231.0,Alaska,1.2,570640.95
3,4,2689.0,15990940.0,6392017.0,Arizona,56.3,113594.08
4,5,666.0,67177510.0,2915918.0,Arkansas,56.0,52035.48


## Normalization

In [61]:
# Divide each all-time total by the state's population and by its area.
# Columns are added in this order: *_capita for both measures, then *_sqmile.
for rate_suffix, denominator in (('capita', 'population'), ('sqmile', 'area')):
    for measure in ('total_sightings', 'total_time'):
        ufo_all[measure + '_' + rate_suffix] = ufo_all[measure] / ufo_all[denominator]

In [62]:
# Preview the table again, now including the four normalized columns.
ufo_all.head()

Unnamed: 0,fips,total_sightings,total_time,population,geo,pop_per_sqmile,area,total_sightings_capita,total_time_capita,total_sightings_sqmile,total_time_sqmile
0,-1,9394.0,318566100.0,,,,,,,,
1,1,691.0,962845.5,4779736.0,Alabama,94.4,50645.33,0.000145,0.201443,0.013644,19.011536
2,2,354.0,1498068.0,710231.0,Alaska,1.2,570640.95,0.000498,2.109269,0.00062,2.625237
3,4,2689.0,15990940.0,6392017.0,Arizona,56.3,113594.08,0.000421,2.501705,0.023672,140.772645
4,5,666.0,67177510.0,2915918.0,Arkansas,56.0,52035.48,0.000228,23.038204,0.012799,1290.994423


In [None]:
## Building blocks of the line chart: scales, axes and the line mark.

# Linear scales for the horizontal (year) and vertical (value) directions.
dt_x_fast = LinearScale()
lin_y = LinearScale()

# Axes bound to those scales; only the x-axis carries a label.
x_ax = Axis(label='Year', scale=dt_x_fast)
x_ay = Axis(scale=lin_y, orientation='vertical')

# The line starts out showing total sightings per year over all states.
lc = Lines(
    x=total_s_yr.index.values,
    y=total_s_yr.values,
    scales={'x': dt_x_fast, 'y': lin_y},
    colors=['orange'],
)

In [None]:
# Initial choropleth data: all-time sightings keyed by FIPS code.
# The -1 key is the bucket for records whose state did not map to a FIPS code
# (see where the fips column is built). It presumably has no region on the
# map, so it is dropped to keep it from influencing the colour scale.
map_styles = {'scales': {'projection': bqplot.AlbersUSA(scale_factor=1500),
                         'color': bqplot.ColorScale(scheme='RdBu')},
              'color': total_s.drop(-1, errors='ignore').to_dict()}

# Tooltip shows the region name and the value behind its colour. The labels
# reuse label_s, the same pair field_changed assigns for 'Total sightings'.
map_tt = bqplot.Tooltip(fields=['name', 'color'], labels=list(label_s))

# Clicking a state selects it; hovering shows the tooltip.
states_map = bqplot.Map(map_data=bqplot.topo_load('map_data/USStatesMap.json'),
                        interactions={'click': 'select', 'hover': 'tooltip'},
                        tooltip=map_tt, **map_styles)

map_fig = bqplot.Figure(marks=[states_map],
                        fig_margin={'top': 60, 'bottom': 5, 'left': -5, 'right': -10},
                        layout=ipywidgets.Layout(height='520px', width='520px'),
                        title='UFO Sightings in the United States')

In [None]:
# The two measures the dashboard can display; the first is selected initially.
numerical_cols = ['Total sightings', 'Total time']
field_select = Dropdown(
    options=numerical_cols,
    value=numerical_cols[0],
    description='Field',
)

In [None]:
index = int()  # FIPS code of the state selected on the map; 0 while no state is selected.


def state_select(change):
    """Redraw the line chart and the summary text after the map selection changes.

    Registered as the observer of ``states_map.selected``. With a state
    selected, the chart shows that state's yearly series for the field chosen
    in ``field_select``; with nothing selected it shows the all-states series.
    If the interval selector currently holds a range, the total of the
    displayed series over that range is written to ``db_fast``.

    Parameters
    ----------
    change : dict
        Change notification supplied by the widget framework. It is not
        inspected; the current widget state is read instead.
    """
    # Declared once, up front, so every assignment below targets the
    # module-level ``index`` that the other callbacks read.
    global index

    selection = states_map.selected
    if selection:
        fips = int(selection[-1])  # the last entry of the selection list
        known_fips = {int(code) for code in abbr_to_fips.values() if code is not None}
        if fips not in known_fips:
            return  # not a FIPS code from the lookup: leave everything untouched
        index = fips
    else:
        index = int()

    field = field_select.value
    if field == 'Total sightings':
        per_state, overall, summary_label = total_sightings, total_s_yr, 'Total sightings'
    elif field == 'Total time':
        per_state, overall, summary_label = total_time, total_t_yr, 'Total duration seconds'
    else:
        return  # not one of the dropdown's options: nothing to draw

    if selection:
        try:
            yearly = per_state.loc[index]
        except KeyError:
            yearly = None  # the selected state has no rows in the data
    else:
        yearly = overall

    # Send x and y together so the chart never renders mismatched arrays.
    with lc.hold_sync():
        if yearly is None:
            lc.x = np.array([])
            lc.y = np.array([])
        else:
            lc.x = yearly.index.values
            lc.y = yearly.values

    ind = intsel_fast.selected
    if ind is None or not any(ind):
        return  # no year range is brushed: keep the current summary text

    tot = 0 if yearly is None else yearly.loc[ind[0]:ind[-1]].sum()
    db_fast.value = 'The selected period is {} to {}. {}: {}'.format(
        int(ind[0]), int(ind[-1]), summary_label, tot)


states_map.observe(state_select, 'selected')

In [None]:
## Next we define the type of selector we would like
intsel_fast = FastIntervalSelector(scale=dt_x_fast, marks=[lc])


## Now, we define a function that will be called when the FastIntervalSelector is interacted with
def fast_interval_change_callback(change):
    """Write the total of the displayed series over the selected range to ``db_fast``.

    Registered as the observer of ``intsel_fast.selected``. The series that is
    summed depends on the dropdown field and on whether a state is selected on
    the map (per-state series for the FIPS code in ``index``, otherwise the
    all-states series).

    Parameters
    ----------
    change : object
        Change notification whose ``new`` attribute holds the new selection.
    """
    ind = change.new
    # The selection may be None (the sibling callbacks guard against it too);
    # any(None) would raise a TypeError, so bail out before calling it.
    if ind is None or not any(ind):
        return

    field = field_select.value
    if field == 'Total sightings':
        per_state, overall, summary_label = total_sightings, total_s_yr, 'Total sightings'
    elif field == 'Total time':
        per_state, overall, summary_label = total_time, total_t_yr, 'Total duration seconds'
    else:
        return  # not one of the dropdown's options: nothing to report

    if states_map.selected:
        try:
            yearly = per_state.loc[index]
        except KeyError:
            yearly = None  # the selected state has no rows in the data
    else:
        yearly = overall

    tot = 0 if yearly is None else yearly.loc[ind[0]:ind[-1]].sum()
    db_fast.value = 'The selected period is {} to {}. {}: {}'.format(
        int(ind[0]), int(ind[-1]), summary_label, tot)


## Now we connect the selectors to that function
intsel_fast.observe(fast_interval_change_callback, names=['selected'])

In [None]:
## An HTML widget displays the current selection; the callbacks overwrite its
## text whenever an interaction is performed.
db_fast = HTML(value='The selected period is ' + str(intsel_fast.selected))

fig_fast_intsel = Figure(
    marks=[lc],
    axes=[x_ax, x_ay],
    fig_margin={'top': 59, 'bottom': 40, 'left': 50, 'right': 0},
    layout=ipywidgets.Layout(height='450px', width='450px'),
    interaction=intsel_fast,  # this is where the selector is attached to the figure
)

In [None]:
# function for dropdown box field change

def field_changed(change):
    """Switch the map, the line chart and the summary text to the chosen field.

    Registered as the observer of ``field_select.value``. Recolours the map
    with the all-time totals of the new field, swaps the tooltip labels,
    redraws the line chart (per-state series for the FIPS code in ``index``
    when a state is selected, otherwise the all-states series) and, if a year
    range is brushed, rewrites the summary in ``db_fast``.

    The summary uses the same "The selected period is ..." wording as the
    other two callbacks so the text does not change shape between
    interactions.

    Parameters
    ----------
    change : dict
        Change notification; ``change['new']`` is the newly selected field.
    """
    field = change['new']
    if field == 'Total sightings':
        by_state, tooltip_labels = total_s, label_s
        by_state_year, by_year = total_sightings, total_s_yr
        summary_label = 'Total sightings'
    elif field == 'Total time':
        by_state, tooltip_labels = total_t, label_t
        by_state_year, by_year = total_time, total_t_yr
        summary_label = 'Total duration seconds'
    else:
        return  # not one of the dropdown's options: nothing to update

    # The -1 key is the bucket for records without a matching FIPS code. It
    # presumably has no region on the map, so it is dropped to keep it from
    # influencing the colour scale.
    states_map.color = by_state.drop(-1, errors='ignore').to_dict()
    map_tt.labels = tooltip_labels

    if states_map.selected:
        try:
            yearly = by_state_year.loc[index]
        except KeyError:
            yearly = None  # the selected state has no rows in the data
    else:
        yearly = by_year

    # Send x and y together so the chart never renders mismatched arrays.
    with lc.hold_sync():
        if yearly is None:
            lc.x = np.array([])
            lc.y = np.array([])
        else:
            lc.x = yearly.index.values
            lc.y = yearly.values

    ind = intsel_fast.selected
    if ind is None or not any(ind):
        return  # no year range is brushed: keep the current summary text

    tot = 0 if yearly is None else yearly.loc[ind[0]:ind[-1]].sum()
    db_fast.value = 'The selected period is %s to %s. %s: %s' % (
        int(ind[0]), int(ind[-1]), summary_label, tot)


field_select.observe(field_changed, 'value')

In [None]:
def a():
    """Assemble the dashboard layout and return it.

    Left column: the field dropdown above the map figure.
    Right column: the summary text above the line-chart figure.
    """
    map_column = VBox([field_select, map_fig])
    chart_column = VBox([db_fast, fig_fast_intsel])
    return HBox([map_column, chart_column])

In [None]:
# Build the dashboard layout; as the cell's last expression it is displayed.
a()