In [1]:
import pandas as pd
import bqplot
import us
import numpy as np
import matplotlib.pyplot as plt
import csv
import math
import cartopy
import ipywidgets
import matplotlib.cm as cm
%matplotlib inline

In [2]:
# Import code (instructor provided)
from bqplot import DateScale, LinearScale, Axis, Lines, Scatter, Bars, Hist, Figure
from bqplot.interacts import (
    FastIntervalSelector, IndexSelector, BrushIntervalSelector,
    BrushSelector, MultiSelector, LassoSelector, PanZoom, HandDraw
)
from traitlets import link
from bqplot import (
    Figure, Map, Mercator, Orthographic, ColorScale, ColorAxis,
    AlbersUSA, topo_load, Tooltip
)
from ipywidgets import ToggleButtons, VBox, HTML, Dropdown, HBox

In [3]:
# Start code (instructor provided)
names = ["date", "city", "state", "country", "shape", "duration_seconds",
         "duration_reported", "description", "report_date", "latitude",
         "longitude"]
fn = "/srv/nbgrader/data/ufo-scrubbed-geocoded-time-standardized.csv"
ufo = pd.read_csv(fn, names = names, parse_dates = ["date", "report_date"])

abbr_to_fits = us.states.mapping('abbr', 'fips')
ufo["fips"] = ufo["state"].apply(lambda a: int(abbr_to_fits.get(str(a).upper(), -1)))
fips_count = ufo.groupby("fips")["duration_seconds"].count()

## The following are codes for plotting the total number of sightings/duration_seconds in whichever state is highlighted with a function of year

In [4]:
#[code for plot] Get the intended columns and extract year from the date column
basic_info = ufo[['state','date','duration_seconds']]
basic_info['year']=pd.DatetimeIndex(basic_info['date']).year
basic_data = basic_info[['state','year','duration_seconds']]

#[code for plot] Transform the state to fips
abbr_to_fits = us.states.mapping('abbr', 'fips')
basic_data["fips"] = basic_data["state"].apply(lambda a: int(abbr_to_fits.get(str(a).upper(), -1)))
ppdata = basic_data[['fips','year','duration_seconds']]

#[code for plot] Use the groupby function for fips
duration_data = ppdata.groupby(['fips','year']).sum()
sightings_data = ppdata.groupby(['fips','year']).count()
plotdata = duration_data
plotdata['total_sightings']=sightings_data['duration_seconds']

#[code for plot] Quickview of the data for plotting
plotdata.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Unnamed: 1_level_0,duration_seconds,total_sightings
fips,year,Unnamed: 2_level_1,Unnamed: 3_level_1
-1,1906,10800.0,1
-1,1916,60.0,1
-1,1936,1200.0,1
-1,1943,1510.0,4
-1,1944,1380.0,3


## Now we define a function for plotting interval plot of 'duration_seconds'

In [5]:
def durationplot(fips):
## First we define a Figure (adapted by instructor's code)
    testdata=plotdata.loc[fips]
    dt_x_fast = LinearScale()
    lin_y = LinearScale()

    dates_actual = testdata.index.values
    prices = testdata.duration_seconds

    x_ax = Axis(label='Year', scale=dt_x_fast)
    x_ay = Axis(label='Duration_seconds', scale=lin_y, orientation='vertical')
    lc = Lines(x=dates_actual, y=prices, scales={'x': dt_x_fast, 'y': lin_y}, colors=['orange'])
    
    return lc

## Now we define a function for plotting interval plot of 'total_sightings'

In [6]:
def sightings(fips):
## First we define a Figure (adapted by instructor's code)
    testdata=plotdata.loc[fips]
    dt_x_fast = LinearScale()
    lin_y = LinearScale()

    dates_actual = testdata.index.values
    prices = testdata.total_sightings

    x_ax = Axis(label='Year', scale=dt_x_fast)
    x_ay = Axis(label='Total_sightings', scale=lin_y, orientation='vertical')
    lc = Lines(x=dates_actual, y=prices, scales={'x': dt_x_fast, 'y': lin_y}, colors=['orange'])

    return lc

## Here is the code for preparing data to plot the map

In [7]:
#[code for map] (adapted by instructor's code)
duration_seconds = ufo.groupby(["fips"])["duration_seconds"].sum()
total_sightings = ufo.groupby(["fips"])["report_date"].count()
#[code for map] (adapted by instructor's code)
df = pd.DataFrame([duration_seconds, total_sightings])
df.rename({'report_date': 'total_sightings'}, inplace=True)
ufo_vals = df.transpose()
# For the normalized part, the reason we load the data manually is to ensure others can execute our code successfully
# We can load the csv file to normalize, but we found out that we can't upload the dataset on 'data-readonly' file, so we manually did this
ufo_vals["Normalized total sightings"] = [None , 4863300, 741894, 6931071, 2988248, 39250017, 5540545, 3576452, 952065, 681170, 20612439, 10310371, 1428557, 1683140, 12801539, 6633053, 3134693, 2907289, 4436974, 4681666, 1331479, 6016447, 6811779, 9928300, 5519952, 2988726, 6093000, 1042520, 1907116, 2940058, 1334795, 8944469, 2081015, 19745289, 10146788, 757952, 11614373, 3923561, 4093465, 12784227, 1056426, 4961119, 865454, 6651194, 27862596, 3051217, 624594, 8411808, 7288000, 1831102, 5778708, 585501, None]
#[code for map] (adapted by instructor's code)
states = cartopy.io.shapereader.natural_earth(resolution='110m', category='cultural',
                                    name='admin_1_states_provinces_lakes_shp')
reader =cartopy.io.shapereader.Reader(states)
all_states = list(reader.records())

## Now we plot the map and interval plot, connect these two plots, with the dropdown function

In [8]:
# Most of the code were adapted by instructor's code
# First we use the field in ipywidgets to implement the dropdown function
@ipywidgets.interact(field = ['total_sightings', 'duration_seconds','Normalized total sightings'] )
def make_plot(field):
    # Loop the data for map plot
    for fips in ufo_vals:
        n = np.log10(ufo_vals.loc[:, field])
        norm = (ufo_vals['total_sightings'])/(ufo_vals['Normalized total sightings'])
    # Design the tooltip
    def_tt = Tooltip(fields=['name', 'color'], labels=['State', field],formats = [])    
    # Design the map
    map_styles = {'scales': {'projection': bqplot.AlbersUSA(),
                      'color': bqplot.ColorScale(colors=["red", "blue"])},
              'color': n.to_dict(),
                 'interactions':{'click': 'select', 'hover': 'tooltip'}}
    states_map = bqplot.Map(map_data=bqplot.topo_load('map_data/USStatesMap.json'), tooltip=def_tt,
        **map_styles) 
    # Design the interaction
    states_map.interactions = {'click': 'select', 'hover': 'tooltip'}
    # Design the figure
    map_fig = bqplot.Figure(marks=[states_map], title='USA',fig_margin={'top':0,'bottom':20,'left':10,'right':10})
# Now we design the plot for the 'total_sightings' dropdown    
    if field == 'total_sightings':
        # This is the default data
        testdata=plotdata.loc[1]
        # We tell the user to select a state and imply the meaning of the default(first) interval plot
        print('Please select a state, the default state is Alabama.')
        # First we define a Figure
        dt_x_fast = LinearScale()
        lin_y = LinearScale()
        # Get the x and y values for plotting
        dates_actual = testdata.index.values
        prices = testdata.total_sightings
        # Design the plot
        x_ax = Axis(label='Year', scale=dt_x_fast)
        x_ay = Axis(label='Total_sightings', scale=lin_y, orientation='vertical')
        lc = Lines(x=dates_actual, y=prices, scales={'x': dt_x_fast, 'y': lin_y}, colors=['orange'])
        # Next we define the type of selector we would like
        intsel_fast = FastIntervalSelector(scale=dt_x_fast, marks=[lc])
        # Now, we define a function that will be called when the FastIntervalSelector is interacted with
        md = {}
        def fast_interval_change_callback(change):
            ind = pd.to_datetime(change.new)
            ind = change.new
            tot = testdata.loc[ind[0]:ind[1]]["total_sightings"].sum()
            db_fast.value = 'The selected period is ' + str(change.new).split(' ')[0][1:5] + ' to '+ str(change.new).split(' ')[1][0:4] + ' total: ' + str(tot)
        ## Now we connect the selectors to that function
        intsel_fast.observe(fast_interval_change_callback, names=['selected'])
        ## We use the HTML widget to see the value of what we are selecting
        ## and modify it when an interaction is performed on the selector
        db_fast = HTML()
        db_fast.value = 'The selected period is ' + str(intsel_fast.selected)

        fig_fast_intsel = Figure(marks=[lc], axes=[x_ax, x_ay], title='Total_sightings of selected state',
                         interaction=intsel_fast,fig_margin={'top':0,'bottom':20,'left':50,'right':180}) 
        #This is where we assign the interaction to this particular Figure

        # Define a callback function to plot the inverval, when the input after first click is NULL, 
        # we execute a nonsense line to avoid reporting NoneType error
        def change_selected(event):
            try: 
                lc.y=sightings(event.new[-1]).y
                
            except:
                t=1+1
        states_map.observe(change_selected, "selected")
        # Connect the map and the interval plot using HBox and VBox
        display(HBox([map_fig,VBox([db_fast, fig_fast_intsel])]))

# Now we design the plot for the 'duration_seconds' dropdown       
    elif field == 'duration_seconds':
        # This is the default data
        testdata=plotdata.loc[1]
        # We tell the user to select a state and imply the meaning of the default(first) interval plot
        print('Please select a state, the default state is Alabama.')
        # First we define a Figure
        dt_x_fast = LinearScale()
        lin_y = LinearScale()
        # Get the x and y values for plotting
        dates_actual = testdata.index.values
        prices = testdata.duration_seconds
        # Design the plot
        x_ax = Axis(label='Year', scale=dt_x_fast)
        x_ay = Axis(label='Duration_seconds', scale=lin_y, orientation='vertical')
        lc = Lines(x=dates_actual, y=prices, scales={'x': dt_x_fast, 'y': lin_y}, colors=['orange'])
        # Next we define the type of selector we would like
        intsel_fast = FastIntervalSelector(scale=dt_x_fast, marks=[lc])
        # Now, we define a function that will be called when the FastIntervalSelector is interacted with
        md = {}
        def fast_interval_change_callback(change):
            ind = pd.to_datetime(change.new)
            ind = change.new
            tot = testdata.loc[ind[0]:ind[1]]["duration_seconds"].sum()
            db_fast.value = 'The selected period is ' + str(change.new).split(' ')[0][1:5] + ' to '+ str(change.new).split(' ')[1][0:4] + ' total: ' + str(tot)
        ## Now we connect the selectors to that function
        intsel_fast.observe(fast_interval_change_callback, names=['selected'])
        ## We use the HTML widget to see the value of what we are selecting
        ## and modify it when an interaction is performed on the selector
        db_fast = HTML()
        db_fast.value = 'The selected period is ' + str(intsel_fast.selected)

        fig_fast_intsel = Figure(marks=[lc], axes=[x_ax, x_ay], title='Duration_Seconds of selected state',
                         interaction=intsel_fast,fig_margin={'top':10,'bottom':20,'left':50,'right':180}) 
        #This is where we assign the interaction to this particular Figure

        # Define a callback function to plot the inverval, when the input after first click is NULL, 
        # we execute a nonsense line to avoid reporting NoneType error
        def change_selected(event):
            try:
                lc.y=durationplot(event.new[-1]).y
            except:
                t=1+1
        states_map.observe(change_selected, "selected")
        # Connect the map and the interval plot using HBox and VBox
        display(HBox([map_fig,VBox([db_fast, fig_fast_intsel])]))

# Now we design the plot for the 'Normalized total_sightings' dropdown           
    elif field == 'Normalized total sightings':
        # This is the default data
        testdata=plotdata.loc[1]
        # We tell the user to select a state and imply the meaning of the default(first) interval plot 
        print('Please select a state, the default state is Alabama.')
        # First we define a Figure
        dt_x_fast = LinearScale()
        lin_y = LinearScale()
        # Get the x and y values for plotting
        dates_actual = testdata.index.values
        prices = testdata.total_sightings
        # Design the plot
        x_ax = Axis(label='Year', scale=dt_x_fast)
        x_ay = Axis(label='Total_sightings', scale=lin_y, orientation='vertical')
        lc = Lines(x=dates_actual, y=prices, scales={'x': dt_x_fast, 'y': lin_y}, colors=['orange'])
        # Next we define the type of selector we would like
        intsel_fast = FastIntervalSelector(scale=dt_x_fast, marks=[lc])
        # Now, we define a function that will be called when the FastIntervalSelector is interacted with
        md = {}
        def fast_interval_change_callback(change):
            ind = pd.to_datetime(change.new)
            ind = change.new
            tot = testdata.loc[ind[0]:ind[1]]["total_sightings"].sum()
            db_fast.value = 'The selected period is ' + str(change.new).split(' ')[0][1:5] + ' to '+ str(change.new).split(' ')[1][0:4] + ' total: ' + str(tot)            
        ## Now we connect the selectors to that function
        intsel_fast.observe(fast_interval_change_callback, names=['selected'])
        ## We use the HTML widget to see the value of what we are selecting
        ## and modify it when an interaction is performed on the selector
        db_fast = HTML()
        db_fast.value = 'The selected period is ' + str(intsel_fast.selected)
        fig_fast_intsel = Figure(marks=[lc], axes=[x_ax, x_ay], title='Total_sightings of selected state',
                         interaction=intsel_fast,fig_margin={'top':0,'bottom':20,'left':45,'right':180}) 
        # The above line is where we assign the interaction to this particular Figure

        # Define a callback function to plot the inverval, when the input after first click is NULL, 
        # we execute a nonsense line to avoid reporting NoneType error
        def change_selected(event):
            try:
                lc.y=sightings(event.new[-1]).y                
            except:
                t = 1+1
        states_map.observe(change_selected, "selected")
        # Connect the map and the interval plot using HBox and VBox
        display(HBox([map_fig,VBox([db_fast, fig_fast_intsel])]))


## Why I took the approach I did

To display the USA map, although it can be done by using cartophy package, we chose to use bqplot. It is because bqplot easier to bring interactivity to the map while interactivity in cartopy requires the help of JavaScript. 

For the normalized total sightings data, we divided the total sightings of each state by its population. We did it because some states have a bigger population, it has a higher chance to receive more sighting reports for those states. Thus, we normalized the data to get the ratio of total sightings to population. In order to get the population number, we originally loaded another csv file, grabbed the population number and appended it to the ufo_vals table. But after second thoughts, we are afraid that professor may not be able to read those data since it is stored in our computers. So, we decided to play safe and not to load those data from another file. Instead, we entered those number manually to make sure others can also read the data.

For the interval selected function, we used the 'FastIntervalSelector' in bqplot and we designed a callback function named 'fast_interval_change_callback' to get the sum of y values in the selected time period, which is quite easy and quick to achieve that. 

When it comes to connection the map and the interval plot, we use the VBox and HBox in ipywidgets, and we define a callback function to make sure the connection is correct. 

## Strengths of my approach  
- Multi-function and interactivity plot in one area. It is very suitable for presentation and operation.

bqplot:
- It is easy to modify the notebook with Python. The notebook and JupyterLab both give us the ability to have web application type interactivity with minimal trouble. Every element of the map is an interactive widget, which can be bound to an arbitrary call back function.
- No JavaScript is required. So, it is easier to bring interactivity to the graphs.
- It looks more beautiful than other packages. We can easily change the color of the map as we want.
- Bringing more interactivity to data.
- In bqplot almost everything is an object and we can be modify them live due to clever use of widgets.

Normalized total sighting data:
- People must be able to read the population data and see the corresponding map since the data is hard-coded.

## Weaknesses of my approach  
  
- It seemed not very easy to export the plots out of the ipython/Jupyter environment with interactivity still goes well.
- For the map click function, when we clicked the second state, the first state we clicked before will not disappear. So we have to reclick the first state in order to make it disappear. This may bring some of inconveniences when the user operate it.  
Normalized total sighting data:
- It wastes time to input all the numbers manually.
- Higher human error rate when the data is long and complex.

## What I wished I had been able to do (if anything)
- In the current tooltip, we showed the name of the states and the corresponding sighting data. We wish that in the tooltip content, we could display the original number of total sightings, duration seconds instead of showing the logged number. 
- Since the hover function of tooltip is only available before we select the state, we also wish that we could make it available too even after we click on any states.
- For the color of the map, we use red color to represent small numbers and blue to represent large numbers. However, it is hard to tell what the colors mean just by looking at the map. We wish that we could draw a colorbar below the map to indicate the color.
- In the interval map of duration seconds, we wish that we could increase the distance between the tick values and the label of y-axis because currently there're some overlapping part between these two. They are kinda sticked together right now because the tick values are too long.