### This component builds an interactive visualization of the UFO sighting database. It includes a colored US map with information tooltip, a plot displaying total number of sightings and total duration of sightings with choices.
#### UFO Sightings: https://www.kaggle.com/NUFORC/ufo-sightings.

In [None]:
import pandas as pd
import numpy as np
import bqplot
import yt
import us
import math
from tqdm import tqdm
from bqplot import LinearScale, Axis, Lines, Figure, LogScale
from bqplot.interacts import FastIntervalSelector
from ipywidgets import VBox, HTML, Dropdown, HBox

### 1. Read and clean Data, set year as index, and aggregate by year
#### 1) Read data, and clean data to adjust it to proper format

In [2]:
names = ["date", "city", "state", "country", "shape", "duration_seconds",
         "duration_reported", "rdescription", "report_date", "latitude",
         "longitude"]
fn = "ufo-scrubbed-geocoded-time-standardized.csv"
ufo = pd.read_csv(fn, names = names, parse_dates = ["date", "report_date"])
state_info = pd.read_csv("state_info.txt")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
def make_float(v):
    try:
        return float(v)
    except:
        return v

In [4]:
ls = list(ufo["duration_seconds"])
selected = []

for i in tqdm(range(len(ls))):
#     d = ls[i]
    item = make_float(ls[i])
    if type(item) == str:
        selected.append(i)
    else:
        ufo.loc[i, "duration_seconds"] = item
for i in selected:
    ufo.loc[i, 'duration_seconds'] = float(ufo.loc[i, 'duration_seconds'].replace("`", ""))
    

100%|██████████| 80332/80332 [02:35<00:00, 515.50it/s]


In [5]:
ufo["duration_seconds"].values

array([2700.0, 7200.0, 20.0, ..., 1200.0, 5.0, 1020.0], dtype=object)

#### 2) Include 'fips' into ufo dataset and aggregate state area data into ufo dataset by matching values in fips and GEOID

In [6]:
abbr_to_fits = us.states.mapping('abbr', 'fips')
ufo["fips"] = ufo["state"].apply(lambda a: int(abbr_to_fits.get(str(a).upper(), -1)))
fips_count = ufo.groupby("fips")["duration_seconds"].count()

gg = ufo.groupby("fips")
total_time = []
fips_ind = []
sum_dur = 0
for name, group in gg:
    for i in group["duration_seconds"]:
        sum_dur += i
    total_time.append(sum_dur)
    fips_ind.append(name)
    sum_dur = 0
       
total_time_all = pd.Series(total_time, index = fips_ind)
ufo["GEOID"] = [int(i) for i in ufo["fips"]]
ufo.set_index("GEOID")
ufo = ufo.join(state_info.set_index("GEOID"), on="GEOID", lsuffix="org", rsuffix="new")


In [7]:
import datetime
format_str = '%m/%d/%Y %H:%M'
ll = list(ufo["date"])
for idx in tqdm(range(len(ll))): 
    if type(ll[idx]) == datetime.datetime: continue
    
    try:
        ufo.loc[idx,"date"] = datetime.datetime.strptime(ll[idx], format_str)
    except ValueError:
        ll[idx] = ll[idx].replace('24:', '23:')
        ll[idx] = datetime.datetime.strptime(ll[idx], format_str)
        ll[idx] += datetime.timedelta(hours=1)
        ufo.loc[idx, "date"] = ll[idx]
    

100%|██████████| 80332/80332 [05:40<00:00, 235.81it/s]


#### 3) Set "year" as index of UFO dataset, normalize the total sightings as well as total duration per year by states area

In [8]:
ufo["year"] = [i.year for i in ufo["date"]]
ufo = ufo.set_index("year")

In [9]:
ufo["count"] = [1/i for i in ufo["Area"]]
ufo["duration_norm"] = ufo["duration_seconds"]/ufo["Area"]

In [10]:
total_sightings = ufo.groupby(["state", "year"])["count"].sum()
total_time = ufo.groupby(["state","year"])["duration_norm"].sum()

In [11]:
total_sightings_unnorm = ufo.groupby(['state','year'])['count'].count()
total_time_unnorm = ufo.groupby(['state','year'])['duration_seconds'].sum()


In [12]:
total_sightings_year = ufo.groupby('year')
total_sightings_year = total_sightings_year['count'].count()
total_duration_year = ufo.groupby('year')['duration_seconds'].sum()

In [13]:
total_sightings_state1 = ufo.groupby('fips')['count'].sum()
total_duration_state = ufo.groupby('fips')['duration_norm'].sum()

In [60]:
total_sightings_state = {}
for i, j in zip(total_sightings_state1, total_sightings_state1.index):
    if i==0:
        continue
    total_sightings_state[j] = math.log2(i)

nan
nan


### 2. Create US map and plots of total sightings and total duration in the function of year
#### 1) Get US map data and add id, coordinate, area properties to the dictionary

In [16]:
geom_data = bqplot.topo_load('map_data/USStatesMap.json')
for i in geom_data['objects']['subunits']['geometries']:
    for row in state_info.itertuples():
        if i['id'] == row.GEOID:
            i['coordinate'] = [row.INTPTLAT,row.INTPTLON]
            i['area'] = row.Area

#### 2) Observe selecting function of the map, and connect the selected state to plots

In [17]:
sighting_text = 'Number'
time_text = 'Duration Time'
title = HTML()
title.value = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>'%(sighting_text)


In [55]:
map_styles = {'scales': {'projection': bqplot.AlbersUSA(),
                         'color': bqplot.ColorScale(colors=["#c7e9b4","#253494"])},
              'color': total_sightings_state}
map_tt = bqplot.Tooltip(fields = ['name','id'], labels = ['State','ID'])
states_map = bqplot.Map(map_data=geom_data,
                        interactions = {'click':'select','hover':'tooltip'},
                        unslected_styles = {'opacity': 0.4},
                        **map_styles, tooltip = map_tt, display_legend = False, labels=['UFO Sightings'])
fig_map = bqplot.Figure(marks=[states_map] , fig_margin = {'top':20,'bottom':30,'left':-105,'right':0})


#### 3) Record the selected state and choice of observed object(duration_time/observed times)

In [15]:
state_seleceted = ''
dt_x_fast = LinearScale()
lin_y = LinearScale()
log_y = LogScale()

x_ax = Axis(label = "Year", scale = dt_x_fast)
y_ay_S = Axis(scale = lin_y, orientation = "vertical")
y_ay_D = Axis(scale = log_y, orientation = "vertical")

In [19]:
def observe_selected(change):
    ind = intsel_fast.selected
    if states_map.selected:
        for abbr,fips in abbr_to_fits.items():  
            if fips:
                if int(fips) == states_map.selected[-1]:
                    global state_selected
                    state_selected = abbr.lower()                  
                    if cap_select.value == 'Total Sightings':
                        plot_all.x = total_sightings[state_selected].index.values
                        plot_all.y = total_sightings[state_selected][:]
                        title.value = '<center><font size="+2"><b>Normalized Total %s of UFO Sightings in %s by Year</b></font></center>'%(
                                      sighting_text,state_selected.upper())
                        if not ind is None:
                            if any(ind):
                                tot = total_sightings_unnorm[state_selected].loc[ind[0]:ind[1]].sum()
                                db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total sightings: <strong>%s</strong></center>'%(
                                                int(ind[0]),int(ind[1]),str(tot))
                    
                    if cap_select.value == 'Total Duration Time':
                        plot_all.x = total_time[state_selected].index.values
                        plot_all.y = total_time[state_selected][:]
                        title.value = '<center><font size="+2"><b>Normalized Total %s of UFO Sightings in %s by Year</b></font></center>'%(
                                      time_text,state_selected.upper())
                        if not ind is None:
                            if any(ind):
                                tot = total_time_unnorm[state_selected].loc[ind[0]:ind[1]].sum()
                                db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total duration seconds: <strong>%s</strong></center>'%(
                                                int(ind[0]),int(ind[1]),str(tot))
    else:
        state_selected = ''
        if cap_select.value == 'Total Sightings':
            plot_all.x = total_sightings_year.index.values
            plot_all.y = total_sightings_year[:]
            title.value = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>'%(sighting_text)
        if cap_select.value == 'Total Duration Time':
            plot_all.x = total_duration_year.index.values
            plot_all.y = total_duration_year[:]
            title.value = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>'%(time_text)
        if not ind is None:
            if any(ind):
                if cap_select.value == 'Total Sightings':
                    tot = total_sightings_year.loc[ind[0]:ind[1]].sum() 
                    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total sightings: <strong>%s</strong></center>'%(
                                    int(ind[0]),int(ind[1]),str(tot))
                if cap_select.value == 'Total Duration Time':
                    tot = total_duration_year.loc[ind[0]:ind[1]].sum()
                    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total duration seconds: <strong>%s</strong></center>'%(
                                    int(ind[0]),int(ind[1]),str(tot))
        
states_map.observe(observe_selected, 'selected')

#### 4) Create plots with Time interval by year, and use callback function to return the selected interval

In [20]:
plot_all = Lines(x=total_sightings_year.index.values,y=total_sightings_year[:],
                 colors = ['orange'],scales={'x':dt_x_fast,'y':lin_y})

intsel_fast = FastIntervalSelector(scale = dt_x_fast, marks = [plot_all])

def fast_interval_change_callback(change):
    ind = change.new
    if any(ind):
        if cap_select.value == 'Total Sightings':
            if not states_map.selected:
                tot = total_sightings_year.loc[ind[0]:ind[1]].sum()                
            else:
                tot = total_sightings_unnorm[state_selected].loc[ind[0]:ind[1]].sum()
            db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total sightings: <strong>%s</strong></center>'%(
                            int(ind[0]),int(ind[1]),str(tot))
        if cap_select.value == 'Total Duration Time':
            if not states_map.selected:
                tot = total_duration_year.loc[ind[0]:ind[1]].sum()
            else:
                tot = total_time_unnorm[state_selected].loc[ind[0]:ind[1]].sum()
            db_fast.value = '<center>The selected time period:<strong>%s - %s</strong>. Total duration seconds: <strong>%s</strong></center>'%(
                            int(ind[0]),int(ind[1]),str(tot))
intsel_fast.observe(fast_interval_change_callback,names = ['selected'])
db_fast = HTML()
db_fast.value = '<center>The selected time period is <strong>%s</strong></center>'%(str(intsel_fast.selected))

fig_all = Figure(marks=[plot_all],axes = [x_ax, y_ay_S],interaction=intsel_fast,max_aspect_ratio=1.2)



#### 5) Add dropdown widget and connect US map and plots by using callback function

In [21]:
cap_select = Dropdown(options=['Total Sightings','Total Duration Time'],description='Caption:')
def onCaptionSelected(change):
    cap = change['new']
    ind = intsel_fast.selected
    if cap == 'Total Sightings':
        states_map.color = total_sightings_state        
        if not states_map.selected:
            plot_all.x = total_sightings_year.index.values
            plot_all.y = total_sightings_year[:]
            title.value = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>'%(sighting_text)
            if not ind is None:
                if any(ind):
                    tot = total_sightings_year.loc[ind[0]:ind[1]].sum()
                    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total sightings: <strong>%s</strong></center>'%(
                                    int(ind[0]),int(ind[1]),str(tot))
            
        else:
            plot_all.x = total_sightings[state_selected].index.values
            plot_all.y = total_sightings[state_selected][:]
            title.value = '<center><font size="+2"><b>Normalized Total %s of UFO Sightings in %s by Year</b></font></center>'%(
                        sighting_text,state_selected.upper())
            if not ind is None:
                if any(ind):
                    tot = total_sightings_unnorm[state_selected].loc[ind[0]:ind[1]].sum()
                    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total sightings: <strong>%s</strong></center>'%(
                                    int(ind[0]),int(ind[1]),str(tot))
            
    if cap == 'Total Duration Time':
        states_map.color = total_duration_state.to_dict()
        title.value = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>'%(time_text)
        if not states_map.selected:
            plot_all.x = total_duration_year.index.values
            plot_all.y = total_duration_year[:]
            title.value = '<center><font size="+2"><b>Total %s of UFO Sightings in US by Year</b></font></center>'%(time_text)
            if not ind is None:
                if any(ind):
                    tot = total_duration_year.loc[ind[0]:ind[1]].sum()
                    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total duration seconds: <strong>%s</strong></center>'%(
                                    int(ind[0]),int(ind[1]),str(tot))
        else:
            plot_all.x = total_time[state_selected].index.values
            plot_all.y = total_time[state_selected][:]
            title.value = '<center><font size="+2"><b>Normalized Total %s of UFO Sightings in %s by Year</b></font></center>'%(
                        time_text,state_selected.upper())
            if not ind is None:
                if any(ind):
                    tot = total_time_unnorm[state_selected].loc[ind[0]:ind[1]].sum()
                    db_fast.value = '<center>The selected time period: <strong>%s - %s</strong>. Total duration seconds: <strong>%s</strong></center>'%(
                                    int(ind[0]),int(ind[1]),str(tot))
        
cap_select.observe(onCaptionSelected,'value')

### 3. Embed all the widgets an dfigures into a H-box

In [44]:
VBox([title,
      HBox([
           VBox([cap_select,fig_map], layout = {'width': '80%','height':'95%'}),
          VBox([db_fast,fig_all],layout = {'width': '100%','height':'95%'})])])

A Jupyter Widget

In [27]:
# !pip install --upgrade pip
# !jupyter nbextension enable --py widgetsnbextension

Requirement already up-to-date: pip in /Users/chenwei/anaconda3/lib/python3.6/site-packages (19.1.1)
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: OK
