Fukushima Scatter Plot Example using SQLite Database
=====================================================
This example illustrates basic, raw queries on the example Fukushima incident data.  It creates separate plots for each flight date.

You can reconfigure this example to use your own environment and/or your own copy of the database by modifying the following variables in the next cell:
* **DB_PATH** is the root directory of the Fukushima database
* **VENV_PATH** is the root directory of the virtual environment containing Sina packages

Once the paths are set properly, run the next cell.

In [None]:
'''
Create scatterplots for each flight date using the example database derived from
Fukushima data on data.gov.

Data source: https://catalog.data.gov/dataset/\
    us-doe-nnsa-response-to-2011-fukushima-incident-at-sea-aerial-data
'''
import os
import sys

# ---- Path options -----------------------------------------------------
# Root directory containing the fukushima.sqlite database.
DB_PATH = '/collab/usr/gapps/wf/examples/data/fukushima'

# Root directory of the virtual environment.
VENV_PATH = '/collab/usr/gapps/wf/releases/venv-user'

# Extend the module search path with the sina directory that lives inside
# the virtual environment so that the notebook can import sina modules.
sys.path.append(
    os.path.join(VENV_PATH, 'lib/python2.7/site-packages/sina')
)

print('Paths are configured.  Ready to run the next cell.')

Load the Data
===========
Load all of the observation data and aggregate coordinates and radiation counts by date.

In [None]:
import sina.datastores.sql as sina_sql

# Create the database access factory.
fact = sina_sql.DAOFactory(os.path.join(DB_PATH, 'fukushima.sqlite'))

# Extract the data from the database
print("Loading the data has started and will take some time.")
observations = fact.createRecordDAO().get_all_of_type("obs")

# Build a dictionary of data for each date.  The resulting layout is:
#     data[date][(longitude, latitude)] -> list of gcnorm values
# 'keys' fixes the order in which the fields are collected into 'lvals'.
keys = ['date', 'latitude', 'longitude', 'gcnorm']
data = {}
for obs in observations:
    rec = obs.raw
    # One slot per entry of 'keys'; a slot stays '' when the record has no
    # data entry with that name.
    lvals = ['' for _ in keys]
    for row in rec['data']:
        nm = row['name']
        if nm in keys:
            ind = keys.index(nm)
            val = row['value']
            # Python 2: store text values as UTF-8 byte strings.
            if isinstance(val, unicode):
                val = val.encode('utf-8')
            # Every collected field except the date is numeric.
            if nm != 'date':
                val = float(val)
            lvals[ind] = val
    dt, lat, lon, gcnorm = lvals
    coords = (lon, lat)
    # Record the gcnorm reading explicitly.  The loop variable 'val' only
    # holds whichever matching field happened to come last in rec['data'],
    # so using it here would make the stored count depend on field order.
    data.setdefault(dt, {}).setdefault(coords, []).append(gcnorm)

print("The data is loaded.  Ready to run the next cell.")

Create the Graphs
===============
Create a scatter plot for each flight date and add the power plant and selected cities as points of reference.

In [None]:
# Perform basic plotting set up: imports and colors
import matplotlib.pyplot as plt
import numpy as np

# Reference locations drawn and annotated on every plot.  Each entry is
# [(longitude, latitude), annotation text, horizontal alignment].  The
# leading/trailing blank in the text sits on the side facing the point.
cities = [
    [(141.0281, 37.4213), ' Daiichi Nuclear Power Plant', 'left'],
    [(141.0125, 37.4492), 'Futaba ', 'right'],
    [(141.0000, 37.4833), ' Namie', 'left'],
    [(140.9836, 37.4044), ' Okuma', 'left'],
    [(141.0088, 37.3454), ' Tomioka', 'left'],
]

# Color bins, each entry being [color name, (lower bound, upper bound)].
# A bound of None means "no bound on that side"; the 'black' entry has no
# bounds at all and is the color used for the reference locations.
colors = [
    ['black', (None, None)],
    ['blue', (0, 2000)],
    ['cyan', (2000, 3000)],
    ['green', (3000, 5000)],
    ['yellow', (5000, 10000)],
    ['orange', (10000, 20000)],
    ['red', (20000, None)],
]

def getcolor(val):
    """Return the name of the first color bin in ``colors`` holding *val*.

    A bin with both bounds set matches when ``lower <= val < upper``; a bin
    with only a lower bound matches when ``val >= lower``.  Bins without a
    lower bound never match.  'black' is returned when no bin matches.
    """
    for name, (low, high) in colors:
        if low is None:
            # No lower bound: this entry is never selected by value.
            continue
        if high is None:
            # Open-ended bin.
            if val >= low:
                return name
        elif val >= low and val < high:
            return name
    return 'black'

def getcolorlbl(color):
    """Return the legend label for *color*, or None when it has none.

    'black' and any color name missing from ``colors`` yield None.  For the
    others the label is built from the bin bounds: '<upper' when the lower
    bound is 0, 'lower-(upper - 1)' when the bin has an upper bound, and
    '>=lower' otherwise.
    """
    if color == 'black':
        return None

    for name, bounds in colors:
        if color != name:
            continue
        low, high = bounds[0], bounds[1]
        if low == 0:
            return '<%s' % high
        if high is not None:
            return '%4d-%4d' % (low, high - 1)
        return '>=%s' % low

    return None

# Define the plotting function
def plotdate(dt):
    """Show a scatter plot of the readings stored for flight date *dt*.

    Reads the module-level ``data``, ``colors`` and ``cities``.  Every
    coordinate recorded under ``data[dt]`` is colored by the largest value
    in its list (see ``getcolor``); the reference locations in ``cities``
    are drawn in black and annotated with their names.

    Raises KeyError when *dt* is not a key of ``data``.
    """
    # Group the [lon, lat] points by the color they will be drawn in.
    points_by_color = {name: [] for name, _ in colors}

    # The reference locations always go into the black group.
    points_by_color['black'].extend(list(lonlat) for lonlat, _, _ in cities)

    # Each coordinate is binned by its maximum reading for this date.
    for lonlat, readings in data[dt].items():
        points_by_color[getcolor(max(readings))].append(list(lonlat))

    # Figure, title and axis decoration.
    fig = plt.figure(figsize=(8, 6))
    fig.suptitle("Fukushima Radiation: Flight %s" % dt, fontsize=20)
    ax = plt.subplot(111)
    plt.xlabel('Longitude', fontsize=18)
    plt.ylabel('Latitude', fontsize=18)
    plt.tick_params(labelsize=16)
    plt.xticks(rotation=-25)

    # One scatter call per non-empty color group, in the order of 'colors'.
    for name, _ in colors:
        points = points_by_color[name]
        if not points:
            continue
        xy = np.array(points)
        # Reference locations are drawn smaller than the readings.
        if name == 'black':
            size = 35
        else:
            size = 70
        plt.scatter(x=xy[:, 0], y=xy[:, 1], s=size,
                    c=name, label=getcolorlbl(name))

    # Name the reference locations next to their points.
    for lonlat, text, align in cities:
        ax.annotate(text, lonlat, va="center", ha=align,
                    fontsize=16)

    # Narrow the axes to 80% of their width, leaving room on the right
    # for the legend anchored just outside the axes.
    bounds = ax.get_position()
    ax.set_position([bounds.x0, bounds.y0, bounds.width*0.8, bounds.height])

    legend = ax.legend(scatterpoints=1, frameon=False, labelspacing=.8,
                       loc='center left', bbox_to_anchor=(1, 0.5),
                       fontsize=16)
    legend.set_title('Counts per Second', prop={'size': 16})

    plt.show()
    
# Plot the data: one figure for each of the selected flight dates.
flight_dates = ('4/5/2011', '4/18/2011', '5/9/2011')
for dt in flight_dates:
    plotdate(dt)