Fukushima Scatter Plot Example using SQLite Database
=====================================================
This example illustrates basic queries on the Fukushima incident data.  It creates a separate scatter plot for each flight date.

Make sure you have a Sina kernel selected before you begin.

To run this example against your own copy of the database, set **DB_PATH** in the next cell to the root directory that contains your copy of the Fukushima database (`fukushima.sqlite`).  Once the path is set properly, run the next cell.

In [None]:
# Directory that holds the fukushima.sqlite database file
DB_PATH = "/collab/usr/gapps/wf/examples/data/fukushima"

print("Path to the database is configured.  Ready to run the next cell.")

Load the Data
===========
Load all of the observation data and aggregate coordinates and radiation counts by date.

In [None]:
import os
import sina.datastores.sql as sina_sql

# Create the data access object factory.
factory = sina_sql.DAOFactory(os.path.join(DB_PATH, 'fukushima.sqlite'))

# Extract the data from the database
print("Loading the data has started and will take some time.")
observation_recs = factory.createRecordDAO().get_all_of_type("obs")

# Build a dictionary of data for each date, shaped as
#   flights[date][(longitude, latitude)] -> list of gcnorm readings
mnoda_keys = [u'date', u'latitude', u'longitude', u'gcnorm']
flights = {}
for observation in observation_recs:
    # Any field absent from the record keeps its '' placeholder
    values = ['' for _ in mnoda_keys]
    for entry in observation['data']:
        key = entry['name']
        if key in mnoda_keys:
            if key != 'date':
                val = float(entry['value'])
            else:
                # Keep the date as text.  Encoding it to UTF-8 produces a
                # bytes key on Python 3, and a bytes key never matches the
                # plain string dates used to look flights up for plotting.
                val = entry['value']
            values[mnoda_keys.index(key)] = val

    date, lat, lon, gcnorm = values
    coords = (lon, lat)

    # Create the per-date and per-coordinate containers on first use
    flights.setdefault(date, {}).setdefault(coords, []).append(gcnorm)

print("The data is loaded.  Ready to run the next cell.")

Create the Graphs
===============
Create a scatter plot for each flight date and add the power plant and selected cities as points of reference.

In [None]:
import matplotlib.pyplot as plot
import numpy

# Points of reference drawn on every plot: the power plant and selected
# cities.  Each entry is [(lon, lat), label text, horizontal alignment].
# Labels carry a leading or trailing space that matches their alignment.
CITIES = [
    [(141.0281, 37.4213), ' Daiichi Nuclear Power Plant', 'left'],
    [(141.0125, 37.4492), 'Futaba ', 'right'],
    [(141.0000, 37.4833), ' Namie', 'left'],
    [(140.9836, 37.4044), ' Okuma', 'left'],
    [(141.0088, 37.3454), ' Tomioka', 'left'],
]

# Plot colors, in drawing order, each paired with the gcnorm range it
# represents as (lower bound, upper bound); None marks a missing bound.
# Black leads the list so that the cities are plotted first and do not
# obscure the radiation data.  The remaining colors run from safer to
# less safe levels.
COLORS = [
    ['black', (None, None)],
    ['blue', (0, 2000)],
    ['cyan', (2000, 3000)],
    ['green', (3000, 5000)],
    ['yellow', (5000, 10000)],
    ['orange', (10000, 20000)],
    ['red', (20000, None)],
]

def get_color(value):
    """Look up the plot color whose gcnorm range contains the value.

    The entries of COLORS are checked in order and the first match wins.
    Falls back to 'black' when no range matches.
    """
    for name, (low, high) in COLORS:
        if low is None:
            # An entry without a lower bound can never match
            continue
        if high is None:
            # Open-ended range: only the lower bound applies
            if value >= low:
                return name
        elif low <= value < high:
            return name
    return 'black'

def get_color_label(color_in):
    """Build the legend text describing the gcnorm range of a color.

    Returns None for 'black' and for any color that is not in COLORS.
    """
    if color_in == 'black':
        return None

    for name, (low, high) in COLORS:
        if name != color_in:
            continue
        if low == 0:
            return '<%s' % high
        if high is not None:
            # The upper bound is exclusive, so show the last whole count
            return '%4d-%4d' % (low, high - 1)
        return '>=%s' % low

    return None

def plot_date(date):
    """Draw and show the radiation scatter plot for one flight date.

    The date must be a key of flights.  Each coordinate is colored by its
    largest reading, and the reference cities are drawn in black.
    """
    # Bucket the coordinates to draw under the color they are drawn in
    points_by_color = {name: [] for name, _ in COLORS}

    # The reference cities go into the black bucket
    points_by_color['black'].extend(list(loc) for loc, _, _ in CITIES)

    # Where a coordinate has several readings, color it by the largest one
    for loc, readings in flights[date].items():
        points_by_color[get_color(max(readings))].append(list(loc))

    # Set up the figure with larger than default fonts for readability
    fig = plot.figure(figsize=(8, 6))
    fig.suptitle("Fukushima Radiation: Flight %s" % date, fontsize=20)
    ax = plot.subplot(111)
    plot.xlabel('Longitude', fontsize=18)
    plot.ylabel('Latitude', fontsize=18)
    plot.tick_params(labelsize=16)
    plot.xticks(rotation=-25)

    # .. Draw one scatter series per color, following the order of COLORS
    for name, _ in COLORS:
        points = points_by_color[name]
        if not points:
            continue

        xy = numpy.array(points)

        # City markers are drawn smaller than the radiation data
        if name == 'black':
            marker_area = 35
        else:
            marker_area = 70

        plot.scatter(x=xy[:, 0], y=xy[:, 1], s=marker_area,
                     c=name, label=get_color_label(name))

    # .. Label the cities
    for loc, text, align in CITIES:
        ax.annotate(text, loc, va="center", ha=align, fontsize=16)

    # .. Narrow the axes to 80% width and anchor the legend to their right
    bounds = ax.get_position()
    ax.set_position([bounds.x0, bounds.y0, bounds.width*0.8, bounds.height])
    legend = ax.legend(scatterpoints=1, frameon=False, labelspacing=.8,
                       loc='center left', bbox_to_anchor=(1, 0.5),
                       fontsize=16)
    legend.set_title('Counts per Second', prop={'size': 16})

    plot.show()
    
# Produce one plot per known flight date, in chronological order
print("Plotting flight data by date.  This will take a while.")

flight_dates = ['4/5/2011', '4/18/2011', '5/9/2011']
for date in flight_dates:
    plot_date(date)

print("Done.")