Fukushima Scatter Plot Example using SQLite Database
=====================================================
This example illustrates basic queries on the Fukushima incident data.  It creates separate scatter plots for each flight date.

Make sure you have a Sina kernel selected before you begin.

In [None]:
from collections import defaultdict
import matplotlib.pyplot as plot
import numpy
import sina.datastores.sql as sina_sql
import sina.utils

# Create the data access object factory.
# The database location is obtained from sina.utils.get_example_path; the
# resulting `factory` is what the data-loading cell uses to reach the records.
database = sina.utils.get_example_path("fukushima/fukushima-new.sqlite")
print("Using database {}".format(database))
factory = sina_sql.DAOFactory(database)

# Make a phantom call to plot.show() to work around a known Jupyter issue with displaying graphs
# (no figure has been created at this point, so there is nothing to draw yet).
plot.show()

# Tell the reader that this setup cell ran to completion.
print("Connection to database made. Ready to proceed.")

Load the Data
===========
Load all of the observation records and group the radiation counts (`gcnorm`) by flight date and by (longitude, latitude) coordinates.

In [None]:
# Pull every observation ("obs") record out of the datastore
print("Loading the data has started and will take some time.")
record_dao = factory.createRecordDAO()
observation_recs = record_dao.get_all_of_type("obs")

# Group the readings as {flight date: {(longitude, latitude): [gcnorm, ...]}}
flights = defaultdict(lambda: defaultdict(list))
for obs in observation_recs:
    fields = obs['data']
    position = (fields['longitude']['value'], fields['latitude']['value'])
    flight_date = fields['date']['value']
    reading = fields['gcnorm']['value']
    flights[flight_date][position].append(reading)

print("The data is loaded.  Ready to run the next cell.")

Create the Graphs
===============
Create a scatter plot for each flight date and add the power plant and selected cities as points of reference.

In [None]:
%matplotlib notebook

# Use a "small" marker area for cities
# (passed as the `s` argument of the scatter call that draws the cities)
AREA_CITIES = 35

# Use a marker area for radiation data that is larger than that of cities
AREA_DATA = AREA_CITIES * 2

# Identify the coordinates, label, and label orientation for the power
# plant and selected cities as points of reference.
# Each entry is [(longitude, latitude), description, alignment]; plot_date
# passes the alignment to annotate() as `ha`.
CITIES = [  # (lon, lat), desc, horizontal alignment
    [(141.0281, 37.4213), ' Daiichi Nuclear Power Plant', 'left'],
    [(141.0125, 37.4492), 'Futaba ', 'right'],
    [(141.0000, 37.4833), ' Namie', 'left'],
    [(140.9836, 37.4044), ' Okuma', 'left'],
    [(141.0088, 37.3454), ' Tomioka', 'left']]

# The maximum radiation value
# (infinity, so it serves as the open upper bound of the last range in COLORS)
MAX_RAD = float('inf')

# Define colors to be plotted, in order of increasing levels of radiation,
# and associated gcnorm ranges, which are always positive
# get_color treats each (low, high) pair as half-open: low is inclusive and
# high is exclusive, so consecutive ranges do not overlap.
COLORS = [['blue', (0, 2000)],
          ['cyan', (2000, 3000)],
          ['green', (3000, 5000)],
          ['yellow', (5000, 10000)],
          ['orange', (10000, 20000)],
          ['red', (20000, MAX_RAD)]]

# Use a unique color to mark cities
# NOTE(review): get_color also returns 'black' for a value that matches no
# range in COLORS, and plot_date files coordinates under whatever get_color
# returns -- keep the two in mind together when changing this value.
COLOR_CITIES = 'black'

# Use a standard base font size for labels
FONT_LABEL_SIZE = 16


def get_color(value):
    """Look up the plot color whose gcnorm range contains the value.

    The ranges in COLORS are half-open: the lower bound is inclusive and
    the upper bound is exclusive.  The first matching entry wins.

    :param value: the radiation count to classify
    :returns: the name of the matching color, or 'black' if the value
              falls outside every range
    """
    for name, (low, high) in COLORS:
        if low <= value < high:
            return name
    return 'black'


def get_color_label(color_in):
    """Build the legend text describing the gcnorm range of a color.

    :param color_in: the name of a color, normally one listed in COLORS
    :returns: '<HIGH' when the range starts at 0, '>=LOW' when the range
              ends at MAX_RAD, 'LOW-(HIGH - 1)' for any other range, and
              None if the color is not listed in COLORS
    """
    for name, bounds in COLORS:
        if name != color_in:
            continue

        low, high = bounds
        if low == 0:
            return '<%s' % high
        if high == MAX_RAD:
            return '>=%s' % low
        # The upper bound is exclusive, so show the last value below it
        return '%4d-%4d' % (low, high - 1)

    return None


def plot_date(date):
    """Create a scatter plot of the radiation readings for one flight date.

    Every measured coordinate is drawn once, colored according to the
    largest gcnorm value recorded at that coordinate.  The power plant and
    the selected cities are added as smaller reference markers with text
    labels, and a legend explains the color ranges that actually occur.

    :param date: the flight date, as used for the keys of the module-level
                 ``flights`` dictionary (e.g. '4/5/2011'); an unknown date
                 produces a plot that contains only the reference markers
    """
    # Look the flight up with get() rather than by indexing: ``flights`` is a
    # defaultdict, so ``flights[date]`` would silently insert an empty entry
    # for an unknown date and leave the shared dictionary modified.
    observations = flights.get(date, {})

    # Aggregate coordinates for measurements by color for plotting purposes,
    # using the maximum count for each coordinate (in case there are
    # duplicates).  The reference cities are deliberately NOT part of these
    # buckets, so a reading can never end up drawn as a city marker.
    plot_data = {color: [] for color, _ in COLORS}
    unclassified = 0
    for coord, counts in observations.items():
        bucket = plot_data.get(get_color(max(counts)))
        if bucket is None:
            # The value matched no range in COLORS (get_color returned its
            # fallback), so there is no legend entry that could describe it.
            unclassified += 1
            continue
        bucket.append(list(coord))
    if unclassified:
        print("Skipped %d reading(s) outside the defined ranges for %s"
              % (unclassified, date))

    # Produce the plot with larger than default font sizes for readability.
    # Everything is done through the explicit figure/axes objects so the
    # result does not depend on which figure pyplot considers current.
    fig, ax = plot.subplots(figsize=(8, 6))
    fig.suptitle("Fukushima Radiation: Flight %s" % date, fontsize=20)
    ax.set_xlabel('Longitude', fontsize=FONT_LABEL_SIZE)
    ax.set_ylabel('Latitude', fontsize=FONT_LABEL_SIZE)
    ax.tick_params(labelsize=FONT_LABEL_SIZE)
    plot.setp(ax.get_xticklabels(), rotation=-25)

    # .. Plot the cities with smaller, black markers
    arr = numpy.array([list(coord) for coord, _, _ in CITIES])
    ax.scatter(x=arr[:, 0], y=arr[:, 1], s=AREA_CITIES,
               c=COLOR_CITIES, label=None)

    # .. Plot radiation data in color order; colors without any reading are
    #    skipped so they do not show up in the legend
    for color, _ in COLORS:
        color_data = plot_data[color]
        if color_data:
            arr = numpy.array(color_data)
            ax.scatter(x=arr[:, 0], y=arr[:, 1], s=AREA_DATA,
                       c=color, label=get_color_label(color))

    # .. Add city labels
    for coord, desc, ha in CITIES:
        ax.annotate(desc, coord, va="center", ha=ha, fontsize=FONT_LABEL_SIZE)

    # .. Adjust the plot to position the legend: shrink the axes to 80% of
    #    their width and anchor the legend just outside the right edge
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    legend = ax.legend(scatterpoints=1, frameon=False, labelspacing=0.8,
                       loc='center left', bbox_to_anchor=(1, 0.5),
                       fontsize=FONT_LABEL_SIZE)
    legend.set_title('Counts per Second', prop={'size': FONT_LABEL_SIZE})

    plot.show()

# Produce one figure per known flight, in the order the flights took place
print("Plotting flight data by date.  This may take a while.")

flight_dates = ('4/5/2011', '4/18/2011', '5/9/2011')
for date in flight_dates:
    plot_date(date)