# Sound pressure level vs. precipitation

## Imports

In [None]:
import h5py
import os
import pandas as pd
import numpy as np
import sys
import altair as alt
from datetime import datetime, timedelta

In [None]:
# Make the `library` directory that sits next to the current working directory
# importable. os.path is used so the platform's own path separator is handled
# (splitting the working directory on '/' only works with POSIX-style paths).
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'library'))
from searcher import Searcher

## Weather data

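The columns are renamed because square brackets in a field name have a special meaning in Altair (they are read as nested-field access), so a column called `datetime[utc]` or `precipitation[mm]` could not be referenced directly in an encoding.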

In [None]:
# Load only the timestamp and precipitation columns of the hourly weather file,
# parsing the timestamp column as datetimes.
weather_df = pd.read_csv(
    '../data/weather-hourly.csv',
    parse_dates=['datetime[utc]'],
    usecols=['datetime[utc]', 'precipitation[mm]'],
)
# Replace the bracketed CSV headers with plain column names.
weather_df = weather_df.rename(
    columns={
        'precipitation[mm]': 'precipitation',
        'datetime[utc]': 'end_time',
    },
)

In [None]:
# Keep the April 2017 rows whose timestamp falls on minute 51 of the hour, and
# copy them so columns can be added without touching `weather_df`.
# NOTE(review): presumably minute 51 marks the regular hourly observation in
# this weather file -- confirm against the data source.
april_2017_weather = weather_df.loc[
    lambda df: (
        (df['end_time'].dt.minute == 51)
        & (df['end_time'].dt.month == 4)
        & (df['end_time'].dt.year == 2017)
    )
].copy()

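The start time is 59 min 59 s before the end time. This is because `Searcher.return_interval` treats both the start and the end time as inclusive, so a one-hour window that contains both endpoints must start one second later than a full hour before its end; otherwise consecutive windows would share a boundary timestamp.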

In [None]:
# Each window starts 59 min 59 s (one second short of an hour) before its end.
april_2017_weather['start_time'] = (
    april_2017_weather['end_time'] - timedelta(minutes=59, seconds=59)
)

In [None]:
# Preview the first five rows (end_time, precipitation, start_time).
april_2017_weather.head(n=5)

In [None]:
# Share of the hours of 2017 in which some precipitation was recorded.
# Only the minute-51 rows of 2017 are considered.
weather_df_2017 = weather_df.loc[
    lambda df: (df['end_time'].dt.minute == 51) & (df['end_time'].dt.year == 2017)
]
# Summing the boolean mask counts the rows with precipitation above zero.
hours_with_precipitation = int((weather_df_2017['precipitation'] > 0).sum())
# The denominator is the length of a 365-day year, not the number of rows found.
hours_in_year = 365 * 24
print(f'hours with precipitation = {hours_with_precipitation}')
print(f'total hours = {hours_in_year}')
print(f'percentage of hours with precipitation = {hours_with_precipitation / hours_in_year:.2%}')

## SONYC data

In [None]:
class Plotter:
    """Pair hourly precipitation with a node's mean sound pressure level and plot them.

    On construction, one mean SPL value is looked up per weather row and stored
    in an ``spl`` column next to ``precipitation``. The chart methods all read
    from that combined table.
    """

    def __init__(self, node, searcher, weather_df):
        """Build the combined precipitation / SPL table.

        Args:
            node: Node identifier; only used in chart titles.
            searcher: Object providing ``return_interval(start, end)`` and an
                ``information`` container (see ``get_mean_spl``).
            weather_df: DataFrame with ``start_time``, ``end_time`` and
                ``precipitation`` columns, one row per interval.
        """
        self.node = node
        self.searcher = searcher
        # Work on a private copy: adding the ``spl`` column to the caller's
        # frame would let several plotters built from the same weather table
        # overwrite each other's values.
        self.df = weather_df.copy()
        # Built row by row (instead of ``DataFrame.apply``) so that a weather
        # table without rows simply yields an empty ``spl`` column.
        self.df['spl'] = [self.get_mean_spl(row) for _, row in self.df.iterrows()]

    def get_mean_spl(self, row):
        """Return the mean SPL for the interval described by one weather row.

        Args:
            row: Row (or any mapping) with ``start_time`` and ``end_time``.

        Returns:
            The mean of the ``spl_vector`` field over the records the searcher
            selects for the interval, or NaN when it selects none.
        """
        interval = self.searcher.return_interval(row['start_time'],
                                                 row['end_time'])
        indices = interval['index'].values
        if len(indices) == 0:
            # Nothing recorded in this interval: report a gap rather than
            # averaging an empty selection.
            return float('nan')
        info = self.searcher.information[indices]
        # NOTE(review): this is a plain arithmetic mean of the stored values;
        # if they are decibel levels an energy average may be intended --
        # confirm.
        return info['spl_vector'].mean()

    def scatter(self, data):
        """Return an interactive scatter chart of ``spl`` against ``precipitation``.

        Args:
            data: DataFrame with ``precipitation`` and ``spl`` columns.
        """
        return alt.Chart(data).mark_point().encode(
            x=alt.X('precipitation:Q',
                    title='Precipitation (mm)',
                    scale=alt.Scale(zero=False)),
            y=alt.Y('spl:Q',
                    title='Mean sound pressure level (dBA)',
                    scale=alt.Scale(zero=False)),
        ).properties(
            title=f'{self.node} - hourly amounts'
        ).interactive()

    def scatter_all_points(self):
        """Return the scatter chart for every row of the table."""
        return self.scatter(self.df)

    def scatter_points_with_precipitation(self):
        """Return the scatter chart for rows with precipitation above zero."""
        return self.scatter(self.df[self.df['precipitation'] > 0])

    def _line(self, field, title):
        """Return an 800 px wide line chart of *field* against ``end_time``."""
        return alt.Chart(self.df).mark_line().encode(
            x=alt.X('end_time:T', title='Date'),
            y=alt.Y(f'{field}:Q', title=title)
        ).properties(width=800)

    def linechart(self):
        """Return precipitation and SPL as two vertically stacked line charts."""
        return alt.vconcat(
            self._line('precipitation', 'Precipitation (mm)'),
            self._line('spl', 'Sound pressure level (dBA)'),
        )

In [None]:
# Identifiers of three SONYC nodes. Only node1 is used in the cells that
# follow here; node3 and node4 are defined but not referenced in them.
node1 = 'sonycnode-b827eb86d458.sonyc'
node3 = 'sonycnode-b827ebb40450.sonyc'
node4 = 'sonycnode-b827eb73e772.sonyc'

In [None]:
# Searcher (imported from the local `searcher` module) for node1's data.
searcher1 = Searcher(node1)

In [None]:
# Constructing the Plotter computes the mean SPL for every row of the
# April 2017 weather table (one Searcher lookup per row).
plotter = Plotter(node1, searcher1, april_2017_weather)

In [None]:
# Scatter chart of mean SPL against precipitation for every hourly row.
plotter.scatter_all_points()

In [None]:
# Same scatter chart, restricted to rows with precipitation above zero.
plotter.scatter_points_with_precipitation()

In [None]:
# Precipitation and mean SPL over time, as two vertically stacked line charts.
plotter.linechart()