In [1]:
import plotly.graph_objects as go
import plotly
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.optimize import curve_fit
import pandas as pd
import numpy as np
import io
import requests
import matplotlib.pyplot as plt

In [2]:
# INPUTS
# For each dataset, the *_use_url flag selects the source that the loading cell
# reads: True downloads the CSV from the matching *_url, False reads the local
# file named by the matching *_name.
cpm_use_url = True    # CPM readings are downloaded from cpm_url
rain_use_url = False  # rainfall data is read from the local file rain_name

cpm_url = "https://radwatch.berkeley.edu/test/tmp/dosenet/pinewood_os.csv"          # used when cpm_use_url = True
rain_url = ""         # used when rain_use_url = True
cpm_name = ""         # used when cpm_use_url = False
rain_name = "PinewoodOSRainfallData1.csv"        # used when rain_use_url = False

# Optional time cut: when use_cut is True, only CPM rows whose deviceTime_local
# lies strictly between cut_start and cut_end are kept.
use_cut = False
cut_start = '2015-11-01 00:00:00-08:00'
cut_end = '2017-05-31 00:00:00-08:00'

In [3]:
# Request headers sent with every CSV download (a desktop-Chrome User-Agent
# plus the XMLHttpRequest marker).
header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}


def _read_csv_source(use_url, url, name):
    """Load one CSV into a DataFrame, either from a URL or from a local file.

    Parameters
    ----------
    use_url : bool
        True to download the CSV from ``url``; False to read the file ``name``.
    url : str
        Address of the CSV, only used when ``use_url`` is True.
    name : str
        Path of the local CSV, only used when ``use_url`` is False.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    if not use_url:
        return pd.read_csv(name)
    # A timeout keeps a dead server from hanging the notebook forever.
    response = requests.get(url, headers=header, timeout=60)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were CSV.
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.text))


# Raw CPM readings and daily rainfall records, each from its configured source.
cpm_data = _read_csv_source(cpm_use_url, cpm_url, cpm_name)
data_rain = _read_csv_source(rain_use_url, rain_url, rain_name)

# Optionally keep only the readings whose local timestamp lies strictly
# between cut_start and cut_end.
if use_cut:
    cpm_data = cpm_data[
        (cpm_data['deviceTime_local'] > cut_start)
        & (cpm_data['deviceTime_local'] < cut_end)
    ]

# Average the CPM readings over consecutive 24-hour windows, starting at the
# oldest reading. Each output row is
# [local timestamp of the first reading listed in the window, window start, mean CPM].
#
# Notes on the windowing:
#   * A window covers [start, start + 86400), so a reading that falls exactly
#     on a window boundary is counted once instead of being dropped.
#   * The range runs from the smallest to the largest deviceTime_unix, so every
#     reading is averaged and the row order of cpm_data does not matter.
#   * The label comes from deviceTime_local, matching the column name it is
#     stored under.
#   * NaN CPM readings are ignored by the mean.
daily_avg_data = []
unix_times = cpm_data["deviceTime_unix"]

if (len(cpm_data) > 0):
    starting_unix_time = unix_times.min()
    ending_unix_time = unix_times.max()

    while (starting_unix_time <= ending_unix_time):
        in_window = (unix_times >= starting_unix_time) & (unix_times < starting_unix_time + 86400)
        daily_cut_data = cpm_data[in_window]
        # Days without any reading produce no output row.
        if (len(daily_cut_data) > 0):
            daily_avg = daily_cut_data["cpm"].mean()
            daily_avg_data.append([daily_cut_data["deviceTime_local"].iloc[0], starting_unix_time,  daily_avg])
        starting_unix_time = starting_unix_time + 86400

changable_avg_data = pd.DataFrame(daily_avg_data, columns = ["deviceTime_local", "deviceTime_unix",  "cpm"])




In [4]:
def get_averaged_data_list(interval = 86400, data = None):
    """Average CPM readings over consecutive fixed-length time windows.

    Windows start at the smallest ``deviceTime_unix`` in ``data`` and each one
    covers ``[start, start + interval)``. Windows containing no reading are
    skipped.

    Parameters
    ----------
    interval : int
        Window length in seconds. Must be positive.
    data : pandas.DataFrame, optional
        Readings to average. Needs a ``deviceTime_unix`` column; columns 1, 2
        and 3 (by position) are read as the local timestamp, the unix time and
        the CPM value. Defaults to the module-level ``cpm_data``, looked up
        when the function is called so a reloaded ``cpm_data`` is picked up.

    Returns
    -------
    list of list
        One ``[local timestamp, unix time, mean CPM]`` entry per non-empty
        window, oldest window first. The timestamp and unix time are taken
        from the first row listed in that window.

    Raises
    ------
    ValueError
        If ``interval`` is not positive (the window would never advance).
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of seconds")
    if data is None:
        data = cpm_data
    if len(data) == 0:
        return []

    interval_avg_data = []
    # The time range comes from the frame being averaged, not from the global.
    unix_times = data["deviceTime_unix"]
    starting_unix_time = unix_times.min()
    ending_unix_time = unix_times.max()

    while (starting_unix_time <= ending_unix_time):
        # Half-open window: a reading exactly on a boundary is counted once.
        in_window = (unix_times >= starting_unix_time) & (unix_times < starting_unix_time + interval)
        interval_cut_data = data[in_window]
        if (len(interval_cut_data) > 0):
            interval_avg = interval_cut_data.iloc[:, 3].mean()
            interval_avg_data.append([interval_cut_data.iloc[0, 1], interval_cut_data.iloc[0, 2], interval_avg])
        starting_unix_time = starting_unix_time + interval
    return interval_avg_data

In [5]:
def get_averaged_data_pandas(interval = 86400, data = cpm_data):
    """Return the interval-averaged CPM readings as a DataFrame.

    Thin wrapper around ``get_averaged_data_list``: the list it returns is
    turned into a frame with the columns ``deviceTime_local``,
    ``deviceTime_unix`` and ``cpm``.
    """
    return pd.DataFrame(
        get_averaged_data_list(interval, data),
        columns = ["deviceTime_local", "deviceTime_unix", "cpm"],
    )

In [6]:
def get_combined_data(x_cpm = None, y_rain = None):
    """Pair each averaged CPM value with the rainfall recorded on the same date.

    The date of a CPM row is the first 10 characters of its timestamp
    (column 0) and is matched against the ``DATE`` column of ``y_rain``.
    CPM rows without a matching rainfall date are left out; if a date occurs
    more than once in ``y_rain`` the first occurrence is used.

    Parameters
    ----------
    x_cpm : pandas.DataFrame, optional
        Averaged CPM data; column 0 holds the timestamp string and column 2
        the CPM value (by position). Defaults to the module-level
        ``changable_avg_data``, looked up at call time.
    y_rain : pandas.DataFrame, optional
        Rainfall data with a ``DATE`` column; column 3 (by position) holds the
        precipitation. Defaults to the module-level ``data_rain``, looked up
        at call time.

    Returns
    -------
    pandas.DataFrame
        Columns ``cpm`` and ``PRCP``, one row per matched CPM row.
    """
    if x_cpm is None:
        x_cpm = changable_avg_data
    if y_rain is None:
        y_rain = data_rain

    # Build the date -> precipitation table once instead of filtering the
    # whole rainfall frame for every CPM row; setdefault keeps the first
    # occurrence of a repeated date.
    prcp_by_date = {}
    for rain_date, prcp_value in zip(y_rain["DATE"], y_rain.iloc[:, 3]):
        prcp_by_date.setdefault(rain_date, prcp_value)

    combined_data_list = []
    for date_value, cpm_value in zip(x_cpm.iloc[:, 0], x_cpm.iloc[:, 2]):
        new_date = date_value[:10]  # "YYYY-MM-DD" prefix of the timestamp
        if new_date in prcp_by_date:
            combined_data_list.append([cpm_value, prcp_by_date[new_date]])
    return pd.DataFrame(combined_data_list, columns = ["cpm", "PRCP"])

In [7]:
def model_combined_data(x_model = None, y_model = None):
    """Show a scatter plot of CPM against the rainfall on the same date.

    Parameters
    ----------
    x_model : pandas.DataFrame, optional
        Averaged CPM data passed on to ``get_combined_data``. Defaults to the
        module-level ``changable_avg_data``, looked up at call time.
    y_model : pandas.DataFrame, optional
        Rainfall data passed on to ``get_combined_data``. Defaults to the
        module-level ``data_rain``, looked up at call time.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    if x_model is None:
        x_model = changable_avg_data
    if y_model is None:
        y_model = data_rain

    combined_data = get_combined_data(x_model, y_model)
    # Rainfall goes on the x axis, CPM on the y axis.
    x_list = combined_data["PRCP"].tolist()
    y_list = combined_data["cpm"].tolist()

    fig = px.scatter(x=x_list, y=y_list)
    # title_text/title_font replace the deprecated `titlefont` shortcut, which
    # newer plotly releases no longer accept.
    fig.update_xaxes(title_text="PRCP (inches)",title_font=dict(color='black', size=20),
                 linecolor='black',tickfont=dict(color='black',size=12))
    fig.update_yaxes(title_text="CPM",title_font=dict(color='black', size=20),
                 showgrid=False,tickcolor='black',
                 tickfont=dict(color='black', size=16))
    fig.update_layout(
                 title_text="CPM vs Amount of Rainfall in Inches")
    fig.show()

In [8]:
def graph_cpm_vs_rainfall():
    """Show averaged CPM and rainfall over time on one figure with two y axes.

    Reads the module-level ``changable_avg_data`` (CPM, left axis) and
    ``data_rain`` (PRCP, right axis) and displays the figure with
    ``fig.show()``. Returns None.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    fig.add_trace(
        go.Scatter(x = changable_avg_data['deviceTime_local'],
                   y = changable_avg_data['cpm'], name="CPM"),
                   secondary_y=False,
        )
    fig.add_trace(
        go.Scatter(x = data_rain['DATE'],
                   y = data_rain['PRCP'],
                   name="PRCP"),
                   secondary_y=True,
        )
    # 'lightblue' (no space) is the CSS colour keyword.
    fig.update_layout(plot_bgcolor='lightblue',width=1000, height=450)
    fig.update_layout(
        title_text="CPM with Rainfall"
    )
    # title_text/title_font replace the deprecated `titlefont` shortcut, which
    # newer plotly releases no longer accept.
    fig.update_xaxes(title_text="Time (local) ",title_font=dict(color='black', size=20),
                     linecolor='black',tickfont=dict(color='black',size=12))
    fig.update_layout(legend_orientation="h",
                      legend=dict(x=0,y=-.2, font=dict(size=13)))

    fig.update_yaxes(title_text="CPM",title_font=dict(color='black', size=20),
                     showgrid=False,tickcolor='black',
                     tickfont=dict(color='black', size=16), secondary_y=False)
    fig.update_yaxes(title_text="PRCP",title_font=dict(color='black', size=20),
                     showgrid=False,tickcolor='black',
                     tickfont=dict(color='black', size=16), secondary_y=True)
    fig.show()

In [9]:
# Display the loaded CPM readings to inspect their columns and time range.
cpm_data

Unnamed: 0,deviceTime_utc,deviceTime_local,deviceTime_unix,cpm,cpmError,error_flag
0,2019-04-15 06:55:42+00:00,2019-04-14 23:55:42-07:00,1555311342,3.2,0.800000,
1,2019-04-15 06:50:42+00:00,2019-04-14 23:50:42-07:00,1555311042,1.8,0.600000,
2,2019-04-15 06:45:42+00:00,2019-04-14 23:45:42-07:00,1555310742,1.6,0.565685,
3,2019-04-15 06:40:42+00:00,2019-04-14 23:40:42-07:00,1555310442,2.2,0.663325,
4,2019-04-15 06:35:42+00:00,2019-04-14 23:35:42-07:00,1555310142,2.4,0.692820,
...,...,...,...,...,...,...
118707,2018-02-01 21:31:45+00:00,2018-02-01 13:31:45-08:00,1517520705,1.0,0.447214,
118708,2018-02-01 21:26:46+00:00,2018-02-01 13:26:46-08:00,1517520406,1.8,0.600000,
118709,2018-02-01 21:26:45+00:00,2018-02-01 13:26:45-08:00,1517520405,1.8,0.600000,
118710,2018-02-01 21:21:47+00:00,2018-02-01 13:21:47-08:00,1517520107,2.0,0.632456,


In [10]:
# Display the loaded rainfall records to inspect their columns and date range.
data_rain

Unnamed: 0,STATION,NAME,DATE,PRCP,SNOW
0,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2018-02-01,0.0,0.0
1,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2018-02-02,0.0,0.0
2,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2018-02-03,0.0,0.0
3,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2018-02-04,0.0,0.0
4,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2018-02-05,0.0,0.0
...,...,...,...,...,...
434,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2019-04-11,0.0,0.0
435,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2019-04-12,0.0,0.0
436,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2019-04-13,0.0,0.0
437,US1CASC0001,"MOUNTAIN VIEW 1.2 S, CA US",2019-04-14,0.0,0.0


In [16]:
def find_rainfall_dates_list(lower_bound = 0.5, data = None):
    """List the dates whose precipitation is at least ``lower_bound``.

    Parameters
    ----------
    lower_bound : float
        Minimum precipitation (inclusive) for a date to be returned.
    data : pandas.DataFrame, optional
        Rainfall data; column 2 (by position) holds the date and column 3 the
        precipitation. Defaults to the module-level ``data_rain``, looked up
        at call time.

    Returns
    -------
    list
        The date values of the qualifying rows, in the row order of ``data``.
        Rows with a missing precipitation value never qualify.
    """
    if data is None:
        data = data_rain
    # Select on the frame that was passed in, so a frame whose length differs
    # from the global data_rain is scanned completely and without errors.
    wet_days = data[(data.iloc[:, 3] >= lower_bound).to_numpy()]
    return wet_days.iloc[:, 2].tolist()

In [17]:
def find_rainfall_dates_pandas(lower_bound = 0.5, data = data_rain):
    """Return the dates found by ``find_rainfall_dates_list`` as a DataFrame.

    The result has a single column named ``DATE``.
    """
    return pd.DataFrame(
        find_rainfall_dates_list(lower_bound, data),
        columns = ["DATE"],
    )

In [0]:
def graph_rainfall_days(start = '2019-01-06 00:00:00-08:00',
                        end = '2019-01-10 00:00:00-08:00',
                        interval = 900):
    """Show a close-up of interval-averaged CPM readings for a short period.

    Reads the module-level ``cpm_data``, keeps the rows whose
    ``deviceTime_local`` lies strictly between ``start`` and ``end``, averages
    ``cpm`` over consecutive windows of ``interval`` seconds and displays the
    result as a line plot. The defaults reproduce the window and interval
    that used to be hard-coded here.

    Parameters
    ----------
    start, end : str
        Bounds compared against the ``deviceTime_local`` column.
    interval : int
        Averaging window length in seconds. Must be positive.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; an empty selection
        yields an empty plot.

    Raises
    ------
    ValueError
        If ``interval`` is not positive (the window would never advance).
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of seconds")

    local_times = cpm_data['deviceTime_local']
    closeup_data = cpm_data[(local_times > start) & (local_times < end)]

    closeup_avg_data = []
    if len(closeup_data) > 0:
        unix_times = closeup_data["deviceTime_unix"]
        starting_unix_time = unix_times.min()
        ending_unix_time = unix_times.max()

        while (starting_unix_time <= ending_unix_time):
            # Half-open window: a reading exactly on a boundary is counted once.
            in_window = (unix_times >= starting_unix_time) & (unix_times < starting_unix_time + interval)
            closeup_cut_data = closeup_data[in_window]
            if (len(closeup_cut_data) > 0):
                # Label the window with the local time of its first listed reading.
                closeup_avg_data.append([closeup_cut_data["deviceTime_local"].iloc[0],
                                         closeup_cut_data["cpm"].mean()])
            starting_unix_time = starting_unix_time + interval

    closeup_avg_frame = pd.DataFrame(closeup_avg_data, columns = ["deviceTime_local", "cpm"])

    fig = go.Figure()
    fig.add_trace(go.Scatter(
            x = closeup_avg_frame['deviceTime_local'],
            y = closeup_avg_frame['cpm'],
        ))
    # 'lightblue' (no space) is the CSS colour keyword.
    fig.update_layout(plot_bgcolor='lightblue',width=1000, height=450)
    # title_text/title_font replace the deprecated `titlefont` shortcut, which
    # newer plotly releases no longer accept.
    fig.update_yaxes(title_text="CPM",title_font=dict(color='black', size=20),
                     showgrid=False,tickcolor='black',
                     tickfont=dict(color='black', size=16))
    fig.update_xaxes(title_text="Time (local) ",title_font=dict(color='black', size=20),
                     linecolor='black',tickfont=dict(color='black',size=12))
    fig.update_layout(legend_orientation="h",
                      legend=dict(x=0,y=-.2, font=dict(size=13)))

    fig.show()

In [13]:
# Show the scatter plot of CPM against rainfall, using the function's default inputs.
model_combined_data()

In [18]:
# Show averaged CPM and rainfall over time on one figure with two y axes.
graph_cpm_vs_rainfall()