# Climate Data from Home System

In [1]:
from erddapy import ERDDAP
import pandas as pd
import datetime

# for secondary/derived parameters
from metpy.units import units
import metpy.calc as mpcalc

In [2]:
server_url = 'http://raspberrypi.local:8080/erddap'
#server_url = 'http://192.168.2.3:8080/erddap'

In [3]:
e = ERDDAP(server=server_url)
df = pd.read_csv(e.get_search_url(response='csv', search_for='MoonFlower'))
print(df['Dataset ID'].values)

['tempest_moonflower_wx' 'channel_1027974_thingspeak'
 'channel_1037066_thingspeak' 'channel_1314759_thingspeak'
 'channel_1047747_thingspeak' 'channel_843357_thingspeak' 'channel_rpi'
 'MFPurpleAir_Primary_RT' 'allDatasets']


In [4]:
dataset_id='tempest_moonflower_wx'

try:
    d = ERDDAP(server=server_url,
        protocol='tabledap',
        response='csv'
    )
    d.dataset_id=dataset_id
except HTTPError:
    print('Failed to generate url {}'.format(dataset_id))

try:
    df_m = d.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
                )
    df_m.sort_index(inplace=True)
    df_m.columns = [x[1].split()[0] for x in enumerate(df_m.columns)]

except:
    print(f"something failed in data download {dataset_id}")
    pass

df_m.drop(columns=['device_id', 'bucket_step_minutes', 'wind_lull','wind_interval'],inplace=True)
#stats are all utc driven - but we really want local daily values
df_m=df_m.tz_convert('US/Pacific')

# calculations of various parameters... metpy?
# HDD/CDD, dewpointTemp

In [5]:
df_m['dewpointTemp']=mpcalc.dewpoint_from_relative_humidity(df_m.temperature.values * units.degC,
                                                            df_m.humidity.values * units.percent)
#wetbulb from metpy had issues

In [6]:
df_m['SLP']=df_m.pressure.values * (1+((1013.25/df_m.pressure.values)**((287.05*0.0065)/9.80665)) * (0.0065*87.3)/288.15)**(9.80665/(287.05*0.0065))

In [7]:
df_daily_max = df_m.resample('D').max()
df_daily_min = df_m.resample('D').min()
df_daily_ave = df_m.resample('D').mean()
df_daily_total = df_m.resample('1T').mean().resample('D').sum()
df_m.sample()

Unnamed: 0_level_0,type,wind_avg,wind_gust,wind_dir,pressure,temperature,humidity,lux,uv,solar_radiation,...,strike_distance,strike_count,battery,report_interval,local_daily_precip,precip_final,local_daily_precip_final,precip_analysis_type,dewpointTemp,SLP
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08-25 01:47:00-07:00,obs_st,0.0,0.0,0.0,1004.9,9.4,93.0,0.0,0.0,0.0,...,0.0,0.0,2.62,1.0,0.0,,,0.0,8.325825,1015.361362


In [8]:
use_current_month = True

if use_current_month:
    current_month = datetime.datetime.now().month
else:
    current_month = 7

current_month_grid_data=pd.DataFrame()
current_month_grid_data = df_daily_max[df_daily_max.index.month==current_month].temperature
current_month_grid_data = pd.concat([current_month_grid_data,
                                     df_daily_min[df_daily_min.index.month==current_month].temperature.round(1),
                                     df_daily_ave[df_daily_ave.index.month==current_month].temperature.round(1),
                                     df_daily_ave[df_daily_ave.index.month==current_month].dewpointTemp.round(1),
                                     df_daily_ave[df_daily_ave.index.month==current_month].SLP.round(1),
                                     df_daily_total[df_daily_total.index.month==current_month].solar_radiation.round(0),
                                     df_daily_max[df_daily_max.index.month==current_month].uv.round(1),
                                     df_daily_ave[df_daily_ave.index.month==current_month].wind_avg.round(1),
                                     df_daily_ave[df_daily_ave.index.month==current_month].wind_dir.astype(int),
                                     df_daily_max[df_daily_max.index.month==current_month].wind_gust.round(1)
                                    ],axis=1)
current_month_grid_data.columns=('max_temperature','min_temperature','mean_temperature','mean_dewpoint','mean SLP','total_solar_radiation','max_uv_index','average speed','average dir','max gust')

In [9]:
current_month_grid_data['station_id'] = 'tempest'

In [10]:
#this should go to erddap
current_month_grid_data.to_csv(f'Data/MoonflowerTempest_2020{str(current_month).zfill(2)}.csv')

In [11]:
def highlight_max(s):
    '''
    highlight the maximum in a Series yellow.
    '''
    is_max = s == s.max()
    return ['color: red' if v else '' for v in is_max]

def highlight_min(s):
    '''
    highlight the maximum in a Series yellow.
    '''
    is_min = s == s.min()
    return ['color: blue' if v else '' for v in is_min]

current_month_grid_data.drop('station_id',axis=1).style.apply(highlight_max).apply(highlight_min).format("{:.2f}")


Unnamed: 0_level_0,max_temperature,min_temperature,mean_temperature,mean_dewpoint,mean SLP,total_solar_radiation,max_uv_index,average speed,average dir,max gust
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-03-01 00:00:00-08:00,10.6,3.1,6.8,5.6,1020.6,14766.0,1.9,0.3,162.0,2.5
2021-03-02 00:00:00-08:00,6.9,2.8,5.0,4.0,1020.0,90092.0,1.6,0.2,103.0,3.0
2021-03-03 00:00:00-08:00,13.2,1.3,6.8,4.0,1011.5,165964.0,2.3,0.3,128.0,3.8
2021-03-04 00:00:00-08:00,12.1,1.6,6.1,5.2,1016.6,141945.0,2.6,0.5,167.0,7.0
2021-03-05 00:00:00-08:00,9.9,5.8,7.2,6.7,1014.4,75285.0,1.6,0.5,182.0,6.3
2021-03-06 00:00:00-08:00,18.6,3.5,6.6,4.0,1016.7,54711.0,2.0,1.0,196.0,5.7
2021-03-07 00:00:00-08:00,7.4,1.3,4.7,2.9,1019.9,87025.0,5.7,1.3,193.0,6.0
2021-03-08 00:00:00-08:00,12.1,-0.4,5.0,0.8,1018.4,135962.0,3.4,0.4,175.0,3.8
2021-03-09 00:00:00-08:00,11.7,1.3,5.3,1.7,1017.5,103514.0,4.4,0.5,151.0,4.0
2021-03-10 00:00:00-08:00,10.9,0.8,4.5,2.1,1018.5,116784.0,5.4,0.4,136.0,3.1


In [12]:
### need to manage daily records, monthly records, alltime records

## repeat for each sensor on property

choose month or use current (starting with data subsetting)

In [13]:
if use_current_month:
    constraints = {
        'time>=': datetime.datetime.now().strftime('%Y-%m-01T00:00:00Z'),
    }
else:
    constraints = {
        'time>=': '2020-01-01T00:00:00Z',
        'time<=': '2027-02-10T00:00:00Z',
    }

In [15]:
alldatasets=['channel_1314759_thingspeak',
            'channel_1027974_thingspeak',
            'channel_1037066_thingspeak',
            'channel_1047747_thingspeak',
            'channel_843357_thingspeak',
            'channel_rpi']
df_all = {}

for dataset_id in alldatasets:
    try:
        d = ERDDAP(server=server_url,
            protocol='tabledap',
            response='csv'
        )
        d.dataset_id=dataset_id
        d.constraints=constraints
        
    except HTTPError:
        print('Failed to generate url {}'.format(dataset_id))

    try:
        df_m = d.to_pandas(
                    index_col='time (UTC)',
                    parse_dates=True,
                    skiprows=(1,)  # units information can be dropped.
                    )
        df_m.sort_index(inplace=True)
        df_m.columns = [x[1].split()[0] for x in enumerate(df_m.columns)]

    except:
        print(f"something failed in data download {dataset_id}")
        pass

    #stats are all utc driven - but we really want local daily values
    df_m=df_m.tz_convert('US/Pacific')
    df_all.update({dataset_id:df_m})
    # calculations of various parameters... metpy?
    # HDD/CDD, dewpointTemp

something failed in data download channel_rpi


In [16]:
for k,v in enumerate(df_all):
    print(df_all[v].keys())

Index(['entry_id', 'temperature', 'RH_Percent'], dtype='object')
Index(['entry_id', 'temperature'], dtype='object')
Index(['entry_id', 'temperature', 'RH_Percent'], dtype='object')
Index(['entry_id', 'temperature_internal', 'temperature_external',
       'RH_Percent'],
      dtype='object')
Index(['entry_id', 'temperature', 'RH_Percent', 'Barotemperature', 'SLP'], dtype='object')
Index(['entry_id', 'temperature', 'RH_Percent', 'Barotemperature', 'SLP'], dtype='object')


In [19]:
if use_current_month:
    current_month = datetime.datetime.now().month
else:
    current_month = 7
    
for k,v in enumerate(df_all):
    print(df_all[v].keys())
    df_daily_max = df_all[v].resample('D').max()
    df_daily_min = df_all[v].resample('D').min()
    df_daily_ave = df_all[v].resample('D').mean()
    if ('RH_Percent' in df_all[v].keys()) and ('temperature' in df_all[v].keys()) and (not 'Barotemperature' in df_all[v].keys()):
        print(f"processing {v} :0")
        current_month_grid_data=pd.DataFrame()
        current_month_grid_data = df_daily_max[df_daily_max.index.month==current_month].temperature
        current_month_grid_data = pd.concat([current_month_grid_data,
                                             df_daily_min[df_daily_min.index.month==current_month].temperature.round(1),
                                             df_daily_ave[df_daily_ave.index.month==current_month].temperature.round(1),
                                             df_daily_ave[df_daily_ave.index.month==current_month].RH_Percent.round(1),
                                            ],axis=1)
        current_month_grid_data.columns=('max_temperature','min_temperature','mean_temperature','mean_humidity')
        current_month_grid_data['station_id'] = v    
        current_month_grid_data.to_csv(f'Data/{v}_2020{str(current_month).zfill(2)}.csv')
    elif (not 'RH_Percent' in df_all[v].keys()) and ('temperature' in df_all[v].keys()) and (not 'Barotemperature' in df_all[v].keys()):
        print(f"processing {v} :1")
        current_month_grid_data=pd.DataFrame()
        current_month_grid_data = df_daily_max[df_daily_max.index.month==current_month].temperature
        current_month_grid_data = pd.concat([current_month_grid_data,
                                             df_daily_min[df_daily_min.index.month==current_month].temperature.round(1),
                                             df_daily_ave[df_daily_ave.index.month==current_month].temperature.round(1),
                                            ],axis=1)
        current_month_grid_data.columns=('max_temperature','min_temperature','mean_temperature')
        current_month_grid_data['station_id'] = v
        
        #this should go to erddap
        current_month_grid_data.to_csv(f'Data/{v}_2020{str(current_month).zfill(2)}.csv')
    elif ('RH_Percent' in df_all[v].keys()) and ('temperature' in df_all[v].keys()) and ('Barotemperature' in df_all[v].keys()):
        print(f"processing {v} :2")
        current_month_grid_data=pd.DataFrame()
        current_month_grid_data = df_daily_max[df_daily_max.index.month==current_month].Barotemperature
        current_month_grid_data = pd.concat([current_month_grid_data,
                                             df_daily_min[df_daily_min.index.month==current_month].Barotemperature.round(1),
                                             df_daily_ave[df_daily_ave.index.month==current_month].Barotemperature.round(1),
                                            ],axis=1)
        current_month_grid_data.columns=('max_temperature','min_temperature','mean_temperature')
        current_month_grid_data['station_id'] = v
        
        #this should go to erddap
        current_month_grid_data.to_csv(f'Data/{v}_2020{str(current_month).zfill(2)}.csv')
    else:
        print(f"passing {v} :3")
        pass
    
    print(f'{v}')
current_month_grid_data.drop('station_id',axis=1).style.apply(highlight_max).apply(highlight_min).format("{:.2f}")


Index(['entry_id', 'temperature', 'RH_Percent'], dtype='object')
processing channel_1314759_thingspeak :0
channel_1314759_thingspeak
Index(['entry_id', 'temperature'], dtype='object')
processing channel_1027974_thingspeak :1
channel_1027974_thingspeak
Index(['entry_id', 'temperature', 'RH_Percent'], dtype='object')
processing channel_1037066_thingspeak :0
channel_1037066_thingspeak
Index(['entry_id', 'temperature_internal', 'temperature_external',
       'RH_Percent'],
      dtype='object')
passing channel_1047747_thingspeak :3
channel_1047747_thingspeak
Index(['entry_id', 'temperature', 'RH_Percent', 'Barotemperature', 'SLP'], dtype='object')
processing channel_843357_thingspeak :2
channel_843357_thingspeak
Index(['entry_id', 'temperature', 'RH_Percent', 'Barotemperature', 'SLP'], dtype='object')
processing channel_rpi :2
channel_rpi


Unnamed: 0_level_0,max_temperature,min_temperature,mean_temperature
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-03-01 00:00:00-08:00,15.8,12.9,13.6
