In [None]:
import pandas as pd
import numpy as np
import joblib
import urllib.request, urllib.parse, urllib.error
import tarfile
import tempfile
import shutil
import os
import math
from datetime import datetime, date, time

In [None]:
# Archive that is unpacked to obtain the two serialized model files below.
models_zipped_filename = "orholm-data-xgb-model.tar.gz"
model_low_filename = "orholm-data-xgb-model-y_low.joblib"
model_high_filename = "orholm-data-xgb-model-y_high.joblib"

# Tar archive containing the sensor data files to predict from.
temp_tar_file = "O_P_1-files.tar"

# Identifiers of the two models (not referenced by the cells visible here).
model_names = ["y_low", "y_high"]

In [None]:
def csv_read_days(filename, temparature_col_name, rh_col_name, days=10, data_dir=None):
    """Read one sensor log file and return its most recent rows.

    The file is parsed as tab-separated text with ',' as the decimal mark.
    Its first line is skipped and the four columns are named
    ``date``, ``time``, `temparature_col_name` and `rh_col_name`.

    Parameters
    ----------
    filename : str
        File name, resolved relative to `data_dir`.
    temparature_col_name : str
        Name given to the third column (spelling kept for compatibility
        with existing callers).
    rh_col_name : str
        Name given to the fourth column.
    days : int, default 10
        The last ``24 * days`` rows are kept (i.e. `days` days, assuming one
        sample per hour -- TODO confirm the logging interval).
    data_dir : str, optional
        Directory containing `filename`. When omitted, the module-level
        ``temp_path`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by the combined timestamp (index name ``'date'``) with
        the two measurement columns.
    """
    if data_dir is None:
        # Backward-compatible fallback: the extraction cell defines temp_path.
        data_dir = temp_path

    df = pd.read_csv(
        os.path.join(data_dir, filename),
        skiprows=1,
        names=['date', 'time', temparature_col_name, rh_col_name],
        # Keep both parts as text so they can be combined explicitly below.
        dtype={'date': str, 'time': str},
        decimal=',',
        sep='\t',
    )

    # Combine date and time by hand: the nested-list form of `parse_dates`
    # that used to do this is deprecated in pandas 2.2 and gone in 3.0.
    df['date'] = pd.to_datetime(df['date'] + ' ' + df.pop('time'))

    # only keep the relevant number of days
    return df.tail(24 * days).set_index('date')

In [None]:
def ah1(t, rh): # KB
    """Compute absolute humidity from temperature and relative humidity.

    `rh` is divided by 100, so it is expected as a percentage; `t` is offset
    by 273.15, so it is presumably in degrees Celsius. The result is
    presumably in g/m^3 -- TODO confirm the source of the constants.
    """
    rh_fraction = rh / 100
    exponent = t / (t + 238.3) * 17.2694
    # math.e ** x (rather than math.exp) keeps array-like inputs working.
    vapour_term = math.e ** exponent
    return 1322.9 * rh_fraction * vapour_term / (t + 273.15)

In [None]:
# Unpack the model archive into the working directory, then deserialize the
# low- and high-humidity models from the joblib files it contains.
# NOTE(review): joblib files are pickle-based; only load archives you trust.
shutil.unpack_archive(models_zipped_filename)
model_low, model_high = (
    joblib.load(model_file)
    for model_file in (model_low_filename, model_high_filename)
)

In [None]:
# unzip data files to predict from
# A fresh directory is created under the current directory on every run.
# NOTE(review): it is never removed here; clean it up (e.g. shutil.rmtree)
# once the frames below have been read if repeated runs are expected.
temp_path = tempfile.mkdtemp(dir='.')

# The context manager closes the archive handle even if extraction fails.
# NOTE(review): extractall() trusts member paths; only use trusted archives.
with tarfile.open(temp_tar_file) as tf:
    tf.extractall(temp_path)

# read the data files (one per sensor) from the extracted directory
df1 = csv_read_days('O_P_1_001.tdf', 'temp_1', 'rh_1')
df2 = csv_read_days('O_P_1_002.tdf', 'temp_2', 'rh_2')
df3 = csv_read_days('O_P_1_003.tdf', 'temp_3', 'rh_3')

In [None]:
# Combine the three sensor frames on their shared timestamps and derive the
# auxiliary features that were fed into the model builder. Column order is
# significant: the frame is later handed to the models as a plain array.
df = (
    df1.join([df2, df3], how='inner')
    .assign(
        # per-timestamp medians across the three sensors
        rh_median=lambda d: d[['rh_1', 'rh_2', 'rh_3']].median(axis=1),
        temp_median=lambda d: d[['temp_1', 'temp_2', 'temp_3']].median(axis=1),
        # absolute humidity derived from the two medians, row by row
        ah_median=lambda d: d.apply(
            lambda row: ah1(row.temp_median, row.rh_median), axis=1
        ),
        # 24-row window statistics of the median relative humidity
        rh_median_24_mean=lambda d: d.rh_median.rolling(24, min_periods=1).mean(),
        rh_median_24_ewm_mean=lambda d: d.rh_median.ewm(span=24).mean(),
        rh_median_24_median=lambda d: d.rh_median.rolling(24, min_periods=1).median(),
        rh_median_24_min=lambda d: d.rh_median.rolling(24, min_periods=1).min(),
        rh_median_24_max=lambda d: d.rh_median.rolling(24, min_periods=1).max(),
    )
    # the raw per-sensor columns are not model inputs
    .drop(['temp_1', 'temp_2', 'temp_3', 'rh_1', 'rh_2', 'rh_3'], axis=1)
)

In [None]:
models_dict = {'low': model_low, 'high': model_high}

# Fail with a clear message instead of an obscure IndexError further down
# when there is nothing to predict from.
if df.empty:
    raise ValueError(
        'no feature rows to predict from '
        '(empty input data or no timestamps shared by all sensors)'
    )

# Build the feature matrix once; both models consume the same input.
features = np.array(df)

# predict for both the 'high' and 'low' model
for key, model in models_dict.items():
    predictions = model.predict(features)
    # Only the prediction for the most recent row decides whether to alert.
    if predictions[-1] > 0:
        # ring the bells, sound the alarm, send an email!
        print("WARNING!")
