In [1]:
import os
import warnings
from datetime import datetime
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from influxdb_client import InfluxDBClient
from scipy.stats.distributions import chi2
from statsmodels.tsa.arima_model import ARIMA, ARMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

sns.set()
warnings.filterwarnings("ignore")

# Import Dataset

In [3]:
# Connection parameters for the InfluxDB instance.
#
# The API token is a secret, so it is read from the INFLUXDB_TOKEN environment
# variable instead of being stored in the notebook; a missing variable raises
# KeyError right here rather than producing an authentication error later.
# NOTE(review): the token that used to be embedded in this cell should be
# treated as leaked and rotated on the server.
my_token = os.environ["INFLUXDB_TOKEN"]
my_org = "iot-org"
bucket = "iot-demo"

# The server address can be overridden with INFLUXDB_URL; the default is the
# address this notebook has always used.
influx_url = os.environ.get("INFLUXDB_URL", "http://192.168.1.100:9999")
client = InfluxDBClient(url=influx_url, token=my_token, org=my_org, debug=False)

In [4]:
# Length of the query window. `days` is the Flux duration literal derived from
# `n_days`, so the two values can no longer drift apart.
n_days = 5
days = f'{n_days}d'


def build_sensor_query(measurement, lookback, bucket="iot-demo", device="RoomWeather", window="10m"):
    """Build the Flux query for one sensor measurement.

    The query selects `measurement` for `device` from `bucket` over the last
    `lookback` (a Flux duration such as "5d") and aggregates it into windows
    of length `window` using the median.

    Parameters
    ----------
    measurement : str
        Value matched against the `_measurement` column.
    lookback : str
        Flux duration literal; the range starts at `-lookback` and stops at `now()`.
    bucket : str, optional
        Bucket to read from.
    device : str, optional
        Value matched against the `device` tag.
    window : str, optional
        Flux duration literal passed to `aggregateWindow(every: ...)`.

    Returns
    -------
    str
        The Flux query text.

    Notes
    -----
    The arguments are interpolated into the query verbatim; only pass trusted,
    hard-coded values.
    """
    return f'''
from(bucket: "{bucket}")
  |> range(start: -{lookback}, stop: now())
  |> filter(fn: (r) => r["_measurement"] == "{measurement}")
  |> filter(fn: (r) => r["device"] == "{device}")
  |> aggregateWindow(every: {window}, fn: median)
  '''


queryTemperature = build_sensor_query("temperature", days)
queryHumidity = build_sensor_query("humidity", days)
queryPressure = build_sensor_query("pressure", days)

In [5]:
# Send each Flux query to InfluxDB and keep the result of every call
# (one result per measurement) for the assembly step that follows.
query_api = client.query_api()
dfTemp = query_api.query_data_frame(org=my_org, query=queryTemperature)
dfHum = query_api.query_data_frame(org=my_org, query=queryHumidity)
dfPress = query_api.query_data_frame(org=my_org, query=queryPressure)

In [6]:
# Combine the three sensor readings into one frame indexed by timestamp.
#
# Every measurement is keyed by its own `_time` column and then aligned on the
# temperature timestamps. Attaching the columns by row position instead would
# silently pair values from different instants whenever one measurement has a
# missing or extra window.
def _values_by_time(frame, name):
    """Return the `_value` column of a query result as a Series indexed by `_time`."""
    return frame.set_index('_time')['_value'].rename(name)


temperature_by_time = _values_by_time(dfTemp, 'temperature')
df = pd.DataFrame({
    'temperature': temperature_by_time,
    # Timestamps absent from a measurement become NaN rather than shifting rows.
    'humidity': _values_by_time(dfHum, 'humidity').reindex(temperature_by_time.index),
    'pressure': _values_by_time(dfPress, 'pressure').reindex(temperature_by_time.index),
}).rename_axis('time')
df.head()

Unnamed: 0_level_0,temperature,humidity,pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-07-07 17:10:00+00:00,30.65,35.95,
2020-07-07 17:20:00+00:00,30.65,35.9,
2020-07-07 17:30:00+00:00,30.65,35.8,
2020-07-07 17:40:00+00:00,30.7,35.6,
2020-07-07 17:50:00+00:00,30.65,35.3,


In [7]:
# Fill gaps in every sensor column: first with the next valid reading
# (back-fill), then with the previous one (forward-fill) so that gaps at the
# very end of a series are covered as well.
# `.bfill()` / `.ffill()` replace `fillna(method=...)`, which is deprecated in
# recent pandas releases.
# NOTE(review): back-filling copies later readings into earlier rows; confirm
# this is acceptable for the forecasting evaluation below.
sensor_columns = ['temperature', 'humidity', 'pressure']
df[sensor_columns] = df[sensor_columns].bfill().ffill()

In [8]:
# Samples per day: total number of rows divided by the number of days queried,
# rounded down. Used later as the seasonal period.
points_per_day = len(df) // n_days
points_per_day

144

In [9]:
# Print the number of values still missing in each sensor column, one per line.
for column in ('temperature', 'humidity', 'pressure'):
    print(df[column].isnull().sum())

0
0
0


In [10]:
# Split into train and test sets by position: the first 80% of the rows are
# used for training and the remaining rows for testing.
size = int(len(df) * 0.8)
df_train = df.iloc[:size]
df_test = df.iloc[size:]

# Predicting temperature

## AR Model

In [21]:
# Show the first five rows of the test set.
df_test.head(5)

Unnamed: 0_level_0,temperature,humidity,pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-07-11 17:10:00+00:00,33.3,49.4,1019.0
2020-07-11 17:20:00+00:00,33.0,50.6,1019.0
2020-07-11 17:30:00+00:00,32.9,50.8,1019.0
2020-07-11 17:40:00+00:00,32.7,50.8,1019.0
2020-07-11 17:50:00+00:00,32.65,50.8,1019.0


In [22]:
# Show the last five rows of the test set.
df_test.tail(5)

Unnamed: 0_level_0,temperature,humidity,pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-07-12 16:30:00+00:00,30.75,43.7,1023.5
2020-07-12 16:40:00+00:00,30.85,43.6,1023.5
2020-07-12 16:50:00+00:00,30.85,43.5,1023.5
2020-07-12 17:00:00+00:00,30.85,43.1,1023.5
2020-07-12 17:08:40.216798+00:00,30.9,43.1,1023.5


In [None]:
# Fit a SARIMAX model of temperature on the training set, with humidity as an
# exogenous regressor and a seasonal period of one day (`points_per_day` samples).
# NOTE(review): a seasonal period this long makes the model expensive to
# estimate; expect this cell to run for a long time.
model_ar = SARIMAX(
    df_train.temperature,
    exog=df_train[["humidity"]],
    order=(2, 0, 2),
    seasonal_order=(2, 0, 2, points_per_day),
)
results_ar = model_ar.fit()

# The forecast window is the whole test set. It is taken from the data rather
# than typed in as literal timestamps, because the query window is relative to
# "now" and therefore different on every run.
start_date = df_test.index[0]
end_date = df_test.index[-1]

# Predict by integer position (the steps right after the training sample).
# This does not require the test timestamps to lie on a regular grid; the
# final aggregation window may end at an irregular instant.
first_step = len(df_train)
last_step = first_step + len(df_test) - 1
df_pred = results_ar.predict(start=first_step, end=last_step, exog=df_test[["humidity"]])
# Label the predictions with the timestamps of the rows they correspond to.
df_pred.index = df_test.index

fig, ax = plt.subplots(figsize=(20, 5))
df_pred.plot(ax=ax, color="red", label="Predicted")
df_test.temperature.plot(ax=ax, color="blue", label="Actual")
ax.set_title("Predictions vs Actual", size=24)
ax.legend()
plt.show()