In [1]:
import numpy as np
import pandas as pd

from Functions import simple_models_hold_out, hold_out, optimise_occupancy, \
adjacent_co2, load_data, load_occupancy, load_lists, plot_estimates
from Functions import N_estimate, C_estimate

To load a dd_list, we first need to find the start and end time for
the corresponding occupancy:

In [2]:
# Measurement days; the strings are laid out as year_day_month.
dates = [
    '2022_24_11',
    '2022_30_11',
    '2022_07_12',
    '2022_09_12',
    '2022_14_12',
]

# Occupancy counts for the first day, together with the time window they cover.
filename_n = f'data/N_{dates[0]}.csv'
N_list, start_time, end_time = load_occupancy(filename_n, n_zones=27)

# Show the number of entries, the counts in the last entry, and the entry at index 2.
(len(N_list), np.array(N_list[-1]), N_list[2])

(28,
 array([17, 13, 11, 11, 13, 16, 16, 13, 14, 13, 13, 19, 17, 17, 17, 15, 17,
        15, 15, 15]),
 [])

The N_list returned by `load_occupancy` contains n_zones + 1 lists of the occupancy
counted in the period; a list is simply empty if the zone does not have data.
Along with this, we also got a start and end time, which are now used to find
the corresponding CO2 levels:

In [3]:
# CO2 data for the same day, loaded with the start/end time returned alongside the occupancy counts.
filename_co2 = f'data/co2_{dates[0]}.csv'
dd_list = load_data(filename_co2, start_time, end_time)

# Show the number of entries and the first three (time, co2) pairs of the last entry and of index 2.
(len(dd_list), dd_list[-1][:3], dd_list[2][:3])

(28,
 [[datetime.datetime(2022, 11, 24, 11, 0), 612.1185266332714],
  (datetime.datetime(2022, 11, 24, 11, 15), 1058.8146743982488),
  (datetime.datetime(2022, 11, 24, 11, 30), 1059.31337984861)],
 [[datetime.datetime(2022, 11, 24, 11, 0), 555.5901475703059],
  (datetime.datetime(2022, 11, 24, 11, 15), 637.0009125158241),
  (datetime.datetime(2022, 11, 24, 11, 30), 657.7038489323096)])

The dd_list is of the same length as the N_list, but contains tuples of
(time, co2) for each zone. The co2 level is a smoothed estimate of the true level based on measurements up to each occupancy count. One can specify whether to use an exponential moving average filter or a Kalman filter for this.

When dealing with more than one period, the structure becomes a bit more complex. Instead of each zone simply containing the list of data, it must now contain a list of periods, each of which is an entry like those in the above N_list and dd_list. For this we use `load_lists`:

In [4]:
# Load occupancy and CO2 for all dates at once; every zone entry now holds one list per period.
N_list, dd_list = load_lists(dates)

# Show the counts of the last period in the last entry, and how many periods that entry has.
last_zone_periods = N_list[-1]
(np.array(last_zone_periods[-1]), len(last_zone_periods))

(array([ 0,  1,  3,  5,  7,  9,  9, 13, 16, 16, 13, 16, 15, 15, 16, 15, 17,
        14, 11, 13, 14, 15, 14, 15, 15, 17, 15, 15, 16, 16, 15, 15, 13, 15,
        15]),
 5)

There are 5 periods in this data. This can now be used to get optimal parameters for estimating both CO2 and occupancy. First of all, to calculate an estimate we pass a set of parameters along with the CO2, adjacent CO2 and N_lists to the estimate functions. We find the adjacent CO2 levels first:

In [None]:
# adjacent_co2 relies on an id_map dictionary (zone number -> zone numbers of its neighbours).
# That map lives in 'constants.py' and can be edited there.

# With use_adjacent=False the replacement co2 is not used, so the values are all 0.
adj_list = adjacent_co2(dd_list, use_adjacent=False)
without_adjacent = np.array(adj_list[-1][-1])
print(without_adjacent)

# The default call uses the replacement co2.
adj_list = adjacent_co2(dd_list)
with_adjacent = np.array(adj_list[-1][-1])
print(with_adjacent)

Now to producing estimates: the parameters used here come from an optimisation done on the data available in the repository, but any tuple-like of length 4 will do. Variable names here mirror those inside functions in the rest of the code.

In [None]:
# Estimate CO2 and occupancy for the last period of the last zone entry.
# pandas (pd), N_estimate and C_estimate are imported in the notebook's first cell.

# N: occupancy counts, C: co2 values (second column of the (time, co2) pairs),
# C_adj: adjacent co2 values for the same period.
N, C, C_adj = N_list[-1][-1], np.array(dd_list[-1][-1])[:, 1], adj_list[-1][-1]
V = 150  # passed as V to both estimators -- presumably the zone volume; TODO confirm meaning and units

# NOTE(review): this file name looks like output of hold_out(..., filename_parameters='testing');
# confirm it is available in the repository so the cell works on a fresh kernel.
parameters = pd.read_csv('parameters/testing_2022_07_12.csv').values
x = parameters[-1][1:]  # last row without its first column, used as the parameter vector

C_est = C_estimate(x=x, C=C, C_adj=C_adj, N=N, V=V)
N_est = N_estimate(x=x, C=C, C_adj=C_adj, V=V)

# Estimation errors: the first entry of C and N is skipped when comparing with the estimates.
print(np.round(C_est - np.array(C[1:], dtype=int), 0), '\n', N_est - N[1:])

With these results, we can now go on to doing the optimisation ourselves. This is done by using the following code:

# Load the occupancy and CO2 lists for every date in `dates`.
N_list, dd_list = load_lists(dates)
# Run the optimisation on the CO2 and occupancy lists.
optimise_occupancy(dd_list, N_list)

By specifying the parameter 'filename_parameters', the optimised parameters are saved in a .csv file.

This all comes together to do hold out cross-validation in the function hold_out:

In [None]:
# Hold-out cross-validation over all dates, with plots enabled;
# 'testing' is handed over as filename_parameters.
hold_out(
    dates,
    plot=True,
    filename_parameters='testing',
)


The hold_out method has a cousin which implements various simple models for estimating occupancy:

In [None]:
# For the linear regression model, the only plot shown is a scatter plot of the data.
simple_models_hold_out(
    dates,
    method='l',
    plot=True,
    plot_scatter=True,
)


The hold_out method has a cousin which implements various simple models for estimating occupancy:

In [None]:
# NOTE(review): this cell repeats the previous simple_models_hold_out call verbatim;
# consider removing it or using it to demonstrate a different `method`.
# For the linear regression model, the only plot shown is a scatter plot of the data.
simple_models_hold_out(
    dates,
    method='l',
    plot=True,
    plot_scatter=True,
)
