In [2]:


from _dfguru import DataFrameGuru as DFG
from _occupancy_forecasting import MasterTrainer
from _occupancy_forecasting import load_data
from _occupancy_forecasting import avoid_name_conflicts
from _evaluating import ParameterSearch

import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter

import torch.multiprocessing
# Set torch.multiprocessing's tensor sharing strategy to 'file_system'.
# NOTE(review): presumably chosen to avoid running out of file descriptors
# with multi-worker data loading — confirm.
torch.multiprocessing.set_sharing_strategy('file_system')

import numpy as np
import os
# One DataFrameGuru instance, reused below for loading, filtering and plotting.
dfg = DFG()
# Ask PyTorch to release cached GPU memory that is currently unused.
torch.cuda.empty_cache()


# ----------------------------- Run settings -----------------------------
# Command-line parsing is disabled in this notebook; the values it would
# provide are fixed by hand instead.
# args = parse_arguments()
# args = prompt_for_missing_arguments(args)
# n_run = args.n_run
# n_param = args.n_param
n_run, n_param = 10, 0
overwrite = True

# ------------------------------- Locations ------------------------------
path_to_data = "data/occupancy_forecasting"
param_dir = "_occupancy_forecasting/parameters/wrap_up"
tb_log_dir = "_occupancy_forecasting/training_logs/wrap_up"
cp_log_dir = "_occupancy_forecasting/checkpoints/wrap_up"

# --------------------------- Dataset settings ---------------------------
frequency, split_by = "5min", "time"

# load_data yields three collections (train / validation / test), each of
# which is indexed by room id further down in this notebook.
train_dict, val_dict, test_dict = load_data(
    path_to_data_dir=path_to_data,
    frequency=frequency,
    split_by=split_by,
    dfguru=dfg,
    with_examweek=False,
)

# Room 0's training frame is the default frame used by the plotting cells.
data = train_dict[0]


In [3]:
import matplotlib.pyplot as plt

import datetime
import json


# Load the helper metadata stored next to the dataset. Its
# "columns_to_normalize" section provides the min/max bounds used further
# down to min-max scale individual columns for plotting.
# `path_to_data` (configuration cell) is reused instead of repeating the
# directory literal, so both cells always point at the same dataset.
helper_file = os.path.join(path_to_data, "helpers_occpred.json")
# Explicit encoding: JSON is UTF-8, independent of the platform default.
with open(helper_file, "r", encoding="utf-8") as f:
    helper = json.load(f)

# Bounds (mappings with "min" / "max" entries) for the "registered" and "tl" columns.
norm_registered = helper["columns_to_normalize"]["registered"]
norm_temperature = helper["columns_to_normalize"]["tl"]

In [4]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

from _plotting import DataPlotter

### Plot Some Features on Slide of 17.12 Presentation

In [5]:
# Plot window: 2024-04-16 from 07:00:00 to 21:00:00.
start = datetime.datetime(2024, 4, 16, 7, 0, 0)
stop = datetime.datetime(2024, 4, 16, 21, 0, 0)

# Work on an explicit copy so the scaling below can neither write through into
# `data` nor trigger pandas' SettingWithCopyWarning on a filtered frame.
# NOTE(review): assumes filter_by_timestamp returns a pandas DataFrame — confirm.
plot_data = dfg.filter_by_timestamp(data, "datetime", start, stop).copy()

# Min-max scale both columns with the bounds read from the helper file.
plot_data["tl"] = (plot_data["tl"] - norm_temperature["min"]) / (norm_temperature["max"] - norm_temperature["min"])
plot_data["registered"] = (plot_data["registered"] - norm_registered["min"]) / (norm_registered["max"] - norm_registered["min"])

# Empty save_path is passed through unchanged.
# NOTE(review): presumably means "do not save to a dedicated directory" — confirm in DataPlotter.
plotter = DataPlotter(
    save_path="",
    dataframe_guru=dfg
)

#plotter.plot_some_features(plot_data)


### Extract ranges and size of train/val/test set

In [6]:

# Report, per room and per split, the number of samples and the covered
# datetime range. One loop over the splits replaces three copy-pasted print
# statements; the printed text is unchanged.
splits = {"Train": train_dict, "Val": val_dict, "Test": test_dict}

for room_id in [0, 1]:
    for split_name, split_dict in splits.items():
        frame = split_dict[room_id]
        timestamps = frame["datetime"]
        print(f"{split_name} Room {room_id}: {len(frame)} samples from {timestamps.min()} to {timestamps.max()}")
    # Blank line between rooms.
    print()

Train Room 0: 17728 samples from 2024-04-08 00:00:00 to 2024-06-08 13:15:00
Val Room 0: 2216 samples from 2024-06-08 13:20:00 to 2024-06-16 05:55:00
Test Room 0: 2216 samples from 2024-06-16 06:00:00 to 2024-06-23 22:35:00

Train Room 1: 17728 samples from 2024-04-08 00:00:00 to 2024-06-08 13:15:00
Val Room 1: 2216 samples from 2024-06-08 13:20:00 to 2024-06-16 05:55:00
Test Room 1: 2216 samples from 2024-06-16 06:00:00 to 2024-06-23 22:35:00



### Show class imbalances -> zero problem

In [7]:
def report_zero_share(label, occrate):
    """Print how many entries of `occrate` are exactly zero.

    Prints one line of the form
    "<label>: zero:<count> total:<count> relative:<count / total>".

    Args:
        label: Text placed in front of the counts.
        occrate: Array-like of occupancy rates supporting `== 0` and `len()`.
    """
    # Cast to a plain int so the ratio is a Python float, exactly as when
    # dividing two len() results.
    n_zero = int((occrate == 0).sum())
    n_total = len(occrate)
    print(f"{label}: zero:{n_zero} total:{n_total} relative:{n_zero / n_total}")


splits = {"Train": train_dict, "Val": val_dict, "Test": test_dict}
room_ids = [0, 1]

# Number of timesteps with occrate == 0, per room and split.
for room_id in room_ids:
    for split_name, split_dict in splits.items():
        report_zero_share(f"{split_name} Room {room_id}", split_dict[room_id]["occrate"])
    print()

# Same statistic pooled over all rooms. The three names are kept at cell level
# because they were defined here before and later cells may rely on them.
train_occrate = np.concatenate([train_dict[room_id]["occrate"] for room_id in room_ids])
val_occrate = np.concatenate([val_dict[room_id]["occrate"] for room_id in room_ids])
test_occrate = np.concatenate([test_dict[room_id]["occrate"] for room_id in room_ids])

for split_name, pooled in (("Train", train_occrate), ("Val", val_occrate), ("Test", test_occrate)):
    report_zero_share(split_name, pooled)

Train Room 0: zero:13653 total:17728 relative:0.7701376353790613
Val Room 0: zero:1820 total:2216 relative:0.8212996389891697
Test Room 0: zero:1727 total:2216 relative:0.7793321299638989

Train Room 1: zero:14132 total:17728 relative:0.7971570397111913
Val Room 1: zero:1842 total:2216 relative:0.8312274368231047
Test Room 1: zero:1868 total:2216 relative:0.8429602888086642

Train: zero:27785 total:35456 relative:0.7836473375451264
Val: zero:3662 total:4432 relative:0.8262635379061372
Test: zero:3595 total:4432 relative:0.8111462093862816


### Features that make sense

In [7]:
# Show the column index of room 0's training frame.
train_dict[0].columns

Index(['datetime', 'occrate', 'occcount', 'occcountdiff', 'occratediff',
       'lecture', 'exam', 'maxocccount', 'offsite', 'coursenumber',
       'registered', 'test', 'maxocccountestimate', 'maxoccrate',
       'lecturerampbefore', 'lecturerampafter', 'tutorium', 'cancelled',
       'ects', 'maxoccrateestimate', 'VL', 'UE', 'KS', 'Informatik', 'None_sa',
       'Volkswirtschaftslehre', 'Chemie', 'Wirtschaftsinformatik',
       'Maschinenbau', 'Betriebswirtschaftslehre', 'Rechtswissenschaften',
       'Mathematik', 'Mechatronik', 'Informationselektronik',
       'Biologische Chemie', 'Sozialwissenschaften', 'Artificial Intelligence',
       'Kunststofftechnik', 'Statistik', 'Pädagogik', 'Medical Engineering',
       'B1 - Bachelor 1. Jahr', 'None_level', 'B2 - Bachelor 2. Jahr',
       'M1 - Master 1. Jahr', 'B3 - Bachelor 3. Jahr', 'D - Diplom',
       'M2 - Master 2. Jahr', 'hod1', 'hod2', 'dow1', 'dow2', 'week1', 'week2',
       'holiday', 'zwickltag', 'occrate1week', 'occcount1we

In [8]:
# Named groups of feature identifiers, one set per topic.
# NOTE(review): several entries ("type", "studyarea", "level", "dow", "hod",
# "week", "weather", "avgocc") are not literal column names in the displayed
# train_dict[0].columns output (which shows e.g. 'hod1'/'hod2', 'VL'/'UE'/'KS')
# — presumably aliases expanded elsewhere; confirm.

# Course-related features.
course_features = {
    "maxocccount", "maxoccrate", "maxoccrateestimate", "maxocccountestimate",
    "coursenumber", "exam", "test", "tutorium", "cancelled", "offsite",
    "lecture", "lecturerampbefore", "lecturerampafter",
    "registered", "type", "studyarea", "ects", "level",
}

# Calendar / time-of-day features.
datetime_features = {
    "dow", "hod", "week", "holiday", "zwickltag",
}

# Occupancy features.
general_features = {
    "occcount", "occrate", "avgocc",
}

# Weather features.
weather_features = {
    "weather",
}

# Occupancy values shifted by one day / one week.
shift_features = {
    "occcount1week", "occrate1week", "occcount1day", "occrate1day",
}

In [9]:
# Show the column index of room 0's training frame again, for comparison
# with the feature groups.
train_dict[0].columns

Index(['datetime', 'occrate', 'occcount', 'occcountdiff', 'occratediff',
       'lecture', 'exam', 'maxocccount', 'offsite', 'coursenumber',
       'registered', 'test', 'maxocccountestimate', 'maxoccrate',
       'lecturerampbefore', 'lecturerampafter', 'tutorium', 'cancelled',
       'ects', 'maxoccrateestimate', 'VL', 'UE', 'KS', 'Informatik', 'None_sa',
       'Volkswirtschaftslehre', 'Chemie', 'Wirtschaftsinformatik',
       'Maschinenbau', 'Betriebswirtschaftslehre', 'Rechtswissenschaften',
       'Mathematik', 'Mechatronik', 'Informationselektronik',
       'Biologische Chemie', 'Sozialwissenschaften', 'Artificial Intelligence',
       'Kunststofftechnik', 'Statistik', 'Pädagogik', 'Medical Engineering',
       'B1 - Bachelor 1. Jahr', 'None_level', 'B2 - Bachelor 2. Jahr',
       'M1 - Master 1. Jahr', 'B3 - Bachelor 3. Jahr', 'D - Diplom',
       'M2 - Master 2. Jahr', 'hod1', 'hod2', 'dow1', 'dow2', 'week1', 'week2',
       'holiday', 'zwickltag', 'occrate1week', 'occcount1we

#### Feature Columns

Essential Features:
* Occupancy information: number of occupants, absolute or relative (divided by room capacity)
* Time stamp: Temporal resolution of t minutes

Course Features:
* Lecture: If a lecture takes place or not
* Date Specific Features: Exam, Test, Tutorium, Cancelled
* Course Specific Features: Registered students, Type (VL,UE,KS), Study area, Level, Course number

Time-related Features:
* Time, Weekday, (Calendar week)
* Holiday, Zwickltag

Weather Features:
* Temperature, Air pressure, Precipitation (sum over time interval), Wind speed, Air humidity, Sunshine duration

Additional Features:
* Average occupancy information of last k weeks

Type: 'VL', 'UE', 'KS',

Study area: Maybe try with learnable parameter
'Informatik', 'None_sa',
'Volkswirtschaftslehre', 'Chemie', 'Wirtschaftsinformatik',
'Maschinenbau', 'Betriebswirtschaftslehre', 'Rechtswissenschaften',
'Mathematik', 'Mechatronik', 'Informationselektronik',
'Biologische Chemie', 'Sozialwissenschaften', 'Artificial Intelligence',
'Kunststofftechnik', 'Statistik', 'Pädagogik', 'Medical Engineering',

Level: Maybe try with learnable parameter
'B1 - Bachelor 1. Jahr', 'None_level', 'B2 - Bachelor 2. Jahr',
'M1 - Master 1. Jahr', 'B3 - Bachelor 3. Jahr', 'D - Diplom',
'M2 - Master 2. Jahr', 

In [21]:
# General Columns
# datetime, occupancy information (occcount, occrate=occcount/room_capacity)
# lecture ?

# Columns Concerning Specific Course Dates
# 

### Old Stuff

In [15]:
"""# 2024-04-09 07:00:00
start = datetime.datetime(2024, 4, 9, 7, 0, 0)
# 2024-04-09 21:00:00
stop = datetime.datetime(2024, 4, 9, 21, 0, 0)

plot_data = dfg.filter_by_timestamp(data, "datetime", start, stop) 

# 3 subplots in 3 rows
fig, axs = plt.subplots(3, 1, figsize=(15, 10))
# plot occrate
axs[0].plot(plot_data["datetime"], plot_data["occrate"], label="ocrate")
# plot registered
registered = (plot_data["registered"] - norm_registered["min"]) / (norm_registered["max"] - norm_registered["min"])
axs[1].plot(plot_data["datetime"], registered, label="registered")
# temperature
temperature = (plot_data["tl"] - norm_temperature["min"]) / (norm_temperature["max"] - norm_temperature["min"])
axs[2].plot(plot_data["datetime"], temperature, label="temperature")
plt.show()"""

'# 2024-04-09 07:00:00\nstart = datetime.datetime(2024, 4, 9, 7, 0, 0)\n# 2024-04-09 21:00:00\nstop = datetime.datetime(2024, 4, 9, 21, 0, 0)\n\nplot_data = dfg.filter_by_timestamp(data, "datetime", start, stop) \n\n# 3 subplots in 3 rows\nfig, axs = plt.subplots(3, 1, figsize=(15, 10))\n# plot occrate\naxs[0].plot(plot_data["datetime"], plot_data["occrate"], label="ocrate")\n# plot registered\nregistered = (plot_data["registered"] - norm_registered["min"]) / (norm_registered["max"] - norm_registered["min"])\naxs[1].plot(plot_data["datetime"], registered, label="registered")\n# temperature\ntemperature = (plot_data["tl"] - norm_temperature["min"]) / (norm_temperature["max"] - norm_temperature["min"])\naxs[2].plot(plot_data["datetime"], temperature, label="temperature")\nplt.show()'

In [16]:

#fig = make_subplots(
#    rows=3, 
#    cols=1, 
#    subplot_titles=("Occupancy Rate", "Registered Students", "Temperature in Linz")
#    )
#x_col = "datetime"

## occupancy rate
#fig.add_trace(
#    go.Scatter(
#        x=plot_data[x_col], 
#        y=plot_data["occrate"],
#        mode='lines', 
#        name='Occupancy Rate'
#        ),
#    row=1, col=1
#    )
## registered students
#registered = (plot_data["registered"] - norm_registered["min"]) / (norm_registered["max"] - norm_registered["min"])
#fig.add_trace(
#    go.Scatter(
#        x=plot_data[x_col], 
#        y=registered,
#        mode='lines', 
#        name='Registered Students'
#        ),
#    row=2, col=1
#    )

## temperature
#temperature = (plot_data["tl"] - norm_temperature["min"]) / (norm_temperature["max"] - norm_temperature["min"])
#fig.add_trace(
#    go.Scatter(
#        x=plot_data[x_col], 
#        y=temperature,
#        mode='lines', 
#        name='Temperature in Linz'
#        ),
#    row=3, col=1
#    )

## set y axis between 0 and 1
#fig.update_yaxes(range=[-0.1, 1], row=1, col=1)
#fig.update_yaxes(range=[-0.1, 1], row=2, col=1)
#fig.update_yaxes(range=[-0.1, 1], row=3, col=1)
#fig.show()