In [1]:
import os
import sys
import numpy as np
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader,random_split, Subset

In [2]:
# Read the pre-processed RICO4 table from its HDF file
file_path = '../Data/RICO4_Dataset_processed.hdf'
df = pd.read_hdf(file_path)

# Keep only the rows whose temp_change_class is 'h' (heating); downsampling follows
df_h = df.loc[df['temp_change_class'] == 'h']
def select_points(group, step=10):
    """Return every ``step``-th row of ``group``, starting with the first row.

    Args:
        group: Object supporting positional ``.iloc`` slicing (here, the
            per-group DataFrame handed over by ``groupby(...).apply``).
        step: Stride between kept rows. Defaults to 10, the value that was
            previously hard-coded, so existing one-argument calls are
            unaffected.

    Returns:
        The positional slice ``group.iloc[::step]``.
    """
    return group.iloc[::step]
# Downsample within each 'Scheduler Step' group, then discard the group index.
# NOTE(review): recent pandas versions appear to emit a deprecation warning for
# apply() operating on the grouping column - confirm before upgrading pandas.
df_h = (
    df_h.groupby('Scheduler Step')
    .apply(select_points)
    .reset_index(drop=True)
)

# Remove every row that belongs to one of the excluded intervals
excluded_intervals = [53, 5]
df_h_filtered = df_h.loc[~df_h['interval'].isin(excluded_intervals)].reset_index(drop=True)
df_h = df_h_filtered

# Define RICO4Dataset
class RICO4Dataset(Dataset):
    """Dataset that yields one tensor per distinct ``interval`` value.

    Each item is a float32 tensor with one row per DataFrame row of that
    interval and two columns: the ``B.RTD1`` reading and
    ``time_within_interval`` divided by ``time_scale``.

    Args:
        df_h: DataFrame providing the ``interval``, ``B.RTD1`` and
            ``time_within_interval`` columns.
        time_scale: Divisor applied to ``time_within_interval``. Defaults to
            240, the value that was previously hard-coded.
    """

    def __init__(self, df_h: pd.DataFrame, time_scale: float = 240) -> None:
        super().__init__()
        self.df_h = df_h
        self.time_scale = time_scale
        # One dataset item per distinct interval value.
        self.unique_intervals = df_h['interval'].unique()

    def __len__(self) -> int:
        """Return the number of distinct intervals."""
        return len(self.unique_intervals)

    def __getitem__(self, idx: int) -> torch.Tensor:
        """Return the ``(n_rows, 2)`` tensor ``[B.RTD1, scaled time]`` for item ``idx``."""
        interval = self.unique_intervals[idx]
        # Select the interval's rows once instead of re-filtering per column.
        rows = self.df_h.loc[self.df_h['interval'] == interval]
        temp = torch.tensor(rows['B.RTD1'].to_numpy(), dtype=torch.float32)
        ti = torch.tensor(
            rows['time_within_interval'].to_numpy() / self.time_scale,
            dtype=torch.float32,
        )
        # Column 0 is the B.RTD1 reading, column 1 the scaled time.
        return torch.stack((temp, ti), dim=1)

# Build the per-interval dataset from the filtered heating frame
dataset = RICO4Dataset(df_h)

# Partition the items by their first B.RTD1 sample: strictly greater than 20
# goes to above_20_data, everything else to below_20_data
above_20_data, below_20_data = [], []

for idx in range(len(dataset)):
    x_batch = dataset[idx]
    starting_value = x_batch[0][0].item()  # first row, first column: initial B.RTD1
    (above_20_data if starting_value > 20 else below_20_data).append(x_batch)

# Define SubsetDataset
class SubsetDataset(Dataset):
    """Thin Dataset wrapper around an already-built, indexable collection."""

    def __init__(self, data):
        # Stored by reference; no copy is made.
        self.data = data

    def __len__(self):
        """Return how many items the wrapped collection holds."""
        items = self.data
        return len(items)

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        items = self.data
        return items[idx]

# Wrap both splits so they can be handed to DataLoader
above_20_dataset = SubsetDataset(above_20_data)
below_20_dataset = SubsetDataset(below_20_data)

# Training draws from the "above 20" split, validation from the "below 20" split.
# Adjust batch_size as needed.
# NOTE(review): items may differ in length, in which case batch_size > 1 would
# presumably need a custom collate_fn - confirm before changing it.
train_loader = DataLoader(dataset=above_20_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(dataset=below_20_dataset, batch_size=1, shuffle=True)
len(train_loader)

  df_h = df_h.groupby('Scheduler Step').apply(select_points).reset_index(drop=True)


8