In [1]:
import pandas as pd
import numpy as np
import psycopg2
import os
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import torch
from torch.utils.data import Dataset, DataLoader
from typing import List, Optional
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error

In [2]:
def load_data():
    """Load the 10-second apartment readings from PostgreSQL.

    Connection settings are read from the standard libpq environment
    variables (``PGHOST``, ``PGUSER``, ``PGPASSWORD``, ``PGDATABASE``); when a
    variable is unset the previous hard-coded value is used, so existing
    local setups keep working.

    Returns:
        pd.DataFrame: rows of ``apartment_11_10s`` ordered by
        ``timestamp_10s``, with ``timestamp_10s`` parsed by
        ``pd.to_datetime`` and used as the index.
    """
    # NOTE(review): the fallbacks below are development defaults only; set the
    # PG* variables instead of committing real credentials to the notebook.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "password"),
        database=os.environ.get("PGDATABASE", "postgres"),
    )
    query = """
    SELECT timestamp_10s, avg_indoor_temperature, avg_indoor_humidity, 
           avg_exhaust_temperature, heating_status, solar_radiation, outdoor_temp 
    FROM apartment_11_10s 
    ORDER BY timestamp_10s
    """
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Close even when the query fails; psycopg2's ``with conn`` only ends
        # the transaction and would leave the connection open.
        conn.close()

    df['timestamp_10s'] = pd.to_datetime(df['timestamp_10s'])
    return df.set_index('timestamp_10s')


In [3]:
def add_time_features(df, tz="Asia/Tehran", weekend_days=(3, 4)):
    """Return a copy of ``df`` with local-time calendar features added.

    The input frame is left untouched (same contract as
    ``add_heating_duration``); use the returned frame.

    Args:
        df: DataFrame indexed by a ``DatetimeIndex``. A timezone-naive index
            is interpreted as UTC.
        tz: Time zone the index is converted to before the features are
            derived. Defaults to ``"Asia/Tehran"``.
        weekend_days: ``dayofweek`` values (Monday=0 ... Sunday=6) flagged as
            weekend. Defaults to Thursday (3) and Friday (4).

    Returns:
        pd.DataFrame: copy of ``df`` whose index is expressed in ``tz`` and
        which has the extra columns ``hour``, ``day_of_week``, ``is_weekend``
        (0/1), ``hour_sin`` and ``hour_cos``.
    """
    out = df.copy()

    local_index = out.index
    if local_index.tz is None:
        # Naive timestamps are taken to be UTC.
        local_index = local_index.tz_localize("UTC")
    out.index = local_index.tz_convert(tz)

    out['hour'] = out.index.hour
    out['day_of_week'] = out.index.dayofweek

    out['is_weekend'] = out['day_of_week'].isin(list(weekend_days)).astype(int)

    # Cyclical encoding so that hour 23 and hour 0 end up close together.
    angle = 2 * np.pi * out['hour'] / 24
    out['hour_sin'] = np.sin(angle)
    out['hour_cos'] = np.cos(angle)

    return out

In [4]:
def add_heating_duration(df, time_threshold='5min'):
    """Add how long ``heating_status`` has held its current value.

    Rows are walked in index order. The running duration restarts at 0 on the
    first row, on any row whose distance to the previous row exceeds
    ``time_threshold``, and on any row whose ``heating_status`` differs from
    the previous row's; otherwise the elapsed seconds since the previous row
    are added to it.

    Args:
        df: DataFrame with a ``DatetimeIndex`` and a ``heating_status``
            column. It is not modified.
        time_threshold: Anything accepted by ``pd.Timedelta``; a larger step
            between consecutive rows is treated as a data gap.

    Returns:
        pd.DataFrame: copy of ``df`` with float columns
        ``heating_duration_sec`` and ``heating_duration_min`` appended.
    """
    df = df.copy()

    # Keep the helper series local rather than as scratch columns, so that
    # input columns named 'time_diff' or 'is_gap' are neither overwritten nor
    # dropped.
    gap_limit_sec = pd.Timedelta(time_threshold).total_seconds()
    elapsed_sec = df.index.to_series().diff().dt.total_seconds()
    gap_flags = elapsed_sec > gap_limit_sec

    durations = []
    duration = 0.0
    prev_status = None

    # All three series are zipped once; no per-row column lookup / .iloc call.
    rows = zip(df['heating_status'], gap_flags, elapsed_sec)
    for i, (status, is_gap, step_sec) in enumerate(rows):
        if i == 0 or is_gap:
            # Start of the data or a gap: the previous state is unknown.
            duration = 0.0
        elif status == prev_status:
            duration += step_sec
        else:
            duration = 0.0

        durations.append(duration)
        prev_status = status

    # Explicit float array: stable dtype, and no index alignment on assignment.
    df['heating_duration_sec'] = np.asarray(durations, dtype=float)
    df['heating_duration_min'] = df['heating_duration_sec'] / 60
    return df

In [None]:
class TimeAwareSeriesDataset(Dataset):
    """Windowed time-series dataset yielding ``(X, y)`` tensor pairs.

    Sample ``k`` starts at row ``self.valid_idx[k]`` of ``self.df``:
    ``X`` holds the ``self.features`` columns of the next
    ``self.history_length`` rows as float32, and ``y`` is a one-element
    float32 tensor with the ``'target_temp'`` value on the last row of that
    window.

    With ``precompute=True`` all samples are built once in ``__init__`` and
    served from ``self.data_X`` / ``self.data_y``; otherwise each sample is
    built on access.

    NOTE(review): the code that consumes the remaining constructor arguments
    sits behind the ``...`` placeholders in ``__init__`` and is not visible
    here. The methods below rely on ``self.df``, ``self.features``,
    ``self.history_length`` and ``self.valid_idx`` being set before
    ``_precompute_all`` runs — confirm.
    """

    def __init__(self, 
                 df: pd.DataFrame,
                 cont_features: List[str],
                 bin_features: List[str],
                 target_col: str = 'avg_indoor_temperature',
                 history_hours: float = 3,
                 pred_horizon_hours: float = 2,
                 max_gap_minutes: float = 15,
                 scaler: Optional[MinMaxScaler] = None,
                 precompute: bool = True):
        ...
        self.precompute = precompute

        ...

        if precompute:
            self.data_X, self.data_y = self._precompute_all()
    
    def _precompute_all(self):
        """Build every sample once and return them as two stacked tensors.

        Returns:
            tuple: ``(X, y)`` where ``X`` stacks the per-sample windows along
            a new first dimension and ``y`` stacks the one-element targets.
            For an empty ``valid_idx`` two empty float32 tensors of shape
            ``(0, history_length, n_features)`` and ``(0, 1)`` are returned.
        """
        n_samples = len(self.valid_idx)
        if n_samples == 0:
            # torch.stack rejects an empty list; return well-shaped empties.
            empty_X = torch.empty(
                (0, int(self.history_length), len(self.features)),
                dtype=torch.float32,
            )
            return empty_X, torch.empty((0, 1), dtype=torch.float32)

        # _get_sample expects a dataset position and resolves it through
        # valid_idx itself, so iterate over positions, not over the stored
        # row offsets (which would be looked up in valid_idx a second time).
        samples = [self._get_sample(pos) for pos in range(n_samples)]
        X_list, y_list = zip(*samples)
        return torch.stack(X_list), torch.stack(y_list)

    def _get_sample(self, idx):
        """Build sample number ``idx`` (a position into ``valid_idx``)."""
        start = self.valid_idx[idx]
        end = start + self.history_length
        # Slice the rows first, then pick the columns: selecting the columns
        # on the full frame would copy every row for every sample.
        window = self.df.iloc[start:end][self.features]
        X = window.values.astype(np.float32)
        # Target is read on the last row of the history window.
        y = np.float32(self.df['target_temp'].iloc[end - 1])
        return torch.from_numpy(X), torch.tensor([y], dtype=torch.float32)

    def __len__(self):
        """Number of samples, i.e. number of entries in ``valid_idx``."""
        return len(self.valid_idx)

    def __getitem__(self, idx):
        """Return sample ``idx`` from the cache if precomputed, else build it."""
        if self.precompute:
            return self.data_X[idx], self.data_y[idx]
        return self._get_sample(idx)