In [7]:
# Import all required libraries
import pandas as pd
import numpy as np
import psycopg2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import plotly.graph_objects as go

2025-05-17 17:30:24.596133: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-17 17:30:27.604235: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747490428.141490  380516 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747490428.234261  380516 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747490429.793125  380516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [8]:
def load_data():
    """Load the apartment sensor table from PostgreSQL into a DataFrame.

    Connection settings are read from the standard libpq environment
    variables (PGHOST, PGUSER, PGPASSWORD, PGDATABASE). When a variable is
    not set, the value this notebook previously hardcoded is used, so
    existing local setups keep working unchanged.

    Returns:
        pandas.DataFrame indexed by ``timestamp_10s`` (parsed with
        ``pd.to_datetime``), ordered by that timestamp, holding the columns
        selected in the query below.
    """
    import os  # local import so this cell does not depend on an edit elsewhere

    # Prefer environment variables so real credentials never live in the notebook.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "password"),
        database=os.environ.get("PGDATABASE", "postgres"),
    )
    query = """
    SELECT timestamp_10s, avg_indoor_temperature, avg_indoor_humidity, 
           avg_exhaust_temperature, heating_status, solar_radiation, outdoor_temp 
    FROM apartment_11_10s 
    ORDER BY timestamp_10s
    """
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even if the query fails.
        conn.close()

    # Convert timestamp and set as index
    df['timestamp_10s'] = pd.to_datetime(df['timestamp_10s'])
    return df.set_index('timestamp_10s')


In [9]:
def add_time_features(df):
    """Return a copy of ``df`` with Tehran-local calendar features added.

    The input frame is left untouched (its index and columns are not
    modified), matching ``add_heating_duration``, so the cell that calls this
    can be re-run safely.

    Args:
        df: DataFrame with a DatetimeIndex. A timezone-naive index is
            interpreted as UTC.

    Returns:
        A new DataFrame whose index is converted to ``Asia/Tehran`` and which
        has these extra columns:
        ``hour`` (0-23), ``day_of_week`` (Monday=0), ``is_weekend``
        (1 for Thursday/Friday, else 0), ``hour_sin`` and ``hour_cos``
        (cyclical encoding of ``hour`` over a 24-hour period).
    """
    # Copy first: assigning to df.index / df[...] below would otherwise
    # change the caller's frame in place.
    df = df.copy()

    # Localize to UTC if naive, then convert to Tehran timezone
    local_index = df.index
    if local_index.tz is None:
        local_index = local_index.tz_localize("UTC")
    df.index = local_index.tz_convert("Asia/Tehran")

    # Extract time components
    df['hour'] = df.index.hour
    df['day_of_week'] = df.index.dayofweek  # Monday=0

    # Weekend is Thursday (3) and Friday (4)
    df['is_weekend'] = df['day_of_week'].isin([3, 4]).astype(int)

    # Cyclical encoding for hour, so 23:00 and 00:00 end up close together
    hour_angle = 2 * np.pi * df['hour'] / 24
    df['hour_sin'] = np.sin(hour_angle)
    df['hour_cos'] = np.cos(hour_angle)

    return df

In [10]:
def add_heating_duration(df, time_threshold='5min'):
    """Add how long ``heating_status`` has held its current value.

    Works on a copy of ``df``. For each row the elapsed time (in seconds) of
    the current run of identical ``heating_status`` values is accumulated from
    the spacing between consecutive index timestamps. The counter restarts at
    zero on the first row, whenever the status differs from the previous row,
    and whenever the spacing to the previous row exceeds ``time_threshold``.

    Args:
        df: DataFrame with a datetime index and a ``heating_status`` column.
        time_threshold: Anything ``pd.Timedelta`` accepts; larger spacings
            between rows are treated as gaps in the recording.

    Returns:
        Copy of ``df`` with ``heating_duration_sec`` and
        ``heating_duration_min`` appended.
    """
    frame = df.copy()
    max_step_sec = pd.Timedelta(time_threshold).total_seconds()

    # Seconds since the previous row, and whether that spacing counts as a gap
    frame['time_diff'] = frame.index.to_series().diff().dt.total_seconds()
    frame['is_gap'] = frame['time_diff'] > max_step_sec

    elapsed_per_row = []
    elapsed = 0
    last_status = None
    rows = zip(frame['heating_status'], frame['is_gap'], frame['time_diff'])
    for pos, (status, gap, step_sec) in enumerate(rows):
        # A run continues only past the first row, with no gap and an unchanged status
        continues_run = pos > 0 and not gap and status == last_status
        elapsed = elapsed + step_sec if continues_run else 0
        elapsed_per_row.append(elapsed)
        last_status = status

    frame['heating_duration_sec'] = elapsed_per_row
    frame['heating_duration_min'] = frame['heating_duration_sec'] / 60
    return frame.drop(columns=['time_diff', 'is_gap'])

In [11]:
def prepare_data(df, prediction_horizon=2*60*6, history_length=3*60*6):
    """Build scaled sliding-window sequences and a chronological 80/20 split.

    Each sample is ``history_length`` consecutive rows of features. Its label
    is ``avg_indoor_temperature`` taken ``prediction_horizon`` rows after the
    LAST row of that window. Both lengths are counted in rows, not in time
    (NOTE(review): the defaults look like 2 h / 3 h at 10-second spacing,
    judging by the ``timestamp_10s`` naming — confirm against the data).

    The input frame is not modified.

    Args:
        df: DataFrame containing the continuous and binary feature columns
            listed below.
        prediction_horizon: Number of rows ahead of the window end to predict.
        history_length: Number of rows in each input window.

    Returns:
        ``((X_train, y_train), (X_test, y_test), cont_scaler)`` where ``X_*``
        has shape ``(n_samples, history_length, 10)`` with the continuous
        features first and the binary features last, ``y_*`` holds the
        unscaled target temperatures, and ``cont_scaler`` is the fitted
        ``MinMaxScaler`` for the continuous features.

    Raises:
        ValueError: if no row has a target ``prediction_horizon`` rows ahead.
    """
    from sklearn.preprocessing import MinMaxScaler

    # Separate feature types
    binary_features = ['heating_status', 'is_weekend']
    continuous_features = [
        'avg_indoor_temperature', 'avg_indoor_humidity',
        'avg_exhaust_temperature', 'solar_radiation',
        'outdoor_temp', 'hour_sin', 'hour_cos',
        'heating_duration_min'
    ]

    # Create target. assign() returns a new frame, so the caller's df is never
    # touched and calling this function twice cannot double-scale the features.
    frame = df.assign(
        target_temp=df['avg_indoor_temperature'].shift(-prediction_horizon)
    ).dropna(subset=['target_temp'])
    if frame.empty:
        raise ValueError(
            "No rows left after building the target; the data must contain "
            "more than prediction_horizon rows."
        )

    # One window can end on every row from position history_length - 1 onward.
    n_sequences = max(len(frame) - history_length + 1, 0)
    split_idx = int(0.8 * n_sequences)

    # Fit the scaler only on rows covered by training windows so that test-set
    # statistics do not leak into the features. The last training window ends
    # at row split_idx + history_length - 2. With no training windows at all,
    # fall back to every row so the scaler is still usable.
    n_fit_rows = split_idx + history_length - 1 if split_idx > 0 else len(frame)
    cont_scaler = MinMaxScaler()
    cont_scaler.fit(frame[continuous_features].iloc[:n_fit_rows])

    features = np.hstack([
        cont_scaler.transform(frame[continuous_features]),
        frame[binary_features].to_numpy(dtype=float),
    ])
    targets = frame['target_temp'].to_numpy()

    # Create sequences. target_temp is ALREADY shifted by prediction_horizon,
    # so the label for a window is the target stored on the window's last row.
    # Adding prediction_horizon to the index again would double the horizon.
    X = np.array([features[start:start + history_length]
                  for start in range(n_sequences)])
    y = targets[history_length - 1:history_length - 1 + n_sequences].copy()

    # Train-test split (chronological, no shuffling)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    # Display sample sequence (clamped so small datasets do not raise IndexError)
    if len(X_train) > 0:
        sample_idx = min(500, len(X_train) - 1)
        duration_col = continuous_features.index('heating_duration_min')
        print("Input features shape:", X_train[sample_idx].shape)
        print("First timestep features:\n", X_train[sample_idx][0])
        print("Heating duration values:", X_train[sample_idx][:, duration_col])
        print("Corresponding target:", y_train[sample_idx])

    return (X_train, y_train), (X_test, y_test), cont_scaler

In [12]:
import torch  # needed for torch.tensor below; the from-import alone does not bind `torch`
from torch.utils.data import TensorDataset, DataLoader

def create_torch_loaders(X_train, y_train, X_test, y_test, batch_size=64):
    """Wrap the train/test arrays in PyTorch DataLoaders.

    Args:
        X_train, y_train: Training inputs and targets (array-like, same
            length along the first axis).
        X_test, y_test: Test inputs and targets.
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, test_loader)``. Both yield ``(inputs, targets)``
        batches of float32 tensors; only the training loader shuffles.
    """
    def to_dataset(features, targets):
        # Explicit float32 copies replace the legacy torch.FloatTensor constructor
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.float32),
        )

    train_data = to_dataset(X_train, y_train)
    test_data = to_dataset(X_test, y_test)

    # Create loaders: shuffle training batches, keep test order stable
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    return train_loader, test_loader

In [None]:
# NOTE(review): prints how many windows the sequence builder would create.
# Within the visible notebook this cell has no execution count, `df` is first
# assigned in a later cell, and `history_length` / `prediction_horizon` appear
# only as parameters of prepare_data (not as globals). Running it as-is in a
# fresh kernel would presumably raise NameError — confirm, then either move it
# below the data-loading cell with those two values defined, or delete it.
print(f"Total sequences to create: {len(df) - history_length - prediction_horizon}")

In [13]:
# Pipeline: raw rows -> engineered features -> windowed train/test arrays

# Raw sensor rows from the database
df = load_data()

# Feature engineering must run before prepare_data, which expects these columns
df = add_time_features(df)      # hour_sin, hour_cos, is_weekend, ...
df = add_heating_duration(df)   # heating_duration_min

# Windowed, scaled arrays plus the fitted scaler
train_split, test_split, scaler = prepare_data(df)
X_train, y_train = train_split
X_test, y_test = test_split

# Size / shape check: each sequence should be (history_length, num_features)
print(f"Total sequences: {len(X_train)} training, {len(X_test)} test")
print(f"Each sequence shape: {X_train[0].shape}")

# Look at one training sample in detail
sample_idx = 500
feature_labels = ['indoor_temp', 'humidity', 'exhaust_temp',
                  'solar_rad', 'outdoor_temp', 'hour_sin',
                  'hour_cos', 'heating_dur_min', 'heating_status',
                  'is_weekend']
try:
    sample = X_train[sample_idx]
    print("\nSample Input Features Shape:", sample.shape)

    print("\nFirst Timestep Features:")
    print(pd.DataFrame([sample[0]], columns=feature_labels))

    print("\nHeating Duration Evolution:")
    print(sample[:, -3])  # third column from the end is the heating duration

    print("\nCorresponding Target Temperature:", y_train[sample_idx])
except IndexError:
    print(f"Sample {sample_idx} doesn't exist. Max index is {len(X_train)-1}")

  df = pd.read_sql(query, conn)


: 