In [1]:
import pandas as pd
import numpy as np
import psycopg2
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

In [2]:
def load_data():
    conn = psycopg2.connect(
        host="localhost",
        user="postgres", 
        password="password",
        database="postgres"
    )
    query = """
    SELECT timestamp_10s, avg_indoor_temperature, avg_indoor_humidity, 
           avg_exhaust_temperature, heating_status, solar_radiation, outdoor_temp 
    FROM apartment_11_10s 
    ORDER BY timestamp_10s
    """
    df = pd.read_sql(query, conn)
    conn.close()
    
    df['timestamp_10s'] = pd.to_datetime(df['timestamp_10s'])
    df.set_index('timestamp_10s', inplace=True)
    return df


In [3]:
import pandas as pd
from datetime import timedelta

def get_contiguous_segments():

        
    df = load_data()
    
    gap_threshold = timedelta(hours=1)  
    time_diffs = df.index.to_series().diff()
    gaps = time_diffs > gap_threshold
    gap_indices = df.index[gaps]

    segments = []
    start_idx = 0

    for gap_idx in gap_indices:
        end_idx = df.index.get_loc(gap_idx) - 1
        if end_idx > start_idx:
            segments.append(df.iloc[start_idx:end_idx])
        start_idx = df.index.get_loc(gap_idx) + 1

    segments.append(df.iloc[start_idx:])

    print(f"Split into {len(segments)} continuous segments:")
    for i, seg in enumerate(segments):
        duration = seg.index[-1] - seg.index[0]
        print(f"Segment {i+1}: {seg.index[0]} to {seg.index[-1]} (Duration: {duration})")
        
    return segments

segments = get_contiguous_segments()

  df = pd.read_sql(query, conn)


Split into 5 continuous segments:
Segment 1: 2025-01-16 14:25:40 to 2025-02-05 05:36:30 (Duration: 19 days 15:10:50)
Segment 2: 2025-02-05 07:05:40 to 2025-02-11 16:42:10 (Duration: 6 days 09:36:30)
Segment 3: 2025-02-11 20:19:30 to 2025-02-17 16:09:00 (Duration: 5 days 19:49:30)
Segment 4: 2025-02-18 09:10:30 to 2025-03-04 06:32:00 (Duration: 13 days 21:21:30)
Segment 5: 2025-03-05 11:47:20 to 2025-03-22 18:18:10 (Duration: 17 days 06:30:50)


In [4]:
def add_time_features(df):

    if df.index.tz is None:
        df.index = df.index.tz_localize("UTC")
    df.index = df.index.tz_convert("Asia/Tehran")

    df['hour'] = df.index.hour
    df['day_of_week'] = df.index.dayofweek  # Monday=0

    # Thursday (3) and Friday (4)
    df['is_weekend'] = df['day_of_week'].isin([3, 4]).astype(int)
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

    return df

In [5]:
def add_heating_duration(df, time_threshold='5min'):
    df = df.copy()
    
    df['time_diff'] = df.index.to_series().diff().dt.total_seconds()
    df['is_gap'] = df['time_diff'] > pd.Timedelta(time_threshold).total_seconds()
    
    # Reset duration at gaps
    duration = 0
    durations = []
    prev_status = None
    
    for i, (status, is_gap) in enumerate(zip(df['heating_status'], df['is_gap'])):
        if i == 0 or is_gap:
            duration = 0
        elif status == prev_status:
            duration += df['time_diff'].iloc[i]
        else:
            duration = 0
            
        durations.append(duration)
        prev_status = status
    
    df['heating_duration_sec'] = durations
    df['heating_duration_min'] = df['heating_duration_sec'] / 60
    return df.drop(columns=['time_diff', 'is_gap'])

In [6]:
def prepare_data(df, prediction_horizon=2*60*6, history_length=3*60*6):
    
    df['target_temp'] = df['avg_indoor_temperature'].shift(-prediction_horizon)
    df.dropna(subset=['target_temp'], inplace=True)
    
    # Separate feature types
    binary_features = ['heating_status', 'is_weekend']
    continuous_features = [
        'avg_indoor_temperature', 'avg_indoor_humidity',
        'avg_exhaust_temperature', 'solar_radiation', 
        'outdoor_temp', 'hour_sin', 'hour_cos',
        'heating_duration_min'
    ]
    
    # Normalize continuous features
    cont_scaler = MinMaxScaler()
    df[continuous_features] = cont_scaler.fit_transform(df[continuous_features])
    
    # Create sequences
    def create_sequences(data, targets):
        X, y = [], []
        for i in range(len(data) - history_length - prediction_horizon):
            X.append(data.iloc[i:i+history_length].values)
            y.append(targets.iloc[i+history_length+prediction_horizon-1])
        return np.array(X), np.array(y)
    
    X, y = create_sequences(df[continuous_features + binary_features], df['target_temp'])
    
    # Train-test split
    split_idx = int(0.8 * len(X))
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    
    # Display sample sequence
    sample_idx = 500  # Arbitrary position
    print("Input features shape:", X_train[sample_idx].shape)
    print("First timestep features:\n", X_train[sample_idx][0])
    print("Heating duration values:", X_train[sample_idx][:, -3])  # 3rd last feature
    print("Corresponding target:", y_train[sample_idx])
    
    return (X_train, y_train), (X_test, y_test), cont_scaler

In [None]:
def create_loaders(X_train, y_train, X_test, y_test, batch_size=64):

    train_data = TensorDataset(
        torch.FloatTensor(X_train), 
        torch.FloatTensor(y_train)
    )
    test_data = TensorDataset(
        torch.FloatTensor(X_test),
        torch.FloatTensor(y_test)
    )

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size)
    
    return train_loader, test_loader

In [None]:
df = load_data()
df = add_time_features(df)  
df = add_heating_duration(df)

(X_train, y_train), (X_test, y_test), scaler = prepare_data(df)

  df = pd.read_sql(query, conn)


: 