In [2]:
import pandas as pd
import numpy as np
import psycopg2
import os
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import torch
from torch.utils.data import Dataset, DataLoader
from typing import List, Optional
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error


In [3]:
def load_data():
    """Load the apartment-11 10-second sensor table from PostgreSQL.

    Connection settings are read from the standard libpq environment
    variables (PGHOST, PGUSER, PGPASSWORD, PGDATABASE) and fall back to the
    values that used to be hard-coded here, so an unconfigured environment
    behaves as before.

    Returns:
        pd.DataFrame indexed by ``timestamp_10s`` (parsed with
        ``pd.to_datetime``), rows ordered by timestamp, with the columns
        selected in the query below.
    """
    # NOTE(review): the fallback password is still a plain-text default; set
    # PGPASSWORD in the environment instead of relying on it.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "password"),
        database=os.environ.get("PGDATABASE", "postgres"),
    )
    query = """
    SELECT timestamp_10s, avg_indoor_temperature, avg_indoor_humidity, 
           avg_exhaust_temperature, heating_status, solar_radiation, outdoor_temp 
    FROM apartment_11_10s 
    ORDER BY timestamp_10s
    """
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    df['timestamp_10s'] = pd.to_datetime(df['timestamp_10s'])
    return df.set_index('timestamp_10s')


In [4]:
def add_time_features(df):
    """Attach calendar features derived from the frame's DatetimeIndex.

    The index is interpreted as UTC when it carries no timezone and is then
    converted to Asia/Tehran. The frame is modified in place (index and new
    columns) and the same object is returned.

    Columns added: ``hour``, ``day_of_week``, ``is_weekend``, ``hour_sin``,
    ``hour_cos``.
    """
    stamps = df.index
    if stamps.tz is None:
        stamps = stamps.tz_localize("UTC")
    df.index = stamps.tz_convert("Asia/Tehran")

    df['hour'] = df.index.hour
    df['day_of_week'] = df.index.dayofweek

    # pandas counts Monday as 0, so 3 and 4 are Thursday and Friday.
    weekend_days = (3, 4)
    df['is_weekend'] = df['day_of_week'].isin(weekend_days).astype(int)

    # Cyclical encoding of the hour so that 23:00 and 00:00 end up adjacent.
    hour_angle = 2 * np.pi * df['hour'] / 24
    df['hour_sin'] = np.sin(hour_angle)
    df['hour_cos'] = np.cos(hour_angle)

    return df

In [5]:
def add_heating_duration(df, time_threshold='5min'):
    """Add how long ``heating_status`` has held its current value.

    A "run" starts at the first row, at any row whose status differs from the
    previous row, and at any row that follows a timestamp gap larger than
    ``time_threshold``. Within a run the duration is the cumulative sum of the
    time steps since the run's first row (which itself gets 0).

    Args:
        df: frame with a DatetimeIndex and a ``heating_status`` column. It is
            not modified; a copy is returned.
        time_threshold: anything ``pd.Timedelta`` accepts; larger gaps between
            consecutive rows reset the duration.

    Returns:
        Copy of ``df`` with ``heating_duration_sec`` and
        ``heating_duration_min`` appended.
    """
    df = df.copy()

    elapsed_sec = df.index.to_series().diff().dt.total_seconds().to_numpy()
    gap_limit_sec = pd.Timedelta(time_threshold).total_seconds()
    status = df['heating_status']

    # The first delta is NaN; NaN > limit is False, which is what we want.
    with np.errstate(invalid='ignore'):
        follows_gap = elapsed_sec > gap_limit_sec
    status_changed = status.ne(status.shift()).to_numpy()

    run_start = follows_gap | status_changed
    if run_start.size:
        run_start[0] = True

    # Rows that open a run contribute 0; every other row contributes its time
    # step. A per-run cumulative sum then reproduces the running duration
    # without a Python-level loop over 10-second rows.
    step_sec = np.where(run_start, 0.0, elapsed_sec)
    run_id = np.cumsum(run_start)
    duration_sec = pd.Series(step_sec).groupby(run_id).cumsum().to_numpy()

    df['heating_duration_sec'] = duration_sec
    df['heating_duration_min'] = df['heating_duration_sec'] / 60
    return df

In [None]:
class TimeAwareSeriesDataset(Dataset):
    """Sliding-window dataset that skips windows containing large time gaps.

    Each sample is ``(X, y)`` where ``X`` is ``history_length`` consecutive
    rows of ``cont_features + bin_features`` and ``y`` is the value of
    ``target_col`` ``pred_horizon`` rows after the last history row. Window
    sizes are expressed in hours and converted to row counts using the median
    spacing of the index. Continuous features are min-max scaled before the
    target is derived, so the target is in scaled units whenever
    ``target_col`` is one of ``cont_features``.

    A window is kept only if every row-to-row gap from its first row up to the
    target row is at most ``max_gap_minutes`` and the target is not NaN.
    """

    # Slack added before flooring so that e.g. 215.99999999999997 (pure
    # floating-point error) still counts as 216 steps.
    _STEP_EPS = 1e-9

    def __init__(self, 
                 df: pd.DataFrame,
                 cont_features: List[str],
                 bin_features: List[str],
                 target_col: str = 'avg_indoor_temperature',
                 history_hours: float = 3,  
                 pred_horizon_hours: float = 2, 
                 max_gap_minutes: float = 15,  # Maximum allowed gap in minutes
                 scaler: Optional["MinMaxScaler"] = None):
        """
            df: Frame with a DatetimeIndex; it is copied, not modified
            cont_features: List of continuous feature names
            bin_features: List of binary feature names
            target_col: Name of target column
            history_hours: Length of history window in hours
            pred_horizon_hours: Prediction horizon in hours
            max_gap_minutes: Maximum allowed gap within a window (minutes)
            scaler: Pre-fit scaler or None to create (and fit) a new one

            Raises ValueError when the frame has fewer than two rows, when the
            median row spacing is not a positive finite number, or when the
            requested windows translate to fewer than one history row or a
            negative horizon.
        """
        self.df = df.copy()
        self.cont_features = cont_features
        self.bin_features = bin_features
        self.features = cont_features + bin_features
        self.target_col = target_col
        self.max_gap_minutes = max_gap_minutes

        if len(self.df) < 2:
            raise ValueError("need at least two rows to infer the sampling interval")

        # Calculate time differences in minutes (first entry is NaN)
        self.time_deltas = self.df.index.to_series().diff().dt.total_seconds().div(60).values

        # Convert time windows from hours to data points
        self.median_interval = np.median(self.time_deltas[1:])  # Skip first NaN
        if not np.isfinite(self.median_interval) or self.median_interval <= 0:
            raise ValueError(
                f"median sampling interval must be positive and finite, got {self.median_interval}"
            )
        self.history_length = self._hours_to_steps(history_hours)
        self.pred_horizon = self._hours_to_steps(pred_horizon_hours)
        if self.history_length < 1:
            raise ValueError("history_hours is shorter than one sampling interval")
        if self.pred_horizon < 0:
            raise ValueError("pred_horizon_hours must not be negative")

        # Normalization
        if scaler is None:
            self.scaler = MinMaxScaler()
            scaled = self.scaler.fit_transform(self.df[cont_features])
        else:
            self.scaler = scaler
            scaled = self.scaler.transform(self.df[cont_features])
        self.df[cont_features] = scaled

        # Shifted target column. Rows are deliberately NOT dropped afterwards:
        # dropping would shift row positions relative to `time_deltas`, and
        # windows whose target is NaN are rejected in _compute_valid_indices.
        self.df['target_temp'] = self.df[target_col].shift(-self.pred_horizon)

        # Materialise float32 arrays once so __getitem__ is a cheap slice
        # instead of a pandas lookup per sample.
        self._feature_array = self.df[self.features].to_numpy(dtype=np.float32)
        self._target_array = self.df['target_temp'].to_numpy(dtype=np.float32)

        # Pre-compute valid indices
        self.valid_idx = self._compute_valid_indices()

    def _hours_to_steps(self, hours: float) -> int:
        """Number of whole sampling intervals that fit into ``hours``."""
        return int(np.floor(hours * 60 / self.median_interval + self._STEP_EPS))

    def _compute_valid_indices(self) -> List[int]:
        """Find window start rows whose time gaps are within tolerance.

        For a start row ``i`` the checked gaps are those of rows
        ``i+1 .. i+history_length+pred_horizon-1`` (history plus the stretch
        up to the target row). NaN gaps count as violations.
        """
        total_length = self.history_length + self.pred_horizon
        n_windows = len(self.df) - total_length + 1
        if n_windows <= 0:
            return []

        with np.errstate(invalid='ignore'):
            gap_too_large = ~(self.time_deltas <= self.max_gap_minutes)
        # violations_before[k] == number of violating gaps among rows [0, k)
        violations_before = np.concatenate(([0], np.cumsum(gap_too_large)))

        starts = np.arange(n_windows)
        gaps_ok = (violations_before[starts + total_length]
                   - violations_before[starts + 1]) == 0

        # The shifted target is read at the last history row of each window.
        target_missing = self.df['target_temp'].isna().to_numpy()
        target_ok = ~target_missing[starts + self.history_length - 1]

        return np.flatnonzero(gaps_ok & target_ok).tolist()

    def __len__(self) -> int:
        """Number of valid windows."""
        return len(self.valid_idx)

    def __getitem__(self, idx: int):
        """Return ``(X, y)``: float32 tensors of shape (history_length, n_features) and (1,)."""
        start = self.valid_idx[idx]
        stop = start + self.history_length

        # Copies keep the returned tensors independent of the cached arrays.
        X = self._feature_array[start:stop].copy()
        y = self._target_array[stop - 1:stop].copy()

        return torch.from_numpy(X), torch.from_numpy(y)

# Model inputs. Binary columns are passed through unscaled; continuous columns
# are the ones handed to the scaler.
binary_features = [
    'heating_status',
    'is_weekend',
]
continuous_features = [
    'avg_indoor_temperature',
    'avg_indoor_humidity',
    'avg_exhaust_temperature',
    'solar_radiation',
    'outdoor_temp',
    'hour_sin',
    'hour_cos',
    'heating_duration_min',
]

# Modified data loading and preprocessing
def load_and_preprocess_data(start_date='2025-01-16',
                             end_date='2025-01-30',
                             train_fraction=0.8):
    """Load the sensor data, add engineered features and split it by time.

    Args:
        start_date: first label of the index slice (inclusive).
        end_date: last label of the index slice (inclusive).
        train_fraction: share of the rows, counted from the start, that goes
            into the training frame; must lie strictly between 0 and 1.

    Returns:
        ``(df_train, df_test)`` — two independent copies; the test frame is
        the chronologically later part.

    Raises:
        ValueError: if ``train_fraction`` is outside (0, 1).
    """
    if not 0 < train_fraction < 1:
        raise ValueError(f"train_fraction must be in (0, 1), got {train_fraction}")

    # Load data
    df = load_data()

    # Filter dates. The slice is applied to the index as loaded, i.e. before
    # add_time_features converts it to local time.
    df = df.loc[start_date:end_date].copy()

    # Add features
    df = add_time_features(df)
    df = add_heating_duration(df)

    # Train-test split (time-based, no shuffling)
    split_point = int(train_fraction * len(df))
    df_train = df.iloc[:split_point].copy()
    df_test = df.iloc[split_point:].copy()

    return df_train, df_test

# Create datasets
df_train, df_test = load_and_preprocess_data()

# One definition of the window geometry so both splits cannot drift apart.
window_config = dict(
    history_hours=3,
    pred_horizon_hours=2,
    max_gap_minutes=15,
)

train_dataset = TimeAwareSeriesDataset(
    df_train,
    continuous_features,
    binary_features,
    **window_config
)

test_dataset = TimeAwareSeriesDataset(
    df_test,
    continuous_features,
    binary_features,
    scaler=train_dataset.scaler,  # Use same scaler
    **window_config
)

# Each dataset converts hours to rows from its own median sampling interval.
# If the two splits disagree, the model's input size (and the meaning of the
# horizon) would differ between training and evaluation, so fail early.
train_geometry = (train_dataset.history_length, train_dataset.pred_horizon)
test_geometry = (test_dataset.history_length, test_dataset.pred_horizon)
if train_geometry != test_geometry:
    raise ValueError(
        "train/test window sizes differ "
        f"(history_length, pred_horizon): {train_geometry} vs {test_geometry}"
    )

  df = pd.read_sql(query, conn)


In [None]:
class TimeSeriesMLP(nn.Module):
    """MLP regressor over a fixed-length multivariate window.

    The time axis of every feature is first projected to 64 values by a
    shared linear layer; the per-feature results are concatenated and passed
    through a 256-128-64-1 fully connected stack.

    Args:
        input_features: number of feature channels in the input.
        seq_length: number of time steps in the input window.
    """

    def __init__(self, input_features=10, seq_length=1080):
        super().__init__()
        compressed_steps = 64

        # Temporal compression (seq_length -> 64), applied per feature
        self.temporal_compressor = nn.Sequential(
            nn.Linear(seq_length, compressed_steps),
            nn.ReLU(),
        )

        # Feature processing: Linear+ReLU for each hidden width, then the head
        widths = [compressed_steps * input_features, 256, 128, 64]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Map ``(batch, seq_len, features)`` to ``(batch, 1)``."""
        per_feature = x.transpose(1, 2)                      # (batch, features, seq_len)
        compressed = self.temporal_compressor(per_feature)   # (batch, features, 64)
        flat = compressed.flatten(1)                         # (batch, features*64)
        return self.net(flat)

#Define Training and Evaluation Functions
def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Puts the model in training mode, performs one optimizer step per batch
    and returns the sum of per-batch losses weighted by batch size, divided
    by ``len(loader.dataset)``.
    """
    model.train()
    running_loss = 0
    for features, targets in loader:
        features = features.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a short final batch is not over-counted.
        running_loss += batch_loss.item() * features.size(0)

    n_samples = len(loader.dataset)
    return running_loss / n_samples

def evaluate(model, loader, criterion, device):
    """Score the model on ``loader`` without updating it.

    Puts the model in eval mode and disables gradient tracking.

    Returns:
        ``(loss, mae)`` — the batch-size-weighted sum of per-batch losses
        divided by ``len(loader.dataset)``, and the mean absolute error over
        all flattened predictions.
    """
    model.eval()
    running_loss = 0
    predictions = []
    references = []

    with torch.no_grad():
        for features, targets in loader:
            features = features.to(device)
            targets = targets.to(device)

            batch_out = model(features)
            batch_loss = criterion(batch_out, targets)
            running_loss += batch_loss.item() * features.size(0)

            predictions.extend(batch_out.cpu().numpy().reshape(-1))
            references.extend(targets.cpu().numpy().reshape(-1))

    mae = mean_absolute_error(references, predictions)
    mean_loss = running_loss / len(loader.dataset)
    return mean_loss, mae

# Initialize Model: size the network from the training dataset so the input
# layer matches the window length and feature count actually produced.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = TimeSeriesMLP(
    input_features=len(train_dataset.features),
    seq_length=train_dataset.history_length,
)
model = model.to(device)

print(f"Model architecture:\n{model}")
print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

# Training Setup
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Lowers the learning rate when the monitored loss has stopped improving for
# `patience` epochs (the loss is supplied via scheduler.step(...) in the loop).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)

# Settings shared by both loaders.
loader_kwargs = dict(
    batch_size=64,
    num_workers=4,                        # Parallel data loading
    pin_memory=(device.type == "cuda"),   # Only useful (and warning-free) with a GPU
    persistent_workers=True,              # Keep workers alive between epochs
)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)

# The test loader is iterated once per epoch as well, so it benefits from
# persistent workers just like the training loader.
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

os.makedirs('saved_models', exist_ok=True)
os.makedirs('plots', exist_ok=True)

# Early-stopping state and metric history (initialised exactly once).
max_epochs = 20
best_val_loss = float('inf')
patience = 5
no_improve = 0
train_losses = []
val_losses = []
val_maes = []

# NOTE(review): the test loader doubles as the validation set here, so the
# selected checkpoint and the reported "Val" metrics are not independent.
for epoch in range(max_epochs):
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_mae = evaluate(model, test_loader, criterion, device)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    # Store a plain float so the checkpoint contains no numpy scalar objects.
    val_maes.append(float(val_mae))

    scheduler.step(val_loss)

    print(f"Epoch {epoch+1:>3} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val MAE: {val_mae:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve = 0
        # Checkpoint is a dict; the weights live under 'model_state_dict'.
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': val_loss,
            'metrics': {
                'train_losses': train_losses,
                'val_losses': val_losses,
                'val_maes': val_maes
            }
        }, 'saved_models/best_model.pth')
    else:
        no_improve += 1
        if no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Save final model (weights only, as they are after the last epoch)
torch.save(model.state_dict(), 'saved_models/final_model.pth')

# Load Best Model
# The best checkpoint is written to 'saved_models/best_model.pth' as a dict,
# with the weights under 'model_state_dict'. weights_only=False is needed
# because the dict also carries plain-Python metric history; the file is
# produced by this notebook, so it is trusted.
best_checkpoint = torch.load('saved_models/best_model.pth',
                             map_location=device,
                             weights_only=False)
model.load_state_dict(best_checkpoint['model_state_dict'])
final_train_loss, final_train_mae = evaluate(model, train_loader, criterion, device)
final_val_loss, final_val_mae = evaluate(model, test_loader, criterion, device)

print("\nFinal Performance:")
print(f"Train Loss: {final_train_loss:.4f} | MAE: {final_train_mae:.4f}")
print(f"Val Loss: {final_val_loss:.4f} | MAE: {final_val_mae:.4f}")

# 7. Prediction Example
def predict(model, dataset, idx, scaler):
    """Print the true and predicted target for one dataset sample.

    Args:
        model: network taking a ``(1, ...)`` batch and returning one value.
        dataset: object whose ``dataset[idx]`` yields ``(X, y)`` tensors and
            which exposes ``target_col`` and ``cont_features``.
        idx: sample position in ``dataset``.
        scaler: scaler fitted on ``dataset.cont_features`` (in that column
            order); used to map values back to original units when the
            target is one of the scaled columns.
    """
    model.eval()
    X, y = dataset[idx]

    # Run on whatever device the model lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        pred = model(X.unsqueeze(0).to(run_device)).cpu().item()

    # Reduce the 1-element target tensor to a Python number up front so the
    # formatted print below works on both branches.
    y = torch.as_tensor(y).item()

    # Inverse transform if target was scaled
    if dataset.target_col in dataset.cont_features:
        target_pos = dataset.cont_features.index(dataset.target_col)
        # The scaler expects every continuous column; pad the others with 0
        # and read back only the target column.
        padded = np.zeros((2, len(dataset.cont_features)))
        padded[:, target_pos] = (pred, y)
        pred, y = scaler.inverse_transform(padded)[:, target_pos]

    print(f"\nSample {idx} Prediction:")
    print(f"True: {y:.2f} | Pred: {pred:.2f} | Error: {abs(y-pred):.2f}")

# Test prediction: print true vs. predicted target for the first valid window
# of the test dataset, passing the scaler that was fitted on the training data.
predict(model, test_dataset, 0, train_dataset.scaler)

Model architecture:
TimeSeriesMLP(
  (temporal_compressor): Sequential(
    (0): Linear(in_features=1080, out_features=64, bias=True)
    (1): ReLU()
  )
  (net): Sequential(
    (0): Linear(in_features=640, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=1, bias=True)
  )
)
Total parameters: 274,497




KeyboardInterrupt: 

In [None]:

# Load Best Model
# The best checkpoint is written to 'saved_models/best_model.pth' as a dict,
# with the weights under 'model_state_dict'. weights_only=False is needed
# because the dict also carries plain-Python metric history; the file is
# produced by this notebook, so it is trusted.
best_checkpoint = torch.load('saved_models/best_model.pth',
                             map_location=device,
                             weights_only=False)
model.load_state_dict(best_checkpoint['model_state_dict'])
final_train_loss, final_train_mae = evaluate(model, train_loader, criterion, device)
final_val_loss, final_val_mae = evaluate(model, test_loader, criterion, device)

print("\nFinal Performance:")
print(f"Train Loss: {final_train_loss:.4f} | MAE: {final_train_mae:.4f}")
print(f"Val Loss: {final_val_loss:.4f} | MAE: {final_val_mae:.4f}")

#Prediction Example
def predict(model, dataset, idx, scaler):
    """Print the true and predicted target for one dataset sample.

    Args:
        model: network taking a ``(1, ...)`` batch and returning one value.
        dataset: object whose ``dataset[idx]`` yields ``(X, y)`` tensors and
            which exposes ``target_col`` and ``cont_features``.
        idx: sample position in ``dataset``.
        scaler: scaler fitted on ``dataset.cont_features`` (in that column
            order); used to map values back to original units when the
            target is one of the scaled columns.
    """
    model.eval()
    X, y = dataset[idx]

    # Run on whatever device the model lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        pred = model(X.unsqueeze(0).to(run_device)).cpu().item()

    # Reduce the 1-element target tensor to a Python number up front so the
    # formatted print below works on both branches.
    y = torch.as_tensor(y).item()

    # Inverse transform if target was scaled
    if dataset.target_col in dataset.cont_features:
        target_pos = dataset.cont_features.index(dataset.target_col)
        # The scaler expects every continuous column; pad the others with 0
        # and read back only the target column.
        padded = np.zeros((2, len(dataset.cont_features)))
        padded[:, target_pos] = (pred, y)
        pred, y = scaler.inverse_transform(padded)[:, target_pos]

    print(f"\nSample {idx} Prediction:")
    print(f"True: {y:.2f} | Pred: {pred:.2f} | Error: {abs(y-pred):.2f}")

# Test prediction: print true vs. predicted target for the first valid window
# of the test dataset, passing the scaler that was fitted on the training data.
predict(model, test_dataset, 0, train_dataset.scaler)