In [None]:
# In a Jupyter cell
# Import necessary libraries and your custom environment
import sys
import os
import gymnasium as gym
import numpy as np

# Add the project's root directory to the Python path
# This allows us to import from the 'src' folder
sys.path.append(os.path.abspath('..'))
from src.environment.building_env import BuildingEnv

# --- Cell 1: Setup and Initialization ---
# Flat 24-entry forecasts: every step sees the same outside temperature and
# the same energy price, which keeps the smoke test easy to reason about.
dummy_temps = [10.0] * 24  # constant cold day
dummy_prices = [0.20] * 24  # constant price

print("--- Initializing Environment ---")
env = BuildingEnv(
    outside_temp_forecast=dummy_temps,
    energy_price_forecast=dummy_prices,
)

# reset() hands back the first observation plus an info object.
initial_state, info = env.reset()

# Observation layout: [current_temp, outside_temp, hour_of_day, energy_price]
# With the forecasts above the first observation is expected to be
# [22.0, 10.0, 0, 0.20].
print(f"Initial State (Observation): {initial_state}")
print(f"Observation Space Shape: {env.observation_space.shape}")
print(f"Action Space Size: {env.action_space.n}")

# --- Cell 2: Test a single step with a specific action ---
# Apply one 'Heat' step (action 1) to the freshly reset environment and
# print everything step() returns.
#
# EXPECTATIONS:
# - The new temperature should be higher (e.g., 23.0)
# - The hour should be 1
# - The reward should be negative, reflecting the cost of heating
# - Info should contain the cost for this step.
print("\n--- Testing a Single 'Heating' Step ---")

action = 1  # 'Heat'
(next_state, reward, terminated, truncated, info) = env.step(action)

print("Action Taken: Heat (1)")
print(f"Next State: {next_state}")
print(f"Reward Received: {reward:.2f}")
print(f"Is Episode Done?: {terminated}")
print(f"Info Dictionary: {info}")

# --- Cell 3: Test a single step with the 'Cool' action ---
# Continues from the heating step above (23.0°C, hour 1) and applies one
# 'Cool' step (action 2) to bring the temperature back down.
print("\n--- Testing a Single 'Cooling' Step ---")

action = 2  # 'Cool'
(next_state, reward, terminated, truncated, info) = env.step(action)

print("Action Taken: Cool (2)")
print(f"Next State: {next_state}")
print(f"Reward Received: {reward:.2f}")
print(f"Is Episode Done?: {terminated}")

# --- Cell 4: Run a small loop to see how things evolve ---
# Take up to five random actions from a fresh episode and render the
# environment after each one.
print("\n--- Running a 5-step loop with random actions ---")
env.reset() # Start over
for i in range(5):
    random_action = env.action_space.sample() # Take a random action
    # Keep the end-of-episode flags: once either is set, a Gymnasium
    # environment has to be reset before step() may be called again.
    _, reward, terminated, truncated, _ = env.step(random_action)
    print(f"Step {i+1}:")
    env.render() # Use our human-readable render method
    if terminated or truncated:
        # Stop rather than step a finished episode.
        print("Episode ended; stopping the loop early.")
        break

# This helps you see if the simulation is drifting or behaving unexpectedly over a few steps.

--- Initializing Environment ---
Initial State (Observation): [22.  10.   0.   0.2]
Observation Space Shape: (4,)
Action Space Size: 3

--- Testing a Single 'Heating' Step ---
Action Taken: Heat (1)
Next State: [23.  10.   1.   0.2]
Reward Received: -0.40
Is Episode Done?: False
Info Dictionary: {'cost': 0.4, 'comfort_penalty': 0}

--- Testing a Single 'Cooling' Step ---
Action Taken: Cool (2)
Next State: [22.  10.   2.   0.2]
Reward Received: -0.30
Is Episode Done?: False

--- Running a 5-step loop with random actions ---
Step 1:
Hour: 1, Temp: 20.80°C, Action: Cool, Cost: $0.02, Comfort Penalty: 0.00
Step 2:
Hour: 2, Temp: 19.80°C, Action: Heat, Cost: $0.32, Comfort Penalty: 0.20
Step 3:
Hour: 3, Temp: 20.80°C, Action: Heat, Cost: $0.72, Comfort Penalty: 0.20
Step 4:
Hour: 4, Temp: 19.80°C, Action: Cool, Cost: $1.02, Comfort Penalty: 0.40
Step 5:
Hour: 5, Temp: 18.82°C, Action: Cool, Cost: $1.04, Comfort Penalty: 1.58


  gym.logger.warn(
