## Consuming data using Kafka and Visualise (20%)
In this task, we will implement an Apache Kafka consumer to consume the data from Part 2.  
  
Important:   
-	In this part, Kafka consumers are used to consume the streaming data published from task 2.8.

In [1]:
# --- Rebuilt Simple Plotting Debugger V2 ---
from kafka3 import KafkaConsumer
import json
import time
from threading import Thread, Lock
import atexit
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
import collections
import traceback

# --- Matplotlib Setup ---
%matplotlib notebook

# --- Configuration ---
HOST_IP = "192.168.0.6" # Use your machine's IP
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

# --- Shared Data Stores & Lock ---
building_data_store = collections.defaultdict(lambda: collections.defaultdict(dict))
site_data_store = collections.defaultdict(lambda: {str(i): 0 for i in range(16)})
data_lock = Lock()

# --- Store for latest points for plotting ---
latest_building_points = collections.deque(maxlen=50) # Keep last 50 points
latest_site_points = collections.deque(maxlen=50) # Keep last 50 points

consumers_list = [] # Keep track for cleanup

# --- Kafka Connection (Using Raw Deserializer) ---
def connect_kafka_consumer_threaded(topic):
    consumer = None
    # Ensure unique group ID for each run to force 'latest' behavior
    group_id = f'{topic}-plotter-{datetime.now()}'
    print(f"[{topic}] Attempting connection with unique group_id='{group_id}'...", flush=True)
    try:
        consumer = KafkaConsumer(
            topic, # Subscribe directly
            bootstrap_servers=[f'{HOST_IP}:9092'],
            auto_offset_reset='latest', # Start reading only NEW messages
            group_id=group_id,         # Use the unique group ID
            # ** Use raw string deserializer **
            value_deserializer=lambda x: x.decode('utf-8', errors='ignore'),
            api_version=(0, 10),
            request_timeout_ms=60000,
            session_timeout_ms=30000,
            # Can keep auto-commit enabled for simplicity here
            enable_auto_commit=True
        )
        partitions = consumer.partitions_for_topic(topic)
        if partitions is None: # Check if None specifically
             print(f"[{topic}] Warning: Connected but partitions_for_topic returned None.", flush=True)
             # Don't exit, consumer might still work if partitions assign later
        elif len(partitions) == 0:
             print(f"[{topic}] Warning: Connected but no partitions found for topic.", flush=True)
             # Don't exit here either
        else:
            print(f"[{topic}] Connection SUCCESSFUL. Partitions initially found: {partitions}", flush=True)

        consumers_list.append(consumer)
        return consumer
    except Exception as e:
        print(f"[{topic}] FAILED to connect or fetch partitions: {e}", flush=True)
        traceback.print_exc()
        if consumer: consumer.close()
        return None

# --- Consumer Thread Function (Manual JSON Parsing) ---
def data_consumer_thread(topic):
    """Thread function to consume, parse (manually), and store data."""
    print(f"[{topic}] Data consumer thread started.", flush=True)
    consumer = connect_kafka_consumer_threaded(topic)
    if not consumer:
        print(f"[{topic}] Thread exiting due to connection failure.", flush=True)
        return

    message_count = 0
    error_count = 0
    success_count = 0
    print(f"[{topic}] Entering message loop...", flush=True)

    try:
        for message in consumer:
            message_count += 1
            # ** DEBUG: Confirm message is received by the loop **
#             print(f"[{topic}] Loop received message offset {message.offset}", flush=True)
            raw_value_str = message.value

            try:
                # ** Manual JSON loading **
                msg = json.loads(raw_value_str)

                if not isinstance(msg, dict):
                    print(f"[{topic}] Error: Decoded JSON is not a dictionary (Offset {message.offset}): {msg}", flush=True)
                    error_count += 1
                    continue

                now = datetime.now() # Timestamp for plotting latest points

                # --- Process based on topic ---
                if topic == TOPIC_BUILDING:
                    bldg_id = msg.get('building_id')
                    time_bucket = msg.get('time')
                    val_raw = msg.get('total_power_6h')
                    date_str = msg.get('date') # Crucial field

                    if not all([bldg_id is not None, time_bucket, val_raw is not None, date_str is not None]): # Check date explicitly
                        print(f"[{topic}] Skipping building msg (Offset {message.offset}) - Missing fields. Has date? {'Yes' if date_str else 'NO'}. Keys: {list(msg.keys())}", flush=True)
                        error_count += 1
                        continue
                    try: val = float(val_raw)
                    except (ValueError, TypeError):
                        print(f"[{topic}] Invalid building value type (Offset {message.offset}): '{val_raw}'", flush=True)
                        error_count += 1
                        continue

                    with data_lock:
                        building_data_store[date_str][time_bucket][str(bldg_id)] = val
                        latest_building_points.append((now, val))
                        # ** DEBUG: Confirm append **
#                         print(f"[{topic}] Appended to latest_building_points (len={len(latest_building_points)})", flush=True)
                    success_count += 1

                elif topic == TOPIC_SITE:
                    site_id_raw = msg.get('site_id')
                    val_raw = msg.get('total_power_day')
                    date_str = msg.get('date') # Crucial field

                    if site_id_raw is None or val_raw is None or date_str is None: # Check date explicitly
                        print(f"[{topic}] Skipping site msg (Offset {message.offset}) - Missing fields. Has date? {'Yes' if date_str else 'NO'}. Keys: {list(msg.keys())}", flush=True)
                        error_count += 1
                        continue
                    try: val = float(val_raw)
                    except (ValueError, TypeError):
                        print(f"[{topic}] Invalid site value type (Offset {message.offset}): '{val_raw}'", flush=True)
                        error_count += 1
                        continue

                    site_id_str = str(site_id_raw)
                    if site_id_str not in [str(i) for i in range(16)]:
                        print(f"[{topic}] Invalid site_id (Offset {message.offset}): {site_id_str}", flush=True)
                        error_count += 1
                        continue

                    with data_lock:
                        site_data_store[date_str][site_id_str] = val
                        latest_site_points.append((now, val))
                        # ** DEBUG: Confirm append **
#                         print(f"[{topic}] Appended to latest_site_points (len={len(latest_site_points)})", flush=True)
                    success_count += 1

            except json.JSONDecodeError as json_e:
                print(f"[{topic}] JSON Decode Error (Offset {message.offset}): {json_e}. Raw data: {raw_value_str}", flush=True)
                error_count += 1
            except Exception as e_parse:
                print(f"[{topic}] Error processing message content (Offset {message.offset}): {e_parse}. Raw: {raw_value_str}", flush=True)
                traceback.print_exc()
                error_count += 1

            if message_count % 50000 == 0:
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)

    except Exception as e_outer_loop:
        print(f"[{topic}] Error IN message loop: {e_outer_loop}", flush=True)
        traceback.print_exc()
    finally:
        print(f"[{topic}] Exiting message loop. Final Counts - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)


# --- Plotter and Manager Thread (Same as previous "Rebuilt" version) ---
def simple_plotter_manager_thread(fig, ax_bldg, ax_site):
    """Plots latest points, prunes data, and prints store size."""
    print("[Plotter] Plotter thread started.", flush=True)
    while True:
        try:
            building_dates_count = 0
            site_dates_count = 0
            latest_b_times = []
            latest_b_values = []
            latest_s_times = []
            latest_s_values = []

            # --- Pruning and Reading ---
            with data_lock:
                # Prune data older than 8 days from today
                prune_threshold_date = date.today() - timedelta(days=8)
                prune_threshold_str = prune_threshold_date.isoformat()

                prune_keys_b = [k for k in building_data_store if k < prune_threshold_str]
                prune_keys_s = [k for k in site_data_store if k < prune_threshold_str]

                pruned_b_count = len(prune_keys_b) # Count before deleting
                pruned_s_count = len(prune_keys_s)

                for key in prune_keys_b:
                    if key in building_data_store: del building_data_store[key]
                for key in prune_keys_s:
                     if key in site_data_store: del site_data_store[key]

                # Get counts AFTER pruning
                building_dates_count = len(building_data_store)
                site_dates_count = len(site_data_store)

                # Get latest points for plotting
                if latest_building_points:
                    latest_b_times, latest_b_values = zip(*list(latest_building_points)) # Use list() for safe iteration
                if latest_site_points:
                    latest_s_times, latest_s_values = zip(*list(latest_site_points)) # Use list() for safe iteration

            # --- Debug Print ---
            print(f"\n--- DEBUG @ {datetime.now().strftime('%H:%M:%S')} ---", flush=True)
            print(f"  Building Store Dates: {building_dates_count}", flush=True)
            print(f"  Site Store Dates: {site_dates_count}", flush=True)
            if pruned_b_count > 0 or pruned_s_count > 0:
                print(f"  Pruned {pruned_b_count} building days, {pruned_s_count} site days (older than {prune_threshold_str})", flush=True)

            # --- Plotting ---
            ax_bldg.clear()
            if latest_b_times:
                ax_bldg.plot(latest_b_times, latest_b_values, marker='o', linestyle='-', markersize=3, label='Building_6h (Latest)')
                fig.autofmt_xdate()
            ax_bldg.set_title("Latest Building_6h Values Received")
            ax_bldg.set_ylabel("Total Power (6h)")
            ax_bldg.legend(loc='upper left')
            ax_bldg.grid(True)


            ax_site.clear()
            if latest_s_times:
                ax_site.plot(latest_s_times, latest_s_values, marker='x', linestyle='--', markersize=4, color='orange', label='Site_Daily (Latest)')
                fig.autofmt_xdate()
            ax_site.set_title("Latest Site_Daily Values Received")
            ax_site.set_ylabel("Total Power (Daily)")
            ax_site.set_xlabel("Time Received")
            ax_site.legend(loc='upper left')
            ax_site.grid(True)

            fig.canvas.draw_idle()

            plt.pause(2.0)

        except Exception as e:
            print(f"[Plotter Error] An error occurred: {e}", flush=True)
            traceback.print_exc()
            time.sleep(2.0)


# --- Main Execution Block (Same setup) ---
def cleanup_all_consumers():
    print("\n--- Cleaning up all consumers ---", flush=True)
    for c in consumers_list:
        try: c.close()
        except: pass
    print("--- Cleanup complete ---", flush=True)

atexit.register(cleanup_all_consumers)

print("--- Simple Multi-Threaded Plotting Debugger V2 ---", flush=True)
try:
    print("Initializing plot...", flush=True)
    fig, (ax_bldg, ax_site) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
    fig.show()
    print("Plot initialized.", flush=True)

    thread_bldg = Thread(target=data_consumer_thread, args=(TOPIC_BUILDING,), daemon=True)
    thread_site = Thread(target=data_consumer_thread, args=(TOPIC_SITE,), daemon=True)
    thread_plotter = Thread(target=simple_plotter_manager_thread, args=(fig, ax_bldg, ax_site), daemon=True)

    print("Starting threads...", flush=True)
    thread_bldg.start()
    thread_site.start()
    thread_plotter.start()

    print("--- Threads running (Interrupt Kernel to stop) ---", flush=True)

    while True: time.sleep(10)

except KeyboardInterrupt:
    print("\n--- Main thread interrupted by user ---", flush=True)
except Exception as e:
    print(f"\n--- An error occurred in main thread: {e} ---", flush=True)
    traceback.print_exc()
finally:
    print("--- Main thread exiting ---", flush=True)

--- Simple Multi-Threaded Plotting Debugger V2 ---
Initializing plot...


<IPython.core.display.Javascript object>

Plot initialized.
Starting threads...
[building_6h] Data consumer thread started.
[building_6h] Attempting connection with unique group_id='building_6h-plotter-2025-10-27 03:21:46.133286'...
[site_daily] Data consumer thread started.
[Plotter] Plotter thread started.
--- Threads running (Interrupt Kernel to stop) ---

--- DEBUG @ 03:21:46 ---
[site_daily] Attempting connection with unique group_id='site_daily-plotter-2025-10-27 03:21:46.135531'...
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


[building_6h] Connection SUCCESSFUL. Partitions initially found: {0}
[building_6h] Entering message loop...
[site_daily] Connection SUCCESSFUL. Partitions initially found: {0}
[site_daily] Entering message loop...

--- DEBUG @ 03:21:48 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:21:50 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:21:52 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:21:54 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:21:56 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:21:58 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:00 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:02 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:04 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:06 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:08 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:10 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:13 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:15 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:17 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:19 ---
  Building Store Dates: 0
  Site Store Dates: 0


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:21 ---
  Building Store Dates: 0
  Site Store Dates: 0
  Pruned 4 building days, 0 site days (older than 2025-10-19)


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:23 ---
  Building Store Dates: 0
  Site Store Dates: 0
  Pruned 4 building days, 0 site days (older than 2025-10-19)


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.



--- DEBUG @ 03:22:25 ---
  Building Store Dates: 0
  Site Store Dates: 0
  Pruned 4 building days, 4 site days (older than 2025-10-19)

--- DEBUG @ 03:22:27 ---
  Building Store Dates: 0
  Site Store Dates: 0
  Pruned 0 building days, 4 site days (older than 2025-10-19)

--- DEBUG @ 03:22:29 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:31 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:33 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:36 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:38 ---
  Building Store Dates: 0
  Site Store Dates: 0
  Pruned 0 building days, 3 site days (older than 2025-10-19)

--- DEBUG @ 03:22:40 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:42 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:44 ---
  Building Store Dates: 0
  Site Store Dates: 0

--- DEBUG @ 03:22:46 ---
  Building Store Dates: 0
  Site Store Dates: 0


In [None]:
# Debug 2
# --- Minimal Multi-Threaded Debugger ---
from kafka3 import KafkaConsumer
import json
import time
from threading import Thread
import atexit
from datetime import datetime

# --- Configuration ---
HOST_IP = "192.168.0.6" # Use your machine's IP
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

consumers_list = [] # Keep track for cleanup

def connect_kafka_consumer_threaded(topic):
    """Connects to Kafka for threaded consumer."""
    consumer = None
    try:
        consumer = KafkaConsumer(
            topic,
            bootstrap_servers=[f'{HOST_IP}:9092'],
            auto_offset_reset='latest',#'earliest',
            # No timeout needed as each thread blocks on its consumer
            value_deserializer=lambda x: x.decode('utf-8'), # Get raw string
            # ADD A NEW group_id to force new data instead of cache:
            group_id=f'{topic}-v2-{datetime.now()}',
#             # Important: Disable auto-commit initially if seeking
#             enable_auto_commit=False
        )
        consumers_list.append(consumer) # Add for cleanup
        print(f"[{topic}] Consumer connected successfully.")
        return consumer
    except Exception as e:
        print(f"[{topic}] Failed to connect consumer: {e}")
        return None

def debug_consumer_thread(topic):
    """Thread function to consume and print messages from a topic."""
    print(f"[{topic}] Debug thread started.")
    consumer = connect_kafka_consumer_threaded(topic)
    if not consumer:
        print(f"[{topic}] Thread exiting due to connection failure.")
        return

    message_count = 0
    error_count = 0
    try:
        for message in consumer:
            message_count += 1
            if message_count > 5:
                break
            print(f"\n[{topic}] Message {message_count} (Offset: {message.offset}):")
            raw_value = message.value
            print(f"  Raw Value (type={type(raw_value)}): {raw_value}")
            try:
                # Try decoding as JSON
                decoded_value = json.loads(raw_value)
                print(f"  Decoded JSON (type={type(decoded_value)}): {decoded_value}")
                if isinstance(decoded_value, dict):
                    print(f"  JSON Keys: {list(decoded_value.keys())}")
            except json.JSONDecodeError as e:
                print(f"  JSON Decode Error: {e}")
                error_count += 1
            except Exception as e:
                print(f"  Other Error during decode: {e}")
                error_count += 1

            # Optional: Add a small sleep to prevent overwhelming output if needed
            # time.sleep(0.1)

    except KeyboardInterrupt:
        print(f"[{topic}] KeyboardInterrupt received.")
    except Exception as e:
        print(f"[{topic}] Error in consumer loop: {e}")
    finally:
        print(f"[{topic}] Debug thread finishing. Processed: {message_count}, Errors: {error_count}")
        # Consumer closing is handled by atexit

def cleanup_all_consumers():
    """Close all registered consumers."""
    print("\n--- Cleaning up all consumers ---")
    for c in consumers_list:
        try:
            c.close()
        except Exception as e:
            print(f"Error closing consumer: {e}")
    print("--- Cleanup complete ---")

# Register cleanup function to run at script exit
atexit.register(cleanup_all_consumers)

print("--- Multi-Threaded Kafka Debugger ---")
try:
    # Create and start threads
    thread_bldg = Thread(target=debug_consumer_thread, args=(TOPIC_BUILDING,), daemon=True)
    thread_site = Thread(target=debug_consumer_thread, args=(TOPIC_SITE,), daemon=True)

    print("Starting debug threads...")
    thread_bldg.start()
    thread_site.start()

    print("--- Debug threads running (Press Ctrl+C or Interrupt Kernel to stop) ---")

    # Keep main thread alive to allow daemon threads to run
    # In Jupyter, this cell might appear to finish, but threads continue.
    while True:
        time.sleep(10) # Keep main thread alive, checking periodically

except KeyboardInterrupt:
    print("\n--- Main thread interrupted by user ---")
except Exception as e:
    print(f"\n--- An error occurred in main thread: {e} ---")
finally:
    print("--- Main thread exiting ---")
    # Cleanup will be called automatically by atexit

--- Multi-Threaded Kafka Debugger ---
Starting debug threads...
[building_6h] Debug thread started.
[site_daily] Debug thread started.
--- Debug threads running (Press Ctrl+C or Interrupt Kernel to stop) ---
[site_daily] Consumer connected successfully.
[building_6h] Consumer connected successfully.

[site_daily] Message 1 (Offset: 133107):
  Raw Value (type=<class 'str'>): {"site_id":15,"date":"2022-02-07","total_power_day":1360.7191642364162}
  Decoded JSON (type=<class 'dict'>): {'site_id': 15, 'date': '2022-02-07', 'total_power_day': 1360.7191642364162}
  JSON Keys: ['site_id', 'date', 'total_power_day']

[site_daily] Message 2 (Offset: 133108):
  Raw Value (type=<class 'str'>): {"site_id":0,"date":"2022-02-06","total_power_day":1367.0817884670184}
  Decoded JSON (type=<class 'dict'>): {'site_id': 0, 'date': '2022-02-06', 'total_power_day': 1367.0817884670184}
  JSON Keys: ['site_id', 'date', 'total_power_day']

[site_daily] Message 3 (Offset: 133109):
  Raw Value (type=<class 'str

In [1]:
# --- Plotter V3 with D+2 Logic & Consumer Poll ---
from kafka3 import KafkaConsumer, TopicPartition
import json
import time
from threading import Thread, Lock, Event # Import Event
import atexit
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
import collections
import traceback
import uuid

# --- Matplotlib Setup ---
%matplotlib notebook

# --- Configuration ---
HOST_IP = "192.168.0.6" # Correct IP
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

# --- Shared Data Stores & Lock ---
building_data_store = collections.defaultdict(lambda: collections.defaultdict(dict))
site_data_store = collections.defaultdict(lambda: {str(i): 0 for i in range(16)})
data_lock = Lock()

consumers_list = []
stop_event = Event() # Event to signal threads to stop

# --- Kafka Connection (Adjusted Timeouts) ---
def connect_kafka_consumer_threaded(topic):
    consumer = None
    group_id = f'{topic}-plotter-v3-{uuid.uuid4()}' # Unique group ID
    print(f"[{topic}] Attempting connection with unique group_id='{group_id}'...", flush=True)
    try:
        consumer = KafkaConsumer(
            # No topic assigned here yet, will subscribe/assign later
            bootstrap_servers=[f'{HOST_IP}:9092'],
            auto_offset_reset='latest', # Start reading only NEW messages
            group_id=group_id,
            value_deserializer=lambda x: x.decode('utf-8', errors='ignore'), # Raw string
            api_version=(0, 10),
            request_timeout_ms=65000,
            session_timeout_ms=60000,  # 60 seconds session timeout
            heartbeat_interval_ms=15000, # 15 seconds heartbeat
            enable_auto_commit=True # Auto-commit enabled
        )
        print(f"[{topic}] Consumer object created. Subscribing to topic...", flush=True)
        consumer.subscribe([topic]) # Subscribe to the topic
        # Allow some time for partition assignment
        time.sleep(5)
        assignment = consumer.assignment()
        if not assignment:
             print(f"[{topic}] Warning: Connected but no partitions assigned after subscription.", flush=True)
             # Attempt to poll once to force assignment
             consumer.poll(timeout_ms=1000, max_records=1)
             assignment = consumer.assignment()
             if not assignment:
                  print(f"[{topic}] Still no partitions assigned after poll. Exiting.", flush=True)
                  consumer.close()
                  return None
        print(f"[{topic}] Connection and subscription SUCCESSFUL. Assignment: {assignment}", flush=True)
        consumers_list.append(consumer)
        return consumer
    except Exception as e:
        print(f"[{topic}] FAILED during connection/subscription: {e}", flush=True)
        traceback.print_exc()
        if consumer: consumer.close()
        return None

# --- Consumer Thread Function (Using consumer.poll) ---
def data_consumer_thread(topic):
    """Thread function using consumer.poll() for better responsiveness."""
    print(f"[{topic}] Data consumer thread started.", flush=True)
    consumer = connect_kafka_consumer_threaded(topic)
    if not consumer:
        print(f"[{topic}] Thread exiting due to connection failure.", flush=True)
        return

    message_count = 0
    error_count = 0
    success_count = 0
    print(f"[{topic}] Entering polling loop...", flush=True)

    try:
        while not stop_event.is_set(): # Check stop event
            # Poll for messages with a 1-second timeout
            # This returns control even if no messages, allowing heartbeats
            msg_pack = consumer.poll(timeout_ms=1000)

            if not msg_pack:
                # print(f"[{topic}] No messages received in poll interval.", flush=True) # Optional debug
                continue # Go back to polling

            # Process messages received in the poll
            for tp, messages in msg_pack.items():
                for message in messages:
                    message_count += 1
                    raw_value_str = message.value
                    
                    # --- DEBUG: Print raw string value ---
#                     print(f"[{topic}] Raw message received (Offset {message.offset}): {raw_value_str}", flush=True)
            
                    try:
                        msg = json.loads(raw_value_str)
                        if not isinstance(msg, dict):
                            print(f"[{topic}] Error: Decoded JSON not dict (Offset {message.offset})", flush=True); error_count += 1; continue

                        now = datetime.now()
                        date_str = msg.get('date') # Check date first

                        if not date_str: # Skip immediately if no date
                             print(f"[{topic}] Skipping msg (Offset {message.offset}) - Missing 'date' field. Keys: {list(msg.keys())}", flush=True)
                             error_count += 1
                             continue

                        # --- Process based on topic ---
                        if topic == TOPIC_BUILDING:
                            bldg_id = msg.get('building_id')
                            time_bucket = msg.get('time')
                            val_raw = msg.get('total_power_6h')
                            # Note: date_str already extracted

                            if not all([bldg_id is not None, time_bucket, val_raw is not None]): # Date already checked
                                print(f"[{topic}] Skipping building msg (Offset {message.offset}) - Missing fields. Keys: {list(msg.keys())}", flush=True); error_count += 1; continue
                            try: val = float(val_raw)
                            except (ValueError, TypeError): print(f"[{topic}] Invalid building value (Offset {message.offset})", flush=True); error_count += 1; continue

                            with data_lock:
                                building_data_store[date_str][time_bucket][str(bldg_id)] = val
                            success_count += 1

                        elif topic == TOPIC_SITE:
                            site_id_raw = msg.get('site_id')
                            val_raw = msg.get('total_power_day')
                            # Note: date_str already extracted

                            if site_id_raw is None or val_raw is None: # Date already checked
                                print(f"[{topic}] Skipping site msg (Offset {message.offset}) - Missing fields. Keys: {list(msg.keys())}", flush=True); error_count += 1; continue
                            try: val = float(val_raw)
                            except (ValueError, TypeError): print(f"[{topic}] Invalid site value (Offset {message.offset})", flush=True); error_count += 1; continue

                            site_id_str = str(site_id_raw)
                            if site_id_str not in [str(i) for i in range(16)]: print(f"[{topic}] Invalid site_id (Offset {message.offset})", flush=True); error_count += 1; continue

                            with data_lock:
                                site_data_store[date_str][site_id_str] = val
                            success_count += 1

                    except json.JSONDecodeError as json_e: print(f"[{topic}] JSON Error (Offset {message.offset}): {json_e}", flush=True); error_count += 1
                    except Exception as e_parse: print(f"[{topic}] Processing Error (Offset {message.offset}): {e_parse}", flush=True); traceback.print_exc(); error_count += 1

            # Print stats occasionally after processing a batch
            if message_count % 100 == 0 and message_count > 0:
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)

            # Optional small sleep if CPU usage is too high
            time.sleep(0.01)

    except Exception as e_outer_loop:
        print(f"[{topic}] Error in Polling Loop: {e_outer_loop}", flush=True)
        traceback.print_exc()
    finally:
        print(f"[{topic}] Exiting polling loop. Final Counts - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)

# --- Plotter Thread (D+2 Logic) ---

# Keep is_data_ready_for_date function (it's still useful)
def is_data_ready_for_date(target_date_str):
    with data_lock:
        site_data = site_data_store.get(target_date_str)
        building_data = building_data_store.get(target_date_str)
        site_ready = site_data is not None and any(v > 0 for v in site_data.values())
        building_ready = building_data is not None and all(b in building_data for b in ["0-6h", "6-12h", "12-18h", "18-24h"])
    return site_ready and building_ready

def plotter_and_manager_thread(fig_building, axes_building, fig_site, axes_site):
    """Plots the OLDEST date D if data for D+2 exists, then removes D."""
    print("[Plotter] Plotter thread started.", flush=True)
    last_plotted_date_str = None

    while not stop_event.is_set():
        try:
            plot_occurred = False
            target_str_to_plot = None
            latest_date_in_store = None

            with data_lock:
                # Find the latest date present in *either* store
                all_dates = sorted(list(set(building_data_store.keys()) | set(site_data_store.keys())))
                if all_dates:
                    latest_date_in_store = datetime.strptime(all_dates[-1], '%Y-%m-%d').date()

                # --- Find Oldest Plot Target (D) based on D+2 rule ---
                if latest_date_in_store:
                    # Potential target date is 2 days before the latest seen date
                    potential_target_date = latest_date_in_store - timedelta(days=2)
                    potential_target_str = potential_target_date.isoformat()

                    # Check if this potential target exists and is ready
                    if potential_target_str in all_dates and is_data_ready_for_date(potential_target_str):
                         # Ensure we only plot dates *after* the last one plotted
                        if last_plotted_date_str is None or potential_target_str > last_plotted_date_str:
                             target_str_to_plot = potential_target_str
                        # else:
                        #    print(f"[Plotter] Skipping {potential_target_str}, already plotted or older than {last_plotted_date_str}", flush=True)


                # --- Plotting ---
                if target_str_to_plot:
                    prev_str = (datetime.strptime(target_str_to_plot, '%Y-%m-%d').date() - timedelta(days=1)).isoformat()
                    print(f"\n--- Plotting date D = {target_str_to_plot} (triggered by D+2 = {latest_date_in_store.isoformat()}) ---", flush=True)

                    draw_building_plot(fig_building, axes_building, target_str_to_plot)
                    draw_site_plot(fig_site, axes_site, target_str_to_plot, prev_str)
                    fig_building.canvas.draw_idle()
                    fig_site.canvas.draw_idle()

                    plot_occurred = True
                    last_plotted_date_str = target_str_to_plot

                    # Remove the plotted date's data
                    if target_str_to_plot in building_data_store: del building_data_store[target_str_to_plot]
                    if target_str_to_plot in site_data_store: del site_data_store[target_str_to_plot]
                    print(f"--- Plotted and removed data for {target_str_to_plot} ---", flush=True)
                # else:
                    # print(f"[Plotter] No suitable date to plot found.", flush=True) # Optional Debug


                # --- Debugger ---
                print(f"\n--- DEBUGGER @ {datetime.now().strftime('%H:%M:%S')} ---", flush=True)
                print(f"Latest Date in Store: {latest_date_in_store.isoformat() if latest_date_in_store else 'N/A'}", flush=True)
                print(f"Target to Plot: {target_str_to_plot if target_str_to_plot else 'None'}", flush=True)
                print(f"Last Plotted: {last_plotted_date_str if last_plotted_date_str else 'None'}", flush=True)

                # (Keep store counts and pruning logic as before)
                print("[Site Daily Counts (Oldest 10 in Store)]:", flush=True)
                sorted_site_keys = sorted(site_data_store.keys())[:10]
                if not sorted_site_keys: print("  (No data)", flush=True)
                else:
                    for date_str in sorted_site_keys:
                        count = sum(1 for v in site_data_store[date_str].values() if v > 0); print(f"  {date_str}: {count}/16 sites > 0", flush=True)
                print("[Building 6h Counts (Oldest 10 in Store)]:", flush=True)
                sorted_bldg_keys = sorted(building_data_store.keys())[:10]
                if not sorted_bldg_keys: print("  (No data)", flush=True)
                else:
                    for date_str in sorted_bldg_keys:
                        b_data = building_data_store[date_str]; c0 = len(b_data.get("0-6h", {})); c1 = len(b_data.get("6-12h", {})); c2 = len(b_data.get("12-18h", {})); c3 = len(b_data.get("18-24h", {}))
                        all_buckets_present = all(b in b_data for b in ["0-6h", "6-12h", "12-18h", "18-24h"]); all_buckets_sym = "âœ“" if all_buckets_present else "âœ—"
                        print(f"  {date_str}: Buckets:{all_buckets_sym} [{c0},{c1},{c2},{c3}]", flush=True)

                # --- Pruning ---
                prune_threshold_date = date.today() - timedelta(days=20); prune_threshold_str = prune_threshold_date.isoformat()
                prune_keys_b = [k for k in building_data_store if k < prune_threshold_str]; prune_keys_s = [k for k in site_data_store if k < prune_threshold_str]
                pruned_b_count = 0; pruned_s_count = 0
                for key in prune_keys_b:
                    if key in building_data_store: del building_data_store[key]; pruned_b_count += 1
                for key in prune_keys_s:
                    if key in site_data_store: del site_data_store[key]; pruned_s_count += 1
                if pruned_b_count > 0 or pruned_s_count > 0: print(f"Pruned {pruned_b_count} bldg / {pruned_s_count} site days (older than {prune_threshold_str}).", flush=True)

            # --- Wait ---
            plt.pause(5.0) # Check every 5 seconds

        except Exception as e:
            print(f"[Plotter Error] An error occurred: {e}. Retrying in 5s.", flush=True)
            traceback.print_exc()
            time.sleep(5)

# --- Main Execution Block ---
def cleanup_all_consumers():
    print("\n--- Sending stop signal & Cleaning up all consumers ---", flush=True)
    stop_event.set() # Signal threads to stop
    time.sleep(2) # Give threads a moment to exit loop
    for c in consumers_list:
        try: c.close()
        except: pass
    print("--- Cleanup complete ---", flush=True)

atexit.register(cleanup_all_consumers)

print("--- Plotter V3 (D+2 Logic, Consumer Poll) ---", flush=True)
try:
    print("Initializing plots...", flush=True)
    fig_building, axes_building = plt.subplots(2, 2, figsize=(12, 9))
    fig_building.show()
    fig_site, axes_site = plt.subplots(2, 1, figsize=(12, 9))
    fig_site.show()
    print("Plots initialized.", flush=True)

    thread_bldg = Thread(target=data_consumer_thread, args=(TOPIC_BUILDING,), daemon=True)
    thread_site = Thread(target=data_consumer_thread, args=(TOPIC_SITE,), daemon=True)
    thread_plotter = Thread(target=plotter_and_manager_thread, args=(fig_building, axes_building, fig_site, axes_site), daemon=True)

    print("Starting all threads...", flush=True)
    thread_bldg.start()
    thread_site.start()
    thread_plotter.start()

    print("--- All consumer and plotter threads are running. ---", flush=True)
    print("--- Plotter will plot date D when data for D+2 arrives. ---", flush=True)
    print("--- Interrupt the kernel to end. ---", flush=True)

    # Keep main thread alive cleanly
    while not stop_event.wait(timeout=10): # Check every 10s if interrupted
         pass

except KeyboardInterrupt:
    print("\n--- Main thread interrupted by user ---", flush=True)
except Exception as e:
    print(f"\n--- An error occurred in main thread: {e} ---", flush=True)
    traceback.print_exc()
finally:
    print("--- Main thread exiting, signaling stop ---", flush=True)
    stop_event.set() # Ensure stop is signaled on exit

--- Plotter V3 (D+2 Logic, Consumer Poll) ---
Initializing plots...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Plots initialized.
Starting all threads...
[building_6h] Data consumer thread started.
[site_daily] Data consumer thread started.
[Plotter] Plotter thread started.
--- All consumer and plotter threads are running. ---
[building_6h] Attempting connection with unique group_id='building_6h-plotter-v3-d0b4040a-9075-4683-bf92-c718654a18bc'...

--- DEBUGGER @ 01:43:14 ---
--- Plotter will plot date D when data for D+2 arrives. ---
[site_daily] Attempting connection with unique group_id='site_daily-plotter-v3-c2e2ecd1-f816-4761-b331-a2ef9bed2f7e'...
[building_6h] Consumer object created. Subscribing to topic...
Latest Date in Store: N/A
[site_daily] Consumer object created. Subscribing to topic...
--- Interrupt the kernel to end. ---
Target to Plot: None
Last Plotted: None
[Site Daily Counts (Oldest 10 in Store)]:
  (No data)
[Building 6h Counts (Oldest 10 in Store)]:
  (No data)

--- DEBUGGER @ 01:43:19 ---
Latest Date in Store: N/A
Target to Plot: None
Last Plotted: None
[Site Daily Counts 

Heartbeat poll expired, leaving group
Heartbeat poll expired, leaving group



--- Main thread interrupted by user ---
--- Main thread exiting, signaling stop ---


In [1]:
# --- Plotter V4 Optimized Lock ---
from kafka3 import KafkaConsumer, TopicPartition
import json
import time
from threading import Thread, Lock, Event
import atexit
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
import collections
import traceback
import uuid
import copy # Import copy for deepcopy

# --- Matplotlib Setup ---
%matplotlib notebook

# --- Configuration (Keep as before) ---
HOST_IP = "192.168.0.6"
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

# --- Shared Data Stores & Lock (Keep as before) ---
building_data_store = collections.defaultdict(lambda: collections.defaultdict(dict))
site_data_store = collections.defaultdict(lambda: {str(i): 0 for i in range(16)})
data_lock = Lock()

consumers_list = []
stop_event = Event()

# --- Kafka Connection (Keep connect_kafka_consumer_threaded as before) ---
def connect_kafka_consumer_threaded(topic):
    consumer = None
    group_id = f'{topic}-plotter-v4-{uuid.uuid4()}' # Unique group ID
    print(f"[{topic}] Attempting connection with unique group_id='{group_id}'...", flush=True)
    try:
        consumer = KafkaConsumer(
            topic,
            bootstrap_servers=[f'{HOST_IP}:9092'],
            auto_offset_reset='latest',
            group_id=group_id,
            value_deserializer=lambda x: x.decode('utf-8', errors='ignore'), # Raw string
            api_version=(0, 10),
            request_timeout_ms=65000,
            session_timeout_ms=60000,
            heartbeat_interval_ms=15000,
            enable_auto_commit=True
            # Consider adding max_poll_interval_ms if processing is very bursty
            # max_poll_interval_ms=300000 # e.g., 5 minutes
        )
        print(f"[{topic}] Consumer object created. Subscribing...", flush=True)
        consumer.subscribe([topic])
        time.sleep(5) # Allow assignment
        assignment = consumer.assignment()
        if not assignment: print(f"[{topic}] Warning: No partitions assigned after subscription.", flush=True);
        else: print(f"[{topic}] Connection and subscription SUCCESSFUL. Assignment: {assignment}", flush=True)
        consumers_list.append(consumer)
        return consumer
    except Exception as e:
        print(f"[{topic}] FAILED during connection/subscription: {e}", flush=True); traceback.print_exc()
        if consumer: consumer.close()
        return None

# --- Consumer Thread Function (Keep data_consumer_thread using poll as before) ---
def data_consumer_thread(topic):
    """Thread function using consumer.poll() for better responsiveness."""
    print(f"[{topic}] Data consumer thread started.", flush=True)
    consumer = connect_kafka_consumer_threaded(topic)
    if not consumer: print(f"[{topic}] Thread exiting: connection failure.", flush=True); return

    message_count = 0; error_count = 0; success_count = 0
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"] # Define here
    all_sites = [str(i) for i in range(16)] # Define here
    print(f"[{topic}] Entering polling loop...", flush=True)

    try:
        while not stop_event.is_set():
            msg_pack = consumer.poll(timeout_ms=1000)
            if not msg_pack: continue

            processed_in_batch = 0 # Track processed messages in this poll
            for tp, messages in msg_pack.items():
                for message in messages:
                    message_count += 1
                    raw_value_str = message.value
                    processed_in_batch += 1

                    try:
                        msg = json.loads(raw_value_str)
                        if not isinstance(msg, dict): error_count += 1; continue
                        date_str = msg.get('date')
                        if not date_str: error_count += 1; continue # Skip if no date

                        # --- Process based on topic ---
                        if topic == TOPIC_BUILDING:
                            bldg_id = msg.get('building_id'); time_bucket = msg.get('time'); val_raw = msg.get('total_power_6h')
                            if not all([bldg_id is not None, time_bucket in time_buckets, val_raw is not None]): error_count += 1; continue
                            try: val = float(val_raw)
                            except (ValueError, TypeError): error_count += 1; continue
                            with data_lock: building_data_store[date_str][time_bucket][str(bldg_id)] = val
                            success_count += 1
                        elif topic == TOPIC_SITE:
                            site_id_raw = msg.get('site_id'); val_raw = msg.get('total_power_day')
                            if site_id_raw is None or val_raw is None: error_count += 1; continue
                            try: val = float(val_raw)
                            except (ValueError, TypeError): error_count += 1; continue
                            site_id_str = str(site_id_raw)
                            if site_id_str not in all_sites: error_count += 1; continue
                            with data_lock: site_data_store[date_str][site_id_str] = val
                            success_count += 1
                    # Keep detailed error logging inside loop if needed
                    except json.JSONDecodeError: error_count += 1
                    except Exception: error_count += 1; traceback.print_exc() # Log other processing errors

            # Less frequent stats to reduce overhead
            if success_count > 0 and success_count % 5000 == 0:
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)
            # Add a small sleep if still seeing heartbeats, means processing one batch is too slow
            # time.sleep(0.05)


    except Exception as e_outer_loop: print(f"[{topic}] Error in Polling Loop: {e_outer_loop}", flush=True); traceback.print_exc()
    finally: print(f"[{topic}] Exiting polling loop. Final Counts - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)


# --- Plotting Functions (Modified to accept data copies) ---
# Keep is_data_ready_for_date as before
def is_data_ready_for_date(target_date_str):
    with data_lock:
        site_data = site_data_store.get(target_date_str)
        building_data = building_data_store.get(target_date_str)
        site_ready = site_data is not None and any(v > 0 for v in site_data.values())
        building_ready = building_data is not None and all(b in building_data for b in ["0-6h", "6-12h", "12-18h", "18-24h"])
    return site_ready and building_ready

def draw_building_plot(fig, axes, target_date_str, data_for_day_copy): # Accept copied data
    """Draws the 2x2 building plot for a specific target date using COPIED data."""
    global yaxis_max_building # Still update global max
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    ax_map = {"0-6h": axes[0, 0], "6-12h": axes[0, 1], "12-18h": axes[1, 0], "18-24h": axes[1, 1]}
    max_val_found = 0.0

    # Work with the provided copy
    data_for_day = data_for_day_copy

    for bucket in time_buckets:
        ax = ax_map[bucket]
        ax.cla()
        bucket_data = data_for_day.get(bucket, {}) # Use get() on the copy
        # *** This sorting can be slow - happens OUTSIDE the lock now ***
        sorted_items = sorted(bucket_data.items(), key=lambda item: item[1], reverse=True)
        top_8_items = sorted_items[:8]

        if top_8_items:
            labels, values = zip(*top_8_items)
            ax.bar(labels, values)
            ax.set_xticklabels(labels, rotation=75, ha='right')
            max_val_found = max(max_val_found, max(values) if values else 0)

        ax.set_title(f"Time: {bucket}")
        ax.set_ylabel("Total Power (6h)")

    # Update Y axis (safe outside lock as it only reads global max)
    yaxis_max_building = max(yaxis_max_building, max_val_found, 500)
    for ax in axes.flatten():
        ax.set_ylim(bottom=0, top=yaxis_max_building * 1.1)

    fig.suptitle(f"Building Top 8 Power (Date: {target_date_str})", y=1.02)
    # fig.tight_layout called later

def draw_site_plot(fig, axes, target_date_str, prev_date_str, curr_data_copy, prev_data_copy): # Accept copied data
    """Draws the 2x1 site plot using COPIED data."""
    global yaxis_max_site # Still update global max
    ax_prev, ax_curr = axes[0], axes[1]
    all_sites = [str(i) for i in range(16)]

    # Work with provided copies
    prev_data = prev_data_copy
    curr_data = curr_data_copy

    # *** Data extraction happens OUTSIDE the lock now ***
    prev_values = [prev_data.get(site, 0) for site in all_sites]
    curr_values = [curr_data.get(site, 0) for site in all_sites]

    max_val_found = 0.0
    if prev_values: max_val_found = max(max_val_found, max(prev_values))
    if curr_values: max_val_found = max(max_val_found, max(curr_values))

    yaxis_max_site = max(yaxis_max_site, max_val_found, 5000)

    # Plotting (safe outside lock)
    ax_prev.cla()
    ax_prev.bar(all_sites, prev_values, color='gray')
    ax_prev.set_title(f"Previous Day (Date: {prev_date_str})")
    ax_prev.set_ylabel("Total Power (Daily)")
    ax_prev.set_ylim(bottom=0, top=yaxis_max_site * 1.1)
    ax_prev.set_xticks(range(len(all_sites))); ax_prev.set_xticklabels(all_sites)

    ax_curr.cla()
    ax_curr.bar(all_sites, curr_values, color='blue')
    ax_curr.set_title(f"Target Day (Date: {target_date_str})")
    ax_curr.set_xlabel("Site ID")
    ax_curr.set_ylabel("Total Power (Daily)")
    ax_curr.set_ylim(bottom=0, top=yaxis_max_site * 1.1)
    ax_curr.set_xticks(range(len(all_sites))); ax_curr.set_xticklabels(all_sites)

    fig.suptitle("Daily Site Power Usage Comparison", y=1.0)
    # fig.tight_layout called later


# --- Plotter Thread (Optimized Lock Usage) ---
def plotter_and_manager_thread(fig_building, axes_building, fig_site, axes_site):
    """Plots oldest ready date D if D+2 exists, minimizing lock time."""
    print("[Plotter] Plotter thread started.", flush=True)
    last_plotted_date_str = None

    while not stop_event.is_set():
        plot_this_cycle_data = None # Store data needed for plotting if found
        keys_to_prune_b = []
        keys_to_prune_s = []
        target_str_to_plot = None
        prev_str_to_plot = None
        building_dates_count_dbg = 0
        site_dates_count_dbg = 0
        latest_date_in_store_dbg = None

        try:
            # === Start Critical Section (Lock Acquired) ===
            with data_lock:
                # --- Quick check for data and potential target ---
                all_dates_in_bldg = list(building_data_store.keys())
                all_dates_in_site = list(site_data_store.keys())
                all_dates_common = sorted(list(set(all_dates_in_bldg) & set(all_dates_in_site)))

                building_dates_count_dbg = len(all_dates_in_bldg) # For debug
                site_dates_count_dbg = len(all_dates_in_site) # For debug

                potential_target_str = None
                if all_dates_common:
                    # Find the latest date that has data 2 days after it
                    latest_date_str = max(all_dates_common) # Need the actual latest date overall
                    latest_date_in_store_dbg = latest_date_str # For debug

                    date_plus_2_target = (datetime.strptime(latest_date_str, '%Y-%m-%d').date() - timedelta(days=2))

                    # Iterate through common dates from oldest
                    for dt_str in all_dates_common:
                        current_dt = datetime.strptime(dt_str, '%Y-%m-%d').date()
                        if current_dt >= date_plus_2_target: # Stop if we reach the target window
                             break

                        # Check readiness for this date
                        if is_data_ready_for_date(dt_str):
                             # Check if newer than last plotted
                             if last_plotted_date_str is None or dt_str > last_plotted_date_str:
                                 potential_target_str = dt_str
                                 break # Found the oldest, ready, unplotted date satisfying D+2 logic

                # --- If target found, copy data and get keys to prune ---
                if potential_target_str:
                    target_str_to_plot = potential_target_str
                    prev_str_to_plot = (datetime.strptime(target_str_to_plot, '%Y-%m-%d').date() - timedelta(days=1)).isoformat()

                    # ** Deep Copy data needed for plotting **
                    bldg_data_copy = copy.deepcopy(building_data_store.get(target_str_to_plot, {}))
                    site_data_copy = copy.deepcopy(site_data_store.get(target_str_to_plot, {}))
                    site_prev_data_copy = copy.deepcopy(site_data_store.get(prev_str_to_plot, {}))

                    plot_this_cycle_data = {
                        "target_date": target_str_to_plot,
                        "prev_date": prev_str_to_plot,
                        "bldg_data": bldg_data_copy,
                        "site_data": site_data_copy,
                        "site_prev_data": site_prev_data_copy
                    }

                # --- Get keys for pruning (still under lock for consistency) ---
                prune_threshold_date = date.today() - timedelta(days=20)
                prune_threshold_str = prune_threshold_date.isoformat()
                keys_to_prune_b = [k for k in all_dates_in_bldg if k < prune_threshold_str]
                keys_to_prune_s = [k for k in all_dates_in_site if k < prune_threshold_str]

            # === End Critical Section (Lock Released) ===

            # --- Perform Slow Operations (Plotting) Outside Lock ---
            plot_occurred = False
            if plot_this_cycle_data:
                print(f"\n--- Plotting date: {plot_this_cycle_data['target_date']} ---", flush=True)
                draw_building_plot(fig_building, axes_building, plot_this_cycle_data['target_date'], plot_this_cycle_data['bldg_data'])
                draw_site_plot(fig_site, axes_site, plot_this_cycle_data['target_date'], plot_this_cycle_data['prev_date'], plot_this_cycle_data['site_data'], plot_this_cycle_data['site_prev_data'])

                # Redraw canvas (can happen outside lock)
                fig_building.tight_layout(rect=[0, 0.03, 1, 0.97])
                fig_site.tight_layout(rect=[0, 0.03, 1, 0.97])
                fig_building.canvas.draw_idle()
                fig_site.canvas.draw_idle()

                last_plotted_date_str = plot_this_cycle_data['target_date'] # Update last plotted date
                plot_occurred = True
                print(f"--- Plotting complete for {last_plotted_date_str} ---", flush=True)


            # --- Debug Print ---
            print(f"\n--- DEBUGGER @ {datetime.now().strftime('%H:%M:%S')} ---", flush=True)
            print(f"Latest Date in Store: {latest_date_in_store_dbg if latest_date_in_store_dbg else 'N/A'}", flush=True)
            print(f"Target Plotted: {last_plotted_date_str if last_plotted_date_str else 'None'}", flush=True)
            print(f"Building Store Dates Count: {building_dates_count_dbg}", flush=True)
            print(f"Site Store Dates Count: {site_dates_count_dbg}", flush=True)


            # === Start Critical Section for Deletion ===
            deleted_b_plot = False; deleted_s_plot = False
            pruned_b_count = 0; pruned_s_count = 0
            with data_lock:
                 # Remove plotted date's data
                 if plot_occurred:
                     if last_plotted_date_str in building_data_store:
                          del building_data_store[last_plotted_date_str]; deleted_b_plot = True
                     if last_plotted_date_str in site_data_store:
                          del site_data_store[last_plotted_date_str]; deleted_s_plot = True

                 # Prune old keys identified earlier
                 for key in keys_to_prune_b:
                     if key in building_data_store: del building_data_store[key]; pruned_b_count += 1
                 for key in keys_to_prune_s:
                     if key in site_data_store: del site_data_store[key]; pruned_s_count += 1
            # === End Deletion Critical Section ===

            if deleted_b_plot or deleted_s_plot:
                 print(f"--- Removed plotted data for {last_plotted_date_str} (Bldg: {deleted_b_plot}, Site: {deleted_s_plot}) ---", flush=True)
            if pruned_b_count > 0 or pruned_s_count > 0:
                print(f"Pruned {pruned_b_count} bldg / {pruned_s_count} site days (older than {prune_threshold_str}).", flush=True)


            # --- Wait ---
            plt.pause(5.0) # Check every 5 seconds

        except Exception as e:
            print(f"[Plotter Error] An error occurred: {e}. Retrying in 5s.", flush=True)
            traceback.print_exc()
            time.sleep(5)


# --- Main Execution Block (Keep as before) ---
def cleanup_all_consumers():
    print("\n--- Sending stop signal & Cleaning up all consumers ---", flush=True)
    stop_event.set()
    time.sleep(2)
    for c in consumers_list:
        try: c.close()
        except: pass
    print("--- Cleanup complete ---", flush=True)

atexit.register(cleanup_all_consumers)

print("--- Plotter V4 (Optimized Lock, D+2 Logic, Poll) ---", flush=True)
try:
    print("Initializing plots...", flush=True)
    fig_building, axes_building = plt.subplots(2, 2, figsize=(12, 9))
    fig_building.show()
    fig_site, axes_site = plt.subplots(2, 1, figsize=(12, 9))
    fig_site.show()
    print("Plots initialized.", flush=True)

    thread_bldg = Thread(target=data_consumer_thread, args=(TOPIC_BUILDING,), daemon=True)
    thread_site = Thread(target=data_consumer_thread, args=(TOPIC_SITE,), daemon=True)
    thread_plotter = Thread(target=plotter_and_manager_thread, args=(fig_building, axes_building, fig_site, axes_site), daemon=True)

    print("Starting all threads...", flush=True)
    thread_bldg.start()
    thread_site.start()
    thread_plotter.start()

    print("--- All consumer and plotter threads are running. ---", flush=True)
    print("--- Plotter will plot date D when data for D+2 arrives. ---", flush=True)
    print("--- Interrupt the kernel to end. ---", flush=True)

    while not stop_event.wait(timeout=10): pass # Keep main thread alive

except KeyboardInterrupt: print("\n--- Main thread interrupted ---", flush=True)
except Exception as e: print(f"\n--- Main thread error: {e} ---", flush=True); traceback.print_exc()
finally: print("--- Main thread exiting, signaling stop ---", flush=True); stop_event.set()

--- Plotter V4 (Optimized Lock, D+2 Logic, Poll) ---
Initializing plots...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Plots initialized.
Starting all threads...
[building_6h] Data consumer thread started.
[site_daily] Data consumer thread started.
[Plotter] Plotter thread started.
--- All consumer and plotter threads are running. ---
[building_6h] Attempting connection with unique group_id='building_6h-plotter-v4-da6f80bf-41d7-4f82-ac1f-4adaa7bd5b1e'...

--- DEBUGGER @ 02:01:43 ---
[site_daily] Attempting connection with unique group_id='site_daily-plotter-v4-dd062f64-e70a-433b-a1ee-0f7ec0637523'...
Latest Date in Store: N/A--- Plotter will plot date D when data for D+2 arrives. ---

[building_6h] Consumer object created. Subscribing...
Target Plotted: None
--- Interrupt the kernel to end. ---
[site_daily] Consumer object created. Subscribing...
Building Store Dates Count: 0
Site Store Dates Count: 0
[building_6h] Entering polling loop...
[site_daily] Entering polling loop...

--- DEBUGGER @ 02:01:48 ---
Latest Date in Store: N/A
Target Plotted: None
Building Store Dates Count: 0
Site Store Dates Coun

Heartbeat poll expired, leaving group
Heartbeat poll expired, leaving group



--- Main thread interrupted ---
--- Main thread exiting, signaling stop ---


In [1]:
# --- Simple Plotting Debugger V3 (Static X-Axis, Latest Date Logic) ---
from kafka3 import KafkaConsumer # Use kafka-python v3+ if available, otherwise just 'kafka'
import json
import time
from threading import Thread, Event # Removed Lock
import atexit
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from datetime import datetime, timedelta, date
import collections
import traceback
import uuid

# --- Matplotlib Setup ---
%matplotlib notebook

# --- Configuration ---
HOST_IP = "192.168.0.6" # Use your machine's IP
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

# --- Shared Data Stores (No Lock) ---
# Store full data, keyed by date
building_data_store = {} # {date_str: {time_bucket: {bldg_id: value}}}
site_data_store = {}     # {date_str: {site_id: value}}

# --- Store for latest points for processing (increased size) ---
# Store tuples: (date_str, time_bucket, bldg_id, value)
latest_building_info = collections.deque(maxlen=200)
# Store tuples: (date_str, site_id, value)
latest_site_info = collections.deque(maxlen=200)

consumers_list = [] # Keep track for cleanup
stop_event = Event() # For clean shutdown

# --- Kafka Connection (Keep recent working version) ---
def connect_kafka_consumer_threaded(topic):
    consumer = None
    group_id = f'{topic}-plotter-staticX-{uuid.uuid4()}' # Unique group ID
    print(f"[{topic}] Attempting connection with unique group_id='{group_id}'...", flush=True)
    try:
        consumer = KafkaConsumer(
            topic,
            bootstrap_servers=[f'{HOST_IP}:9092'],
            auto_offset_reset='latest',
            group_id=group_id,
            value_deserializer=lambda x: x.decode('utf-8', errors='ignore'), # Raw string
            api_version=(0, 10),
            request_timeout_ms=65000,
            session_timeout_ms=60000,
            heartbeat_interval_ms=15000,
            enable_auto_commit=True
        )
        print(f"[{topic}] Consumer object created. Subscribing...", flush=True)
        consumer.subscribe([topic])
        time.sleep(5) # Allow assignment
        assignment = consumer.assignment()
        if not assignment: print(f"[{topic}] Warning: No partitions assigned.", flush=True);
        else: print(f"[{topic}] Connection and subscription SUCCESSFUL. Assignment: {assignment}", flush=True)
        consumers_list.append(consumer)
        return consumer
    except Exception as e:
        print(f"[{topic}] FAILED connection/subscription: {e}", flush=True); traceback.print_exc()
        if consumer: consumer.close()
        return None

# --- Consumer Thread Function (Append structured data to deque) ---
def data_consumer_thread(topic):
    """Consumes data using poll, parses, stores, appends structured info to deque."""
    print(f"[{topic}] Data consumer thread started.", flush=True)
    consumer = connect_kafka_consumer_threaded(topic)
    if not consumer: print(f"[{topic}] Thread exiting: connection failure.", flush=True); return

    message_count = 0; error_count = 0; success_count = 0
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    all_sites = [str(i) for i in range(16)]
    print(f"[{topic}] Entering polling loop...", flush=True)

    try:
        while not stop_event.is_set():
            msg_pack = consumer.poll(timeout_ms=1000)
            if not msg_pack: continue

            processed_in_batch = 0
            for tp, messages in msg_pack.items():
                for message in messages:
                    message_count += 1
                    raw_value_str = message.value
                    processed_in_batch += 1

                    try:
                        msg = json.loads(raw_value_str)
                        if not isinstance(msg, dict): error_count += 1; continue
                        date_str = msg.get('date')
                        if not date_str: error_count += 1; continue

                        # --- Process based on topic (NO LOCK) ---
                        if topic == TOPIC_BUILDING:
                            bldg_id_raw = msg.get('building_id'); time_bucket = msg.get('time'); val_raw = msg.get('total_power_6h')
                            if not all([bldg_id_raw is not None, time_bucket in time_buckets, val_raw is not None]): error_count += 1; continue
                            try: val = float(val_raw); bldg_id = str(bldg_id_raw)
                            except (ValueError, TypeError): error_count += 1; continue

                            # Store in main dict (optional for this plotter, but keeps full data)
                            if date_str not in building_data_store: building_data_store[date_str] = collections.defaultdict(dict)
                            building_data_store[date_str][time_bucket][bldg_id] = val
                            # Append structured info to deque
                            latest_building_info.append((date_str, time_bucket, bldg_id, val))
                            success_count += 1

                        elif topic == TOPIC_SITE:
                            site_id_raw = msg.get('site_id'); val_raw = msg.get('total_power_day')
                            if site_id_raw is None or val_raw is None: error_count += 1; continue
                            try: val = float(val_raw); site_id = str(site_id_raw)
                            except (ValueError, TypeError): error_count += 1; continue
                            if site_id not in all_sites: error_count += 1; continue

                            # Store in main dict (optional for this plotter)
                            if date_str not in site_data_store: site_data_store[date_str] = {s: 0 for s in all_sites}
                            site_data_store[date_str][site_id] = val
                            # Append structured info to deque
                            latest_site_info.append((date_str, site_id, val))
                            success_count += 1
                    # Keep detailed error logging inside loop if needed
                    except json.JSONDecodeError: error_count += 1 #print(f"[{topic}] JSON Error (Offset {message.offset})", flush=True);
                    except Exception: error_count += 1; traceback.print_exc()

            if message_count > 0 and message_count % 5000 == 0:
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)

    except Exception as e_outer_loop: print(f"[{topic}] Error in Polling Loop: {e_outer_loop}", flush=True); traceback.print_exc()
    finally: print(f"[{topic}] Exiting polling loop. Final Counts - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)


# # --- Plotter and Manager Thread (Static X-Axis, Latest Date) (Forced Event Processing) ---

def static_plotter_manager_thread(fig, ax_bldg, ax_site):
    """Plots data for the most recent date found in SMALLER deques, forcing GUI events."""
    print("[Plotter] Plotter thread started.", flush=True)
    building_x_labels = ["0-6h", "6-12h", "12-18h", "18-24h"]
    site_x_labels = [str(i) for i in range(16)]
    first_plot_done = False # Track if the first plot has rendered

    while not stop_event.is_set():
        start_time = time.time() # Track loop duration
        try:
            # --- Process Deques (Keep this part the same as V3) ---
            most_recent_date_str = None
            building_agg_data = {bucket: 0.0 for bucket in building_x_labels}
            site_agg_data = {site: 0.0 for site in site_x_labels}
            has_data_to_plot = False

            if latest_building_info or latest_site_info:
                all_bldg_dates = [item[0] for item in latest_building_info]
                all_site_dates = [item[0] for item in latest_site_info]
                all_dates = sorted(list(set(all_bldg_dates) | set(all_site_dates)))

                if all_dates:
                    most_recent_date_str = all_dates[-1]
                    # (Aggregation logic for building_agg_data and site_agg_data remains the same)
                    temp_bldg_buckets = collections.defaultdict(list)
                    for date_str, time_bucket, bldg_id, value in latest_building_info:
                        if date_str == most_recent_date_str and time_bucket in building_agg_data: temp_bldg_buckets[time_bucket].append(value)
                    for bucket, values in temp_bldg_buckets.items():
                        if values: building_agg_data[bucket] = sum(values)

                    temp_site_values = {}
                    for date_str, site_id, value in latest_site_info:
                         if date_str == most_recent_date_str and site_id in site_agg_data: temp_site_values[site_id] = value
                    for site_id, value in temp_site_values.items(): site_agg_data[site_id] = value

                    if any(v != 0 for v in building_agg_data.values()) or any(v != 0 for v in site_agg_data.values()):
                         has_data_to_plot = True

            # --- Plotting ---
            if has_data_to_plot:
                ax_bldg.clear(); ax_site.clear() # Clear axes first

                bldg_y_values = [building_agg_data[bucket] for bucket in building_x_labels]
                ax_bldg.bar(building_x_labels, bldg_y_values, label=f'Date: {most_recent_date_str}')
                # (rest of building plot settings)
                ax_bldg.set_title(f"Building Power Sum per Time Bucket")
                ax_bldg.set_ylabel("Total Power (Sum)"); ax_bldg.set_xlabel("Time Bucket")
                ax_bldg.legend(loc='upper left'); ax_bldg.grid(True)
                ax_bldg.yaxis.set_major_formatter(mticker.ScalarFormatter(useMathText=False))
                ax_bldg.ticklabel_format(style='plain', axis='y')

                site_y_values = [site_agg_data[site] for site in site_x_labels]
                ax_site.bar(site_x_labels, site_y_values, color='orange', label=f'Date: {most_recent_date_str}')
                # (rest of site plot settings)
                ax_site.set_title(f"Site Power per Site ID")
                ax_site.set_ylabel("Total Power (Latest)"); ax_site.set_xlabel("Site ID")
                ax_site.legend(loc='upper left'); ax_site.grid(True)
                ax_site.yaxis.set_major_formatter(mticker.ScalarFormatter(useMathText=False))
                ax_site.ticklabel_format(style='plain', axis='y')

                fig.tight_layout(rect=[0, 0.03, 1, 0.95])

                # ** Explicitly draw and process events **
                fig.canvas.draw_idle() # Request redraw
                fig.canvas.flush_events() # Process GUI events NOW
                if not first_plot_done:
                    print("[Plotter] First plot data processed and drawn.", flush=True)
                    first_plot_done = True # Mark that we've drawn at least once

            elif not first_plot_done: # Only show waiting if nothing has ever been plotted
                ax_bldg.clear(); ax_site.clear()
                ax_bldg.set_title("Building Power (Waiting...)")
                ax_site.set_title("Site Power (Waiting...)")
                fig.canvas.draw_idle()
                fig.canvas.flush_events() # Process the clear command

            # --- Debug Print (Keep as is) ---
            current_time_str = datetime.now().strftime('%H:%M:%S')
            if int(current_time_str.split(':')[-1]) % 10 == 0:
                print(f"\n--- DEBUG @ {current_time_str} ---", flush=True)
                # ... (rest of debug printing) ...
                print(f"  Latest Date Processed: {most_recent_date_str if most_recent_date_str else 'N/A'}", flush=True)
                print(f"  Building Deque Size: {len(latest_building_info)}", flush=True)
                print(f"  Site Deque Size: {len(latest_site_info)}", flush=True)


            # --- Wait ---
            # Calculate remaining time to sleep to maintain roughly 2s interval
            elapsed_time = time.time() - start_time
            sleep_time = max(0.1, 2.0 - elapsed_time) # Sleep at least 0.1s
            # Use time.sleep() instead of plt.pause()
            time.sleep(sleep_time)
            # ** After sleeping, process events again **
            fig.canvas.flush_events()


        except Exception as e:
            print(f"[Plotter Error] An error occurred: {e}", flush=True)
            traceback.print_exc()
            time.sleep(2.0) # Wait before retrying

# --- Main Execution Block (Adjusted Figure Setup) ---
def cleanup_all_consumers():
    print("\n--- Sending stop signal & Cleaning up all consumers ---", flush=True)
    stop_event.set(); time.sleep(2)
    for c in consumers_list:
        try: c.close()
        except: pass
    print("--- Cleanup complete ---", flush=True)

atexit.register(cleanup_all_consumers)

print("--- Simple Plotting V3 (Static X, Latest Date) ---", flush=True)
try:
    print("Initializing plot...", flush=True)
    # Use two separate axes, no shared X
    fig, (ax_bldg, ax_site) = plt.subplots(2, 1, figsize=(10, 9)) # Slightly taller
    fig.show()
    print("Plots initialized.", flush=True)

    thread_bldg = Thread(target=data_consumer_thread, args=(TOPIC_BUILDING,), daemon=True)
    thread_site = Thread(target=data_consumer_thread, args=(TOPIC_SITE,), daemon=True)
    thread_plotter = Thread(target=static_plotter_manager_thread, args=(fig, ax_bldg, ax_site), daemon=True)

    print("Starting all threads...", flush=True)
    thread_bldg.start()
    thread_site.start()
    thread_plotter.start()

    print("--- All consumer and plotter threads are running. ---", flush=True)
    print("--- Plotter shows aggregated data for latest date received. ---", flush=True)
    print("--- Interrupt the kernel to end. ---", flush=True)

    while not stop_event.wait(timeout=10): pass # Keep main thread alive

except KeyboardInterrupt: print("\n--- Main thread interrupted ---", flush=True)
except Exception as e: print(f"\n--- Main thread error: {e} ---", flush=True); traceback.print_exc()
finally: print("--- Main thread exiting, signaling stop ---", flush=True); stop_event.set()

--- Simple Plotting V3 (Static X, Latest Date) ---
Initializing plot...


<IPython.core.display.Javascript object>

Plots initialized.
Starting all threads...
[building_6h] Data consumer thread started.
[building_6h] Attempting connection with unique group_id='building_6h-plotter-staticX-21b6e1a7-ff37-4528-9201-51477d34d0b1'...
[site_daily] Data consumer thread started.
[Plotter] Plotter thread started.
--- All consumer and plotter threads are running. ---
--- Plotter shows aggregated data for latest date received. ---
[building_6h] Consumer object created. Subscribing...
[site_daily] Attempting connection with unique group_id='site_daily-plotter-staticX-c087bf3a-0bdd-4d4a-a860-86365f828e50'...
--- Interrupt the kernel to end. ---
[site_daily] Consumer object created. Subscribing...

--- DEBUG @ 06:56:00 ---
  Latest Date Processed: N/A
  Building Deque Size: 0
  Site Deque Size: 0
[building_6h] Entering polling loop...
[site_daily] Entering polling loop...
[Plotter] First plot data processed and drawn.

--- DEBUG @ 06:56:10 ---
  Latest Date Processed: 2022-09-21
  Building Deque Size: 0
  Site Deq

In [1]:
# --- Plotter V5 (Optimized Aggregation, Conditional Plot) ---
from kafka3 import KafkaConsumer # Use kafka-python v3+
import json
import time
from threading import Thread, Event # No Lock needed for this approach
import atexit
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from datetime import datetime, timedelta, date
import collections
import traceback
import uuid

# --- Matplotlib Setup ---
%matplotlib notebook

# --- Configuration ---
HOST_IP = "192.168.0.6" # Use your machine's IP
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

# --- Shared Data Stores (Optional, mainly for debug/pruning) ---
building_data_store = {}
site_data_store = {}

# --- Store for latest points for processing (REDUCED size) ---
latest_building_info = collections.deque(maxlen=200) # (date_str, time_bucket, bldg_id, value)
latest_site_info = collections.deque(maxlen=200)     # (date_str, site_id, value)

consumers_list = []
stop_event = Event()

# --- Kafka Connection (Keep as is) ---
def connect_kafka_consumer_threaded(topic):
    consumer = None
    group_id = f'{topic}-plotter-opt-{uuid.uuid4()}'
    print(f"[{topic}] Attempting connection with group_id='{group_id}'...", flush=True)
    try:
        consumer = KafkaConsumer(
            topic, bootstrap_servers=[f'{HOST_IP}:9092'], auto_offset_reset='latest',
            group_id=group_id, value_deserializer=lambda x: x.decode('utf-8', errors='ignore'),
            api_version=(0, 10), request_timeout_ms=65000, session_timeout_ms=60000,
            heartbeat_interval_ms=15000, enable_auto_commit=True
        )
        print(f"[{topic}] Subscribing...", flush=True)
        consumer.subscribe([topic])
        time.sleep(5); assignment = consumer.assignment()
        if not assignment: print(f"[{topic}] Warning: No partitions assigned.", flush=True);
        else: print(f"[{topic}] Connection SUCCESSFUL. Assignment: {assignment}", flush=True)
        consumers_list.append(consumer)
        return consumer
    except Exception as e: print(f"[{topic}] FAILED connection: {e}", flush=True); traceback.print_exc(); return None

# --- Consumer Thread Function (ADD SITE DIAGNOSTICS) ---
def data_consumer_thread(topic):
    print(f"[{topic}] Data consumer thread started.", flush=True)
    consumer = connect_kafka_consumer_threaded(topic)
    if not consumer: print(f"[{topic}] Thread exiting: connection failure.", flush=True); return

    message_count = 0; error_count = 0; success_count = 0
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    all_sites = [str(i) for i in range(16)]
    print(f"[{topic}] Entering polling loop...", flush=True)

    try:
        while not stop_event.is_set():
            msg_pack = consumer.poll(timeout_ms=1000) # Poll with timeout
            if not msg_pack: continue # No messages, continue polling

            processed_in_batch = 0
            for tp, messages in msg_pack.items():
                for message in messages:
                    message_count += 1
                    raw_value_str = message.value
                    processed_in_batch += 1
                    msg = None # Ensure msg is defined for error logging

                    try:
                        # --- Site Specific Log ---
                        if topic == TOPIC_SITE: print(f"[{topic}] Before json.loads (Offset {message.offset})", flush=True)
                        msg = json.loads(raw_value_str)
                        # --- Site Specific Log ---
                        if topic == TOPIC_SITE: print(f"[{topic}] After json.loads (Offset {message.offset})", flush=True)

                        if not isinstance(msg, dict): error_count += 1; continue
                        date_str = msg.get('date')
                        if not date_str: error_count += 1; continue

                        if topic == TOPIC_BUILDING:
                            bldg_id_raw = msg.get('building_id'); time_bucket = msg.get('time'); val_raw = msg.get('total_power_6h')
                            if not all([bldg_id_raw is not None, time_bucket in time_buckets, val_raw is not None]): error_count += 1; continue
                            try: val = float(val_raw); bldg_id = str(bldg_id_raw)
                            except (ValueError, TypeError): error_count += 1; continue

                            latest_building_info.append((date_str, time_bucket, bldg_id, val))
                            # Optional: Update main store if needed for other purposes
                            # if date_str not in building_data_store: building_data_store[date_str] = collections.defaultdict(dict)
                            # building_data_store[date_str][time_bucket][bldg_id] = val
                            success_count += 1

                        elif topic == TOPIC_SITE:
                            site_id_raw = msg.get('site_id'); val_raw = msg.get('total_power_day')
                            if site_id_raw is None or val_raw is None: error_count += 1; continue
                            try: val = float(val_raw); site_id = str(site_id_raw)
                            except (ValueError, TypeError): error_count += 1; continue
                            if site_id not in all_sites: error_count += 1; continue

                            # --- Site Specific Log ---
                            print(f"[{topic}] Before append (Offset {message.offset})", flush=True)
                            latest_site_info.append((date_str, site_id, val))
                            # --- Site Specific Log ---
                            print(f"[{topic}] After append (Offset {message.offset})", flush=True)
                            # Optional: Update main store
                            # if date_str not in site_data_store: site_data_store[date_str] = {s: 0 for s in all_sites}
                            # site_data_store[date_str][site_id] = val
                            success_count += 1

                    except json.JSONDecodeError as e:
                         print(f"[{topic}] JSON Error (Offset {message.offset}): {e}", flush=True); error_count += 1
                    except Exception as e_parse:
                         print(f"[{topic}] Processing Error (Offset {message.offset}): {e_parse}. Msg: {msg}", flush=True); traceback.print_exc(); error_count += 1

            # Print stats less frequently
            if message_count > 0 and message_count % 10000 == 0:
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)

    except Exception as e_outer_loop: print(f"[{topic}] Error in Polling Loop: {e_outer_loop}", flush=True); traceback.print_exc()
    finally: print(f"[{topic}] Exiting polling loop. Final Counts - Processed: {message_count}, Succeeded: {success_count}, Failed: {error_count}", flush=True)


# --- Plotting Functions (Keep draw_building_plot & draw_site_plot as before) ---
yaxis_max_building = 500.0
yaxis_max_site = 5000.0
def draw_building_plot(fig, axes, target_date_str, data_for_day_copy):
    global yaxis_max_building; time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]; ax_map = {"0-6h": axes[0, 0], "6-12h": axes[0, 1], "12-18h": axes[1, 0], "18-24h": axes[1, 1]}; max_val_found = 0.0; data_for_day = data_for_day_copy
    for bucket in time_buckets:
        ax = ax_map[bucket]; ax.cla(); bucket_data = data_for_day.get(bucket, {}); sorted_items = sorted(bucket_data.items(), key=lambda item: item[1], reverse=True); top_8_items = sorted_items[:8]
        if top_8_items: labels, values = zip(*top_8_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found = max(max_val_found, max(values) if values else 0)
        ax.set_title(f"Time: {bucket}"); ax.set_ylabel("Total Power (6h)")
    yaxis_max_building = max(yaxis_max_building, max_val_found, 500);
    for ax in axes.flatten(): ax.set_ylim(bottom=0, top=yaxis_max_building * 1.1)
    fig.suptitle(f"Building Top 8 Power (Date: {target_date_str})", y=1.02)
def draw_site_plot(fig, axes, target_date_str, prev_date_str, curr_data_copy, prev_data_copy):
    global yaxis_max_site; ax_prev, ax_curr = axes[0], axes[1]; all_sites = [str(i) for i in range(16)]; prev_data = prev_data_copy; curr_data = curr_data_copy
    prev_values = [prev_data.get(site, 0) for site in all_sites]; curr_values = [curr_data.get(site, 0) for site in all_sites]; max_val_found = 0.0
    if prev_values: max_val_found = max(max_val_found, max(prev_values));
    if curr_values: max_val_found = max(max_val_found, max(curr_values));
    yaxis_max_site = max(yaxis_max_site, max_val_found, 5000)
    ax_prev.cla(); ax_prev.bar(all_sites, prev_values, color='gray'); ax_prev.set_title(f"Previous Day (Date: {prev_date_str})"); ax_prev.set_ylabel("Total Power (Daily)"); ax_prev.set_ylim(bottom=0, top=yaxis_max_site * 1.1); ax_prev.set_xticks(range(len(all_sites))); ax_prev.set_xticklabels(all_sites)
    ax_curr.cla(); ax_curr.bar(all_sites, curr_values, color='blue'); ax_curr.set_title(f"Target Day (Date: {target_date_str})"); ax_curr.set_xlabel("Site ID"); ax_curr.set_ylabel("Total Power (Daily)"); ax_curr.set_ylim(bottom=0, top=yaxis_max_site * 1.1); ax_curr.set_xticks(range(len(all_sites))); ax_curr.set_xticklabels(all_sites)
    fig.suptitle("Daily Site Power Usage Comparison", y=1.0)

# --- Plotter Thread (Optimized Aggregation, Conditional Plot) ---
def optimized_plotter_thread(fig_building, axes_building, fig_site, axes_site):
    """Aggregates deques efficiently, plots only when latest date changes."""
    print("[Plotter] Plotter thread started.", flush=True)
    building_x_labels = ["0-6h", "6-12h", "12-18h", "18-24h"]
    site_x_labels = [str(i) for i in range(16)]
    last_plotted_latest_date = None # Track the latest date plotted

    while not stop_event.is_set():
        try:
            # --- Single Pass Aggregation (No Lock) ---
            aggregated_building = collections.defaultdict(lambda: {b: [] for b in building_x_labels}) # {date: {bucket: [values]}}
            aggregated_site = collections.defaultdict(dict) # {date: {site: latest_value}}
            current_latest_date = None

            # Process building deque
            local_bldg_deque = list(latest_building_info) # Quick copy for iteration
            for date_str, time_bucket, bldg_id, value in local_bldg_deque:
                if time_bucket in aggregated_building[date_str]:
                    aggregated_building[date_str][time_bucket].append(value)
                if current_latest_date is None or date_str > current_latest_date:
                    current_latest_date = date_str

            # Process site deque
            local_site_deque = list(latest_site_info) # Quick copy for iteration
            for date_str, site_id, value in local_site_deque:
                aggregated_site[date_str][site_id] = value # Keep latest value
                if current_latest_date is None or date_str > current_latest_date:
                    current_latest_date = date_str

            # --- Check if we should plot ---
            if current_latest_date and current_latest_date != last_plotted_latest_date:
                print(f"\n--- New latest date detected: {current_latest_date}. Preparing plot... ---", flush=True)

                # Prepare data for plotting for the current_latest_date
                bldg_data_to_plot = {b: sum(aggregated_building[current_latest_date][b]) for b in building_x_labels} # Calculate sums
                site_data_to_plot = aggregated_site[current_latest_date] # Already has latest values
                # Get previous date's site data (use get for safety)
                prev_date_plot = (datetime.strptime(current_latest_date, '%Y-%m-%d').date() - timedelta(days=1)).isoformat()
                site_prev_data_to_plot = aggregated_site.get(prev_date_plot, {}) # Use aggregated data

                # --- Plotting ---
                # ** Simulate draw functions directly for simplicity **

                # Building Plot Update
                axes_building[0,0].cla(); axes_building[0,1].cla(); axes_building[1,0].cla(); axes_building[1,1].cla() # Clear all
                ax_map = {"0-6h": axes_building[0, 0], "6-12h": axes_building[0, 1], "12-18h": axes_building[1, 0], "18-24h": axes_building[1, 1]}
                temp_bldg_data_for_top8 = {} # Need to aggregate for top 8 within each bucket for this specific date
                for date_s, bucket, bldg_id, val in local_bldg_deque:
                    if date_s == current_latest_date:
                       if bucket not in temp_bldg_data_for_top8: temp_bldg_data_for_top8[bucket] = {}
                       temp_bldg_data_for_top8[bucket][bldg_id] = val # Store individual values

                max_val_found_bldg = 0
                for bucket in building_x_labels:
                    ax = ax_map[bucket]
                    bucket_data_indiv = temp_bldg_data_for_top8.get(bucket, {})
                    sorted_items = sorted(bucket_data_indiv.items(), key=lambda item: item[1], reverse=True)[:8]
                    if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
                    ax.set_title(f"Time: {bucket}"); ax.set_ylabel("Total Power (6h)")

                global yaxis_max_building; yaxis_max_building = max(yaxis_max_building, max_val_found_bldg, 500)
                for ax in axes_building.flatten(): ax.set_ylim(bottom=0, top=yaxis_max_building * 1.1)
                fig_building.suptitle(f"Building Top 8 Power (Date: {current_latest_date})", y=1.02)

                # Site Plot Update
                ax_prev_site, ax_curr_site = axes_site[0], axes_site[1]
                site_curr_values = [site_data_to_plot.get(site, 0) for site in site_x_labels]
                site_prev_values = [site_prev_data_to_plot.get(site, 0) for site in site_x_labels]
                max_val_found_site = 0
                if site_prev_values: max_val_found_site = max(max_val_found_site, max(site_prev_values))
                if site_curr_values: max_val_found_site = max(max_val_found_site, max(site_curr_values))
                global yaxis_max_site; yaxis_max_site = max(yaxis_max_site, max_val_found_site, 5000)

                ax_prev_site.cla(); ax_prev_site.bar(site_x_labels, site_prev_values, color='gray'); ax_prev_site.set_title(f"Previous Day ({prev_date_plot})"); ax_prev_site.set_ylabel("Total Power"); ax_prev_site.set_ylim(bottom=0, top=yaxis_max_site * 1.1); ax_prev_site.set_xticks(range(len(site_x_labels))); ax_prev_site.set_xticklabels(site_x_labels)
                ax_curr_site.cla(); ax_curr_site.bar(site_x_labels, site_curr_values, color='blue'); ax_curr_site.set_title(f"Target Day ({current_latest_date})"); ax_curr_site.set_xlabel("Site ID"); ax_curr_site.set_ylabel("Total Power"); ax_curr_site.set_ylim(bottom=0, top=yaxis_max_site * 1.1); ax_curr_site.set_xticks(range(len(site_x_labels))); ax_curr_site.set_xticklabels(site_x_labels)
                fig_site.suptitle("Daily Site Power Usage Comparison", y=1.0)


                # Redraw
                fig_building.tight_layout(rect=[0, 0.03, 1, 0.97])
                fig_site.tight_layout(rect=[0, 0.03, 1, 0.97])
                fig_building.canvas.draw_idle()
                fig_site.canvas.draw_idle()

                last_plotted_latest_date = current_latest_date # Update last plotted date
                print(f"--- Plots updated for date: {current_latest_date} ---", flush=True)


            # --- Debug Print (Less Frequent) ---
            current_time_s = datetime.now().strftime('%S')
            if int(current_time_s) % 10 < 2: # Print near seconds 00, 10, 20, etc.
                print(f"\n--- Plotter Loop @ {datetime.now().strftime('%H:%M:%S')} ---", flush=True)
                print(f"  Latest Date in Deques: {current_latest_date if current_latest_date else 'N/A'}", flush=True)
                print(f"  Last Plotted Date: {last_plotted_latest_date if last_plotted_latest_date else 'None'}", flush=True)
                print(f"  Building Deque Size: {len(latest_building_info)}", flush=True)
                print(f"  Site Deque Size: {len(latest_site_info)}", flush=True)
                # Optional: Pruning logic for main stores if using them


            # --- Wait ---
            plt.pause(2.0) # Check every 2 seconds

        except Exception as e:
            print(f"[Plotter Error] An error occurred: {e}", flush=True)
            traceback.print_exc()
            time.sleep(2.0)


# --- Main Execution Block & Cleanup (Keep as before) ---
def cleanup_all_consumers():
    print("\n--- Sending stop signal & Cleaning up all consumers ---", flush=True)
    stop_event.set(); time.sleep(2)
    for c in consumers_list:
        try: c.close()
        except: pass
    print("--- Cleanup complete ---", flush=True)

atexit.register(cleanup_all_consumers)

print("--- Plotter V5 (Optimized Aggregation, Conditional Plot) ---", flush=True)
try:
    print("Initializing plots...", flush=True)
    fig_building, axes_building = plt.subplots(2, 2, figsize=(12, 9)) # Reverted to 2x2 for buildings
    fig_building.show()
    fig_site, axes_site = plt.subplots(2, 1, figsize=(10, 9)) # Keep 2x1 for sites
    fig_site.show()
    print("Plots initialized.", flush=True)

    thread_bldg = Thread(target=data_consumer_thread, args=(TOPIC_BUILDING,), daemon=True)
    thread_site = Thread(target=data_consumer_thread, args=(TOPIC_SITE,), daemon=True)
    thread_plotter = Thread(target=optimized_plotter_thread, args=(fig_building, axes_building, fig_site, axes_site), daemon=True) # Use new plotter function

    print("Starting all threads...", flush=True)
    thread_bldg.start()
    thread_site.start()
    thread_plotter.start()

    print("--- All consumer and plotter threads are running. ---", flush=True)
    print("--- Plotter shows aggregated data for latest date when it changes. ---", flush=True)
    print("--- Interrupt the kernel to end. ---", flush=True)

    while not stop_event.wait(timeout=10): pass

except KeyboardInterrupt: print("\n--- Main thread interrupted ---", flush=True)
except Exception as e: print(f"\n--- Main thread error: {e} ---", flush=True); traceback.print_exc()
finally: print("--- Main thread exiting, signaling stop ---", flush=True); stop_event.set()

--- Plotter V5 (Optimized Aggregation, Conditional Plot) ---
Initializing plots...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Plots initialized.
Starting all threads...
[building_6h] Data consumer thread started.
[site_daily] Data consumer thread started.
[building_6h] Attempting connection with group_id='building_6h-plotter-opt-cf2aeb20-070e-4a0d-b210-b10d698431a4'...
[Plotter] Plotter thread started.
--- All consumer and plotter threads are running. ---
[site_daily] Attempting connection with group_id='site_daily-plotter-opt-1d0c049a-8e9b-4a69-9a16-0de14e14ab10'...
[building_6h] Subscribing...
--- Plotter shows aggregated data for latest date when it changes. ---
[site_daily] Subscribing...
--- Interrupt the kernel to end. ---

--- Plotter Loop @ 06:16:40 ---
  Latest Date in Deques: N/A
  Last Plotted Date: None
  Building Deque Size: 0
  Site Deque Size: 0
[building_6h] Entering polling loop...
[site_daily] Entering polling loop...

--- New latest date detected: 2022-04-27. Preparing plot... ---


  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-04-27 ---

--- Plotter Loop @ 06:16:51 ---
  Latest Date in Deques: 2022-04-27
  Last Plotted Date: 2022-04-27
  Building Deque Size: 200
  Site Deque Size: 0
[site_daily] Before json.loads (Offset 161185)
[site_daily] After json.loads (Offset 161185)
[site_daily] Before append (Offset 161185)
[site_daily] After append (Offset 161185)
[site_daily] Before json.loads (Offset 161186)
[site_daily] After json.loads (Offset 161186)
[site_daily] Before append (Offset 161186)
[site_daily] After append (Offset 161186)
[site_daily] Before json.loads (Offset 161187)
[site_daily] After json.loads (Offset 161187)
[site_daily] Before append (Offset 161187)
[site_daily] After append (Offset 161187)
[site_daily] Before json.loads (Offset 161188)
[site_daily] After json.loads (Offset 161188)
[site_daily] Before append (Offset 161188)
[site_daily] After append (Offset 161188)
[site_daily] Before json.loads (Offset 161189)
[site_daily] After json.loads (Offset 161189)
[si

[site_daily] Before json.loads (Offset 161230)
[site_daily] After json.loads (Offset 161230)
[site_daily] Before append (Offset 161230)
[site_daily] After append (Offset 161230)
[site_daily] Before json.loads (Offset 161231)
[site_daily] After json.loads (Offset 161231)
[site_daily] Before append (Offset 161231)
[site_daily] After append (Offset 161231)
[site_daily] Before json.loads (Offset 161232)
[site_daily] After json.loads (Offset 161232)
[site_daily] Before append (Offset 161232)
[site_daily] After append (Offset 161232)
[site_daily] Before json.loads (Offset 161233)
[site_daily] After json.loads (Offset 161233)
[site_daily] Before append (Offset 161233)
[site_daily] After append (Offset 161233)
[site_daily] Before json.loads (Offset 161234)
[site_daily] After json.loads (Offset 161234)
[site_daily] Before append (Offset 161234)
[site_daily] After append (Offset 161234)
[site_daily] Before json.loads (Offset 161235)
[site_daily] After json.loads (Offset 161235)
[site_daily] Befo

[site_daily] After json.loads (Offset 161276)
[site_daily] Before append (Offset 161276)
[site_daily] After append (Offset 161276)
[site_daily] Before json.loads (Offset 161277)
[site_daily] After json.loads (Offset 161277)
[site_daily] Before append (Offset 161277)
[site_daily] After append (Offset 161277)
[site_daily] Before json.loads (Offset 161278)
[site_daily] After json.loads (Offset 161278)
[site_daily] Before append (Offset 161278)
[site_daily] After append (Offset 161278)
[site_daily] Before json.loads (Offset 161279)
[site_daily] After json.loads (Offset 161279)
[site_daily] Before append (Offset 161279)
[site_daily] After append (Offset 161279)
[site_daily] Before json.loads (Offset 161280)
[site_daily] After json.loads (Offset 161280)
[site_daily] Before append (Offset 161280)
[site_daily] After append (Offset 161280)
[site_daily] Before json.loads (Offset 161281)
[site_daily] After json.loads (Offset 161281)
[site_daily] Before append (Offset 161281)
[site_daily] After ap

[site_daily] Before json.loads (Offset 161321)
[site_daily] After json.loads (Offset 161321)
[site_daily] Before append (Offset 161321)
[site_daily] After append (Offset 161321)
[site_daily] Before json.loads (Offset 161322)
[site_daily] After json.loads (Offset 161322)
[site_daily] Before append (Offset 161322)
[site_daily] After append (Offset 161322)
[site_daily] Before json.loads (Offset 161323)
[site_daily] After json.loads (Offset 161323)
[site_daily] Before append (Offset 161323)
[site_daily] After append (Offset 161323)
[site_daily] Before json.loads (Offset 161324)
[site_daily] After json.loads (Offset 161324)
[site_daily] Before append (Offset 161324)
[site_daily] After append (Offset 161324)
[site_daily] Before json.loads (Offset 161325)
[site_daily] After json.loads (Offset 161325)
[site_daily] Before append (Offset 161325)
[site_daily] After append (Offset 161325)
[site_daily] Before json.loads (Offset 161326)
[site_daily] After json.loads (Offset 161326)
[site_daily] Befo

[site_daily] After json.loads (Offset 161367)
[site_daily] Before append (Offset 161367)
[site_daily] After append (Offset 161367)
[site_daily] Before json.loads (Offset 161368)
[site_daily] After json.loads (Offset 161368)
[site_daily] Before append (Offset 161368)
[site_daily] After append (Offset 161368)
[site_daily] Before json.loads (Offset 161369)
[site_daily] After json.loads (Offset 161369)
[site_daily] Before append (Offset 161369)
[site_daily] After append (Offset 161369)
[site_daily] Before json.loads (Offset 161370)
[site_daily] After json.loads (Offset 161370)
[site_daily] Before append (Offset 161370)
[site_daily] After append (Offset 161370)
[site_daily] Before json.loads (Offset 161371)
[site_daily] After json.loads (Offset 161371)
[site_daily] Before append (Offset 161371)
[site_daily] After append (Offset 161371)
[site_daily] Before json.loads (Offset 161372)
[site_daily] After json.loads (Offset 161372)
[site_daily] Before append (Offset 161372)
[site_daily] After ap

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-01 ---

--- Plotter Loop @ 06:17:41 ---
  Latest Date in Deques: 2022-05-01
  Last Plotted Date: 2022-05-01
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:17:51 ---
  Latest Date in Deques: 2022-05-01
  Last Plotted Date: 2022-05-01
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 161391)
[site_daily] After json.loads (Offset 161391)
[site_daily] Before append (Offset 161391)
[site_daily] After append (Offset 161391)
[site_daily] Before json.loads (Offset 161392)
[site_daily] After json.loads (Offset 161392)
[site_daily] Before append (Offset 161392)
[site_daily] After append (Offset 161392)
[site_daily] Before json.loads (Offset 161393)
[site_daily] After json.loads (Offset 161393)
[site_daily] Before append (Offset 161393)
[site_daily] After append (Offset 161393)
[site_daily] Before json.loads (Offset 161394)
[site_daily] After json.loads (Offset 161394)
[site_daily] Before append (

[site_daily] After json.loads (Offset 161435)
[site_daily] Before append (Offset 161435)
[site_daily] After append (Offset 161435)
[site_daily] Before json.loads (Offset 161436)
[site_daily] After json.loads (Offset 161436)
[site_daily] Before append (Offset 161436)
[site_daily] After append (Offset 161436)
[site_daily] Before json.loads (Offset 161437)
[site_daily] After json.loads (Offset 161437)
[site_daily] Before append (Offset 161437)
[site_daily] After append (Offset 161437)
[site_daily] Before json.loads (Offset 161438)
[site_daily] After json.loads (Offset 161438)
[site_daily] Before append (Offset 161438)
[site_daily] After append (Offset 161438)
[site_daily] Before json.loads (Offset 161439)
[site_daily] After json.loads (Offset 161439)
[site_daily] Before append (Offset 161439)
[site_daily] After append (Offset 161439)
[site_daily] Before json.loads (Offset 161440)
[site_daily] After json.loads (Offset 161440)
[site_daily] Before append (Offset 161440)
[site_daily] After ap

[site_daily] Before json.loads (Offset 161481)
[site_daily] After json.loads (Offset 161481)
[site_daily] Before append (Offset 161481)
[site_daily] After append (Offset 161481)
[site_daily] Before json.loads (Offset 161482)
[site_daily] After json.loads (Offset 161482)
[site_daily] Before append (Offset 161482)
[site_daily] After append (Offset 161482)
[site_daily] Before json.loads (Offset 161483)
[site_daily] After json.loads (Offset 161483)
[site_daily] Before append (Offset 161483)
[site_daily] After append (Offset 161483)
[site_daily] Before json.loads (Offset 161484)
[site_daily] After json.loads (Offset 161484)
[site_daily] Before append (Offset 161484)
[site_daily] After append (Offset 161484)
[site_daily] Before json.loads (Offset 161485)
[site_daily] After json.loads (Offset 161485)
[site_daily] Before append (Offset 161485)
[site_daily] After append (Offset 161485)
[site_daily] Before json.loads (Offset 161486)
[site_daily] After json.loads (Offset 161486)
[site_daily] Befo

[site_daily] Before json.loads (Offset 161526)
[site_daily] After json.loads (Offset 161526)
[site_daily] Before append (Offset 161526)
[site_daily] After append (Offset 161526)
[site_daily] Before json.loads (Offset 161527)
[site_daily] After json.loads (Offset 161527)
[site_daily] Before append (Offset 161527)
[site_daily] After append (Offset 161527)
[site_daily] Before json.loads (Offset 161528)
[site_daily] After json.loads (Offset 161528)
[site_daily] Before append (Offset 161528)
[site_daily] After append (Offset 161528)
[site_daily] Before json.loads (Offset 161529)
[site_daily] After json.loads (Offset 161529)
[site_daily] Before append (Offset 161529)
[site_daily] After append (Offset 161529)
[site_daily] Before json.loads (Offset 161530)
[site_daily] After json.loads (Offset 161530)
[site_daily] Before append (Offset 161530)
[site_daily] After append (Offset 161530)
[site_daily] Before json.loads (Offset 161531)
[site_daily] After json.loads (Offset 161531)
[site_daily] Befo

[site_daily] After json.loads (Offset 161572)
[site_daily] Before append (Offset 161572)
[site_daily] After append (Offset 161572)
[site_daily] Before json.loads (Offset 161573)
[site_daily] After json.loads (Offset 161573)
[site_daily] Before append (Offset 161573)
[site_daily] After append (Offset 161573)
[site_daily] Before json.loads (Offset 161574)
[site_daily] After json.loads (Offset 161574)
[site_daily] Before append (Offset 161574)
[site_daily] After append (Offset 161574)
[site_daily] Before json.loads (Offset 161575)
[site_daily] After json.loads (Offset 161575)
[site_daily] Before append (Offset 161575)
[site_daily] After append (Offset 161575)
[site_daily] Before json.loads (Offset 161576)
[site_daily] After json.loads (Offset 161576)
[site_daily] Before append (Offset 161576)
[site_daily] After append (Offset 161576)
[site_daily] Before json.loads (Offset 161577)
[site_daily] After json.loads (Offset 161577)
[site_daily] Before append (Offset 161577)
[site_daily] After ap

[site_daily] Before append (Offset 161618)
[site_daily] After append (Offset 161618)
[site_daily] Before json.loads (Offset 161619)
[site_daily] After json.loads (Offset 161619)
[site_daily] Before append (Offset 161619)
[site_daily] After append (Offset 161619)
[site_daily] Before json.loads (Offset 161620)
[site_daily] After json.loads (Offset 161620)
[site_daily] Before append (Offset 161620)
[site_daily] After append (Offset 161620)
[site_daily] Before json.loads (Offset 161621)
[site_daily] After json.loads (Offset 161621)
[site_daily] Before append (Offset 161621)
[site_daily] After append (Offset 161621)
[site_daily] Before json.loads (Offset 161622)
[site_daily] After json.loads (Offset 161622)
[site_daily] Before append (Offset 161622)
[site_daily] After append (Offset 161622)
[site_daily] Before json.loads (Offset 161623)
[site_daily] After json.loads (Offset 161623)
[site_daily] Before append (Offset 161623)
[site_daily] After append (Offset 161623)
[site_daily] Before json.

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-04 ---

--- Plotter Loop @ 06:18:31 ---
  Latest Date in Deques: 2022-05-04
  Last Plotted Date: 2022-05-04
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:18:41 ---
  Latest Date in Deques: 2022-05-04
  Last Plotted Date: 2022-05-04
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 161625)
[site_daily] After json.loads (Offset 161625)
[site_daily] Before append (Offset 161625)
[site_daily] After append (Offset 161625)
[site_daily] Before json.loads (Offset 161626)
[site_daily] After json.loads (Offset 161626)
[site_daily] Before append (Offset 161626)
[site_daily] After append (Offset 161626)
[site_daily] Before json.loads (Offset 161627)
[site_daily] After json.loads (Offset 161627)
[site_daily] Before append (Offset 161627)
[site_daily] After append (Offset 161627)
[site_daily] Before json.loads (Offset 161628)
[site_daily] After json.loads (Offset 161628)
[site_daily] Before append (

[site_daily] After append (Offset 161668)
[site_daily] Before json.loads (Offset 161669)
[site_daily] After json.loads (Offset 161669)
[site_daily] Before append (Offset 161669)
[site_daily] After append (Offset 161669)
[site_daily] Before json.loads (Offset 161670)
[site_daily] After json.loads (Offset 161670)
[site_daily] Before append (Offset 161670)
[site_daily] After append (Offset 161670)
[site_daily] Before json.loads (Offset 161671)
[site_daily] After json.loads (Offset 161671)
[site_daily] Before append (Offset 161671)
[site_daily] After append (Offset 161671)
[site_daily] Before json.loads (Offset 161672)
[site_daily] After json.loads (Offset 161672)
[site_daily] Before append (Offset 161672)
[site_daily] After append (Offset 161672)
[site_daily] Before json.loads (Offset 161673)
[site_daily] After json.loads (Offset 161673)
[site_daily] Before append (Offset 161673)
[site_daily] After append (Offset 161673)
[site_daily] Before json.loads (Offset 161674)
[site_daily] After js

[site_daily] Before json.loads (Offset 161715)
[site_daily] After json.loads (Offset 161715)
[site_daily] Before append (Offset 161715)
[site_daily] After append (Offset 161715)
[site_daily] Before json.loads (Offset 161716)
[site_daily] After json.loads (Offset 161716)
[site_daily] Before append (Offset 161716)
[site_daily] After append (Offset 161716)
[site_daily] Before json.loads (Offset 161717)
[site_daily] After json.loads (Offset 161717)
[site_daily] Before append (Offset 161717)
[site_daily] After append (Offset 161717)
[site_daily] Before json.loads (Offset 161718)
[site_daily] After json.loads (Offset 161718)
[site_daily] Before append (Offset 161718)
[site_daily] After append (Offset 161718)
[site_daily] Before json.loads (Offset 161719)
[site_daily] After json.loads (Offset 161719)
[site_daily] Before append (Offset 161719)
[site_daily] After append (Offset 161719)
[site_daily] Before json.loads (Offset 161720)
[site_daily] After json.loads (Offset 161720)
[site_daily] Befo

[site_daily] Before json.loads (Offset 161761)
[site_daily] After json.loads (Offset 161761)
[site_daily] Before append (Offset 161761)
[site_daily] After append (Offset 161761)
[site_daily] Before json.loads (Offset 161762)
[site_daily] After json.loads (Offset 161762)
[site_daily] Before append (Offset 161762)
[site_daily] After append (Offset 161762)
[site_daily] Before json.loads (Offset 161763)
[site_daily] After json.loads (Offset 161763)
[site_daily] Before append (Offset 161763)
[site_daily] After append (Offset 161763)
[site_daily] Before json.loads (Offset 161764)
[site_daily] After json.loads (Offset 161764)
[site_daily] Before append (Offset 161764)
[site_daily] After append (Offset 161764)
[site_daily] Before json.loads (Offset 161765)
[site_daily] After json.loads (Offset 161765)
[site_daily] Before append (Offset 161765)
[site_daily] After append (Offset 161765)
[site_daily] Before json.loads (Offset 161766)
[site_daily] After json.loads (Offset 161766)
[site_daily] Befo

[site_daily] After json.loads (Offset 161807)
[site_daily] Before append (Offset 161807)
[site_daily] After append (Offset 161807)
[site_daily] Before json.loads (Offset 161808)
[site_daily] After json.loads (Offset 161808)
[site_daily] Before append (Offset 161808)
[site_daily] After append (Offset 161808)
[site_daily] Before json.loads (Offset 161809)
[site_daily] After json.loads (Offset 161809)
[site_daily] Before append (Offset 161809)
[site_daily] After append (Offset 161809)
[site_daily] Before json.loads (Offset 161810)
[site_daily] After json.loads (Offset 161810)
[site_daily] Before append (Offset 161810)
[site_daily] After append (Offset 161810)
[site_daily] Before json.loads (Offset 161811)
[site_daily] After json.loads (Offset 161811)
[site_daily] Before append (Offset 161811)
[site_daily] After append (Offset 161811)
[site_daily] Before json.loads (Offset 161812)
[site_daily] After json.loads (Offset 161812)
[site_daily] Before append (Offset 161812)
[site_daily] After ap

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-08 ---

--- Plotter Loop @ 06:19:40 ---
  Latest Date in Deques: 2022-05-08
  Last Plotted Date: 2022-05-08
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:19:50 ---
  Latest Date in Deques: 2022-05-08
  Last Plotted Date: 2022-05-08
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 161839)
[site_daily] After json.loads (Offset 161839)
[site_daily] Before append (Offset 161839)
[site_daily] After append (Offset 161839)
[site_daily] Before json.loads (Offset 161840)
[site_daily] After json.loads (Offset 161840)
[site_daily] Before append (Offset 161840)
[site_daily] After append (Offset 161840)
[site_daily] Before json.loads (Offset 161841)
[site_daily] After json.loads (Offset 161841)
[site_daily] Before append (Offset 161841)
[site_daily] After append (Offset 161841)
[site_daily] Before json.loads (Offset 161842)
[site_daily] After json.loads (Offset 161842)
[site_daily] Before append (

[site_daily] After json.loads (Offset 161883)
[site_daily] Before append (Offset 161883)
[site_daily] After append (Offset 161883)
[site_daily] Before json.loads (Offset 161884)
[site_daily] After json.loads (Offset 161884)
[site_daily] Before append (Offset 161884)
[site_daily] After append (Offset 161884)
[site_daily] Before json.loads (Offset 161885)
[site_daily] After json.loads (Offset 161885)
[site_daily] Before append (Offset 161885)
[site_daily] After append (Offset 161885)
[site_daily] Before json.loads (Offset 161886)
[site_daily] After json.loads (Offset 161886)
[site_daily] Before append (Offset 161886)
[site_daily] After append (Offset 161886)
[site_daily] Before json.loads (Offset 161887)
[site_daily] After json.loads (Offset 161887)
[site_daily] Before append (Offset 161887)
[site_daily] After append (Offset 161887)
[site_daily] Before json.loads (Offset 161888)
[site_daily] After json.loads (Offset 161888)
[site_daily] Before append (Offset 161888)
[site_daily] After ap

[site_daily] Before append (Offset 161929)
[site_daily] After append (Offset 161929)
[site_daily] Before json.loads (Offset 161930)
[site_daily] After json.loads (Offset 161930)
[site_daily] Before append (Offset 161930)
[site_daily] After append (Offset 161930)
[site_daily] Before json.loads (Offset 161931)
[site_daily] After json.loads (Offset 161931)
[site_daily] Before append (Offset 161931)
[site_daily] After append (Offset 161931)
[site_daily] Before json.loads (Offset 161932)
[site_daily] After json.loads (Offset 161932)
[site_daily] Before append (Offset 161932)
[site_daily] After append (Offset 161932)
[site_daily] Before json.loads (Offset 161933)
[site_daily] After json.loads (Offset 161933)
[site_daily] Before append (Offset 161933)
[site_daily] After append (Offset 161933)
[site_daily] Before json.loads (Offset 161934)
[site_daily] After json.loads (Offset 161934)
[site_daily] Before append (Offset 161934)
[site_daily] After append (Offset 161934)
[site_daily] Before json.

[site_daily] After append (Offset 161975)
[site_daily] Before json.loads (Offset 161976)
[site_daily] After json.loads (Offset 161976)
[site_daily] Before append (Offset 161976)
[site_daily] After append (Offset 161976)
[site_daily] Before json.loads (Offset 161977)
[site_daily] After json.loads (Offset 161977)
[site_daily] Before append (Offset 161977)
[site_daily] After append (Offset 161977)
[site_daily] Before json.loads (Offset 161978)
[site_daily] After json.loads (Offset 161978)
[site_daily] Before append (Offset 161978)
[site_daily] After append (Offset 161978)
[site_daily] Before json.loads (Offset 161979)
[site_daily] After json.loads (Offset 161979)
[site_daily] Before append (Offset 161979)
[site_daily] After append (Offset 161979)
[site_daily] Before json.loads (Offset 161980)
[site_daily] After json.loads (Offset 161980)
[site_daily] Before append (Offset 161980)
[site_daily] After append (Offset 161980)
[site_daily] Before json.loads (Offset 161981)
[site_daily] After js

[site_daily] Before json.loads (Offset 162022)
[site_daily] After json.loads (Offset 162022)
[site_daily] Before append (Offset 162022)
[site_daily] After append (Offset 162022)
[site_daily] Before json.loads (Offset 162023)
[site_daily] After json.loads (Offset 162023)
[site_daily] Before append (Offset 162023)
[site_daily] After append (Offset 162023)
[site_daily] Before json.loads (Offset 162024)
[site_daily] After json.loads (Offset 162024)
[site_daily] Before append (Offset 162024)
[site_daily] After append (Offset 162024)
[site_daily] Before json.loads (Offset 162025)
[site_daily] After json.loads (Offset 162025)
[site_daily] Before append (Offset 162025)
[site_daily] After append (Offset 162025)
[site_daily] Before json.loads (Offset 162026)
[site_daily] After json.loads (Offset 162026)
[site_daily] Before append (Offset 162026)
[site_daily] After append (Offset 162026)
[site_daily] Before json.loads (Offset 162027)
[site_daily] After json.loads (Offset 162027)
[site_daily] Befo

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-11 ---

--- Plotter Loop @ 06:20:31 ---
  Latest Date in Deques: 2022-05-11
  Last Plotted Date: 2022-05-11
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:20:40 ---
  Latest Date in Deques: 2022-05-11
  Last Plotted Date: 2022-05-11
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 162063)
[site_daily] After json.loads (Offset 162063)
[site_daily] Before append (Offset 162063)
[site_daily] After append (Offset 162063)
[site_daily] Before json.loads (Offset 162064)
[site_daily] After json.loads (Offset 162064)
[site_daily] Before append (Offset 162064)
[site_daily] After append (Offset 162064)
[site_daily] Before json.loads (Offset 162065)
[site_daily] After json.loads (Offset 162065)
[site_daily] Before append (Offset 162065)
[site_daily] After append (Offset 162065)
[site_daily] Before json.loads (Offset 162066)
[site_daily] After json.loads (Offset 162066)
[site_daily] Before append (

[site_daily] After append (Offset 162106)
[site_daily] Before json.loads (Offset 162107)
[site_daily] After json.loads (Offset 162107)
[site_daily] Before append (Offset 162107)
[site_daily] After append (Offset 162107)
--- Plots updated for date: 2022-05-12 ---
[site_daily] Before json.loads (Offset 162108)
[site_daily] After json.loads (Offset 162108)
[site_daily] Before append (Offset 162108)
[site_daily] After append (Offset 162108)
[site_daily] Before json.loads (Offset 162109)
[site_daily] After json.loads (Offset 162109)
[site_daily] Before append (Offset 162109)
[site_daily] After append (Offset 162109)
[site_daily] Before json.loads (Offset 162110)
[site_daily] After json.loads (Offset 162110)
[site_daily] Before append (Offset 162110)
[site_daily] After append (Offset 162110)
[site_daily] Before json.loads (Offset 162111)
[site_daily] After json.loads (Offset 162111)
[site_daily] Before append (Offset 162111)
[site_daily] After append (Offset 162111)
[site_daily] Before json.

[site_daily] After append (Offset 162151)
[site_daily] Before json.loads (Offset 162152)
[site_daily] After json.loads (Offset 162152)
[site_daily] Before append (Offset 162152)
[site_daily] After append (Offset 162152)
[site_daily] Before json.loads (Offset 162153)
[site_daily] After json.loads (Offset 162153)
[site_daily] Before append (Offset 162153)
[site_daily] After append (Offset 162153)
[site_daily] Before json.loads (Offset 162154)
[site_daily] After json.loads (Offset 162154)
[site_daily] Before append (Offset 162154)
[site_daily] After append (Offset 162154)
[site_daily] Before json.loads (Offset 162155)
[site_daily] After json.loads (Offset 162155)
[site_daily] Before append (Offset 162155)
[site_daily] After append (Offset 162155)
[site_daily] Before json.loads (Offset 162156)
[site_daily] After json.loads (Offset 162156)
[site_daily] Before append (Offset 162156)
[site_daily] After append (Offset 162156)
[site_daily] Before json.loads (Offset 162157)
[site_daily] After js

[site_daily] Before json.loads (Offset 162198)
[site_daily] After json.loads (Offset 162198)
[site_daily] Before append (Offset 162198)
[site_daily] After append (Offset 162198)
[site_daily] Before json.loads (Offset 162199)
[site_daily] After json.loads (Offset 162199)
[site_daily] Before append (Offset 162199)
[site_daily] After append (Offset 162199)
[site_daily] Before json.loads (Offset 162200)
[site_daily] After json.loads (Offset 162200)
[site_daily] Before append (Offset 162200)
[site_daily] After append (Offset 162200)
[site_daily] Before json.loads (Offset 162201)
[site_daily] After json.loads (Offset 162201)
[site_daily] Before append (Offset 162201)
[site_daily] After append (Offset 162201)
[site_daily] Before json.loads (Offset 162202)
[site_daily] After json.loads (Offset 162202)
[site_daily] Before append (Offset 162202)
[site_daily] After append (Offset 162202)
[site_daily] Before json.loads (Offset 162203)
[site_daily] After json.loads (Offset 162203)
[site_daily] Befo

[site_daily] After json.loads (Offset 162244)
[site_daily] Before append (Offset 162244)
[site_daily] After append (Offset 162244)
[site_daily] Before json.loads (Offset 162245)
[site_daily] After json.loads (Offset 162245)
[site_daily] Before append (Offset 162245)
[site_daily] After append (Offset 162245)
[site_daily] Before json.loads (Offset 162246)
[site_daily] After json.loads (Offset 162246)
[site_daily] Before append (Offset 162246)
[site_daily] After append (Offset 162246)
[site_daily] Before json.loads (Offset 162247)
[site_daily] After json.loads (Offset 162247)
[site_daily] Before append (Offset 162247)
[site_daily] After append (Offset 162247)
[site_daily] Before json.loads (Offset 162248)
[site_daily] After json.loads (Offset 162248)
[site_daily] Before append (Offset 162248)
[site_daily] After append (Offset 162248)
[site_daily] Before json.loads (Offset 162249)
[site_daily] After json.loads (Offset 162249)
[site_daily] Before append (Offset 162249)
[site_daily] After ap

  Site Deque Size: 200

--- Plotter Loop @ 06:21:31 ---
  Latest Date in Deques: 2022-05-12
  Last Plotted Date: 2022-05-12
  Building Deque Size: 200
  Site Deque Size: 200

--- New latest date detected: 2022-05-14. Preparing plot... ---


  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-14 ---
[site_daily] Before json.loads (Offset 162288)
[site_daily] After json.loads (Offset 162288)
[site_daily] Before append (Offset 162288)
[site_daily] After append (Offset 162288)
[site_daily] Before json.loads (Offset 162289)
[site_daily] After json.loads (Offset 162289)
[site_daily] Before append (Offset 162289)
[site_daily] After append (Offset 162289)
[site_daily] Before json.loads (Offset 162290)
[site_daily] After json.loads (Offset 162290)
[site_daily] Before append (Offset 162290)
[site_daily] After append (Offset 162290)
[site_daily] Before json.loads (Offset 162291)
[site_daily] After json.loads (Offset 162291)
[site_daily] Before append (Offset 162291)
[site_daily] After append (Offset 162291)
[site_daily] Before json.loads (Offset 162292)
[site_daily] After json.loads (Offset 162292)
[site_daily] Before append (Offset 162292)
[site_daily] After append (Offset 162292)
[site_daily] Before json.loads (Offset 162293)
[site_daily] After j

[site_daily] Before append (Offset 162333)
[site_daily] After append (Offset 162333)
[site_daily] Before json.loads (Offset 162334)
[site_daily] After json.loads (Offset 162334)
[site_daily] Before append (Offset 162334)
[site_daily] After append (Offset 162334)
[site_daily] Before json.loads (Offset 162335)
[site_daily] After json.loads (Offset 162335)
[site_daily] Before append (Offset 162335)
[site_daily] After append (Offset 162335)
[site_daily] Before json.loads (Offset 162336)
[site_daily] After json.loads (Offset 162336)
[site_daily] Before append (Offset 162336)
[site_daily] After append (Offset 162336)
[site_daily] Before json.loads (Offset 162337)
[site_daily] After json.loads (Offset 162337)
[site_daily] Before append (Offset 162337)
[site_daily] After append (Offset 162337)
[site_daily] Before json.loads (Offset 162338)
[site_daily] After json.loads (Offset 162338)
[site_daily] Before append (Offset 162338)
[site_daily] After append (Offset 162338)
[site_daily] Before json.

[site_daily] Before append (Offset 162378)
[site_daily] After append (Offset 162378)
[site_daily] Before json.loads (Offset 162379)
[site_daily] After json.loads (Offset 162379)
[site_daily] Before append (Offset 162379)
[site_daily] After append (Offset 162379)
[site_daily] Before json.loads (Offset 162380)
[site_daily] After json.loads (Offset 162380)
[site_daily] Before append (Offset 162380)
[site_daily] After append (Offset 162380)
[site_daily] Before json.loads (Offset 162381)
[site_daily] After json.loads (Offset 162381)
[site_daily] Before append (Offset 162381)
[site_daily] After append (Offset 162381)
[site_daily] Before json.loads (Offset 162382)
[site_daily] After json.loads (Offset 162382)
[site_daily] Before append (Offset 162382)
[site_daily] After append (Offset 162382)
[site_daily] Before json.loads (Offset 162383)
[site_daily] After json.loads (Offset 162383)
[site_daily] Before append (Offset 162383)
[site_daily] After append (Offset 162383)
[site_daily] Before json.

[site_daily] After append (Offset 162424)
[site_daily] Before json.loads (Offset 162425)
[site_daily] After json.loads (Offset 162425)
[site_daily] Before append (Offset 162425)
[site_daily] After append (Offset 162425)
[site_daily] Before json.loads (Offset 162426)
[site_daily] After json.loads (Offset 162426)
[site_daily] Before append (Offset 162426)
[site_daily] After append (Offset 162426)
[site_daily] Before json.loads (Offset 162427)
[site_daily] After json.loads (Offset 162427)
[site_daily] Before append (Offset 162427)
[site_daily] After append (Offset 162427)
[site_daily] Before json.loads (Offset 162428)
[site_daily] After json.loads (Offset 162428)
[site_daily] Before append (Offset 162428)
[site_daily] After append (Offset 162428)
[site_daily] Before json.loads (Offset 162429)
[site_daily] After json.loads (Offset 162429)
[site_daily] Before append (Offset 162429)
[site_daily] After append (Offset 162429)
[site_daily] Before json.loads (Offset 162430)
[site_daily] After js

[site_daily] Before json.loads (Offset 162471)
[site_daily] After json.loads (Offset 162471)
[site_daily] Before append (Offset 162471)
[site_daily] After append (Offset 162471)
[site_daily] Before json.loads (Offset 162472)
[site_daily] After json.loads (Offset 162472)
[site_daily] Before append (Offset 162472)
[site_daily] After append (Offset 162472)
[site_daily] Before json.loads (Offset 162473)
[site_daily] After json.loads (Offset 162473)
[site_daily] Before append (Offset 162473)
[site_daily] After append (Offset 162473)
[site_daily] Before json.loads (Offset 162474)
[site_daily] After json.loads (Offset 162474)
[site_daily] Before append (Offset 162474)
[site_daily] After append (Offset 162474)
[site_daily] Before json.loads (Offset 162475)
[site_daily] After json.loads (Offset 162475)
[site_daily] Before append (Offset 162475)
[site_daily] After append (Offset 162475)
[site_daily] Before json.loads (Offset 162476)
[site_daily] After json.loads (Offset 162476)
[site_daily] Befo

[site_daily] After json.loads (Offset 162516)
[site_daily] Before append (Offset 162516)
[site_daily] After append (Offset 162516)
[site_daily] Before json.loads (Offset 162517)
[site_daily] After json.loads (Offset 162517)
[site_daily] Before append (Offset 162517)
[site_daily] After append (Offset 162517)
[site_daily] Before json.loads (Offset 162518)
[site_daily] After json.loads (Offset 162518)
[site_daily] Before append (Offset 162518)
[site_daily] After append (Offset 162518)
[site_daily] Before json.loads (Offset 162519)
[site_daily] After json.loads (Offset 162519)
[site_daily] Before append (Offset 162519)
[site_daily] After append (Offset 162519)
[site_daily] Before json.loads (Offset 162520)
[site_daily] After json.loads (Offset 162520)
[site_daily] Before append (Offset 162520)
[site_daily] After append (Offset 162520)
[site_daily] Before json.loads (Offset 162521)
[site_daily] After json.loads (Offset 162521)
[site_daily] Before append (Offset 162521)
[site_daily] After ap

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-18 ---

--- Plotter Loop @ 06:22:20 ---
  Latest Date in Deques: 2022-05-18
  Last Plotted Date: 2022-05-18
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:22:30 ---
  Latest Date in Deques: 2022-05-18
  Last Plotted Date: 2022-05-18
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 162532)
[site_daily] After json.loads (Offset 162532)
[site_daily] Before append (Offset 162532)
[site_daily] After append (Offset 162532)
[site_daily] Before json.loads (Offset 162533)
[site_daily] After json.loads (Offset 162533)
[site_daily] Before append (Offset 162533)
[site_daily] After append (Offset 162533)
[site_daily] Before json.loads (Offset 162534)
[site_daily] After json.loads (Offset 162534)
[site_daily] Before append (Offset 162534)
[site_daily] After append (Offset 162534)
[site_daily] Before json.loads (Offset 162535)
[site_daily] After json.loads (Offset 162535)
[site_daily] Before append (

[site_daily] After json.loads (Offset 162576)
[site_daily] Before append (Offset 162576)
[site_daily] After append (Offset 162576)
[site_daily] Before json.loads (Offset 162577)
[site_daily] After json.loads (Offset 162577)
[site_daily] Before append (Offset 162577)
[site_daily] After append (Offset 162577)
[site_daily] Before json.loads (Offset 162578)
[site_daily] After json.loads (Offset 162578)
[site_daily] Before append (Offset 162578)
[site_daily] After append (Offset 162578)
[site_daily] Before json.loads (Offset 162579)
[site_daily] After json.loads (Offset 162579)
[site_daily] Before append (Offset 162579)
[site_daily] After append (Offset 162579)
[site_daily] Before json.loads (Offset 162580)
[site_daily] After json.loads (Offset 162580)
[site_daily] Before append (Offset 162580)
[site_daily] After append (Offset 162580)
[site_daily] Before json.loads (Offset 162581)
[site_daily] After json.loads (Offset 162581)
[site_daily] Before append (Offset 162581)
[site_daily] After ap

[site_daily] Before append (Offset 162622)
[site_daily] After append (Offset 162622)
[site_daily] Before json.loads (Offset 162623)
[site_daily] After json.loads (Offset 162623)
[site_daily] Before append (Offset 162623)
[site_daily] After append (Offset 162623)
[site_daily] Before json.loads (Offset 162624)
[site_daily] After json.loads (Offset 162624)
[site_daily] Before append (Offset 162624)
[site_daily] After append (Offset 162624)
[site_daily] Before json.loads (Offset 162625)
[site_daily] After json.loads (Offset 162625)
[site_daily] Before append (Offset 162625)
[site_daily] After append (Offset 162625)
[site_daily] Before json.loads (Offset 162626)
[site_daily] After json.loads (Offset 162626)
[site_daily] Before append (Offset 162626)
[site_daily] After append (Offset 162626)
[site_daily] Before json.loads (Offset 162627)
[site_daily] After json.loads (Offset 162627)
[site_daily] Before append (Offset 162627)
[site_daily] After append (Offset 162627)
[site_daily] Before json.

[site_daily] After append (Offset 162668)
[site_daily] Before json.loads (Offset 162669)
[site_daily] After json.loads (Offset 162669)
[site_daily] Before append (Offset 162669)
[site_daily] After append (Offset 162669)
[site_daily] Before json.loads (Offset 162670)
[site_daily] After json.loads (Offset 162670)
[site_daily] Before append (Offset 162670)
[site_daily] After append (Offset 162670)
[site_daily] Before json.loads (Offset 162671)
[site_daily] After json.loads (Offset 162671)
[site_daily] Before append (Offset 162671)
[site_daily] After append (Offset 162671)
[site_daily] Before json.loads (Offset 162672)
[site_daily] After json.loads (Offset 162672)
[site_daily] Before append (Offset 162672)
[site_daily] After append (Offset 162672)
[site_daily] Before json.loads (Offset 162673)
[site_daily] After json.loads (Offset 162673)
[site_daily] Before append (Offset 162673)
[site_daily] After append (Offset 162673)
[site_daily] Before json.loads (Offset 162674)
[site_daily] After js

[site_daily] Before append (Offset 162714)
[site_daily] After append (Offset 162714)
[site_daily] Before json.loads (Offset 162715)
[site_daily] After json.loads (Offset 162715)
[site_daily] Before append (Offset 162715)
[site_daily] After append (Offset 162715)
[site_daily] Before json.loads (Offset 162716)
[site_daily] After json.loads (Offset 162716)
[site_daily] Before append (Offset 162716)
[site_daily] After append (Offset 162716)
[site_daily] Before json.loads (Offset 162717)
[site_daily] After json.loads (Offset 162717)
[site_daily] Before append (Offset 162717)
[site_daily] After append (Offset 162717)
[site_daily] Before json.loads (Offset 162718)
[site_daily] After json.loads (Offset 162718)
[site_daily] Before append (Offset 162718)
[site_daily] After append (Offset 162718)
[site_daily] Before json.loads (Offset 162719)
[site_daily] After json.loads (Offset 162719)
[site_daily] Before append (Offset 162719)
[site_daily] After append (Offset 162719)
[site_daily] Before json.

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-22 ---

--- Plotter Loop @ 06:23:20 ---
  Latest Date in Deques: 2022-05-22
  Last Plotted Date: 2022-05-22
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:23:30 ---
  Latest Date in Deques: 2022-05-22
  Last Plotted Date: 2022-05-22
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 162751)
[site_daily] After json.loads (Offset 162751)
[site_daily] Before append (Offset 162751)
[site_daily] After append (Offset 162751)
[site_daily] Before json.loads (Offset 162752)
[site_daily] After json.loads (Offset 162752)
[site_daily] Before append (Offset 162752)
[site_daily] After append (Offset 162752)
[site_daily] Before json.loads (Offset 162753)
[site_daily] After json.loads (Offset 162753)
[site_daily] Before append (Offset 162753)
[site_daily] After append (Offset 162753)
[site_daily] Before json.loads (Offset 162754)
[site_daily] After json.loads (Offset 162754)
[site_daily] Before append (

[site_daily] After json.loads (Offset 162795)
[site_daily] Before append (Offset 162795)
[site_daily] After append (Offset 162795)
[site_daily] Before json.loads (Offset 162796)
[site_daily] After json.loads (Offset 162796)
[site_daily] Before append (Offset 162796)
[site_daily] After append (Offset 162796)
[site_daily] Before json.loads (Offset 162797)
[site_daily] After json.loads (Offset 162797)
[site_daily] Before append (Offset 162797)
[site_daily] After append (Offset 162797)
[site_daily] Before json.loads (Offset 162798)
[site_daily] After json.loads (Offset 162798)
[site_daily] Before append (Offset 162798)
[site_daily] After append (Offset 162798)
[site_daily] Before json.loads (Offset 162799)
[site_daily] After json.loads (Offset 162799)
[site_daily] Before append (Offset 162799)
[site_daily] After append (Offset 162799)
[site_daily] Before json.loads (Offset 162800)
[site_daily] After json.loads (Offset 162800)
[site_daily] Before append (Offset 162800)
[site_daily] After ap

[site_daily] Before append (Offset 162841)
[site_daily] After append (Offset 162841)
[site_daily] Before json.loads (Offset 162842)
[site_daily] After json.loads (Offset 162842)
[site_daily] Before append (Offset 162842)
[site_daily] After append (Offset 162842)
[site_daily] Before json.loads (Offset 162843)
[site_daily] After json.loads (Offset 162843)
[site_daily] Before append (Offset 162843)
[site_daily] After append (Offset 162843)
[site_daily] Before json.loads (Offset 162844)
[site_daily] After json.loads (Offset 162844)
[site_daily] Before append (Offset 162844)
[site_daily] After append (Offset 162844)
[site_daily] Before json.loads (Offset 162845)
[site_daily] After json.loads (Offset 162845)
[site_daily] Before append (Offset 162845)
[site_daily] After append (Offset 162845)
[site_daily] Before json.loads (Offset 162846)
[site_daily] After json.loads (Offset 162846)
[site_daily] Before append (Offset 162846)
[site_daily] After append (Offset 162846)
[site_daily] Before json.

[site_daily] After json.loads (Offset 162887)
[site_daily] Before append (Offset 162887)
[site_daily] After append (Offset 162887)
[site_daily] Before json.loads (Offset 162888)
[site_daily] After json.loads (Offset 162888)
[site_daily] Before append (Offset 162888)
[site_daily] After append (Offset 162888)
[site_daily] Before json.loads (Offset 162889)
[site_daily] After json.loads (Offset 162889)
[site_daily] Before append (Offset 162889)
[site_daily] After append (Offset 162889)
[site_daily] Before json.loads (Offset 162890)
[site_daily] After json.loads (Offset 162890)
[site_daily] Before append (Offset 162890)
[site_daily] After append (Offset 162890)
[site_daily] Before json.loads (Offset 162891)
[site_daily] After json.loads (Offset 162891)
[site_daily] Before append (Offset 162891)
[site_daily] After append (Offset 162891)
[site_daily] Before json.loads (Offset 162892)
[site_daily] After json.loads (Offset 162892)
[site_daily] Before append (Offset 162892)
[site_daily] After ap

[site_daily] Before append (Offset 162933)
[site_daily] After append (Offset 162933)
[site_daily] Before json.loads (Offset 162934)
[site_daily] After json.loads (Offset 162934)
[site_daily] Before append (Offset 162934)
[site_daily] After append (Offset 162934)
[site_daily] Before json.loads (Offset 162935)
[site_daily] After json.loads (Offset 162935)
--- Plots updated for date: 2022-05-23 ---
[site_daily] Before append (Offset 162935)

--- Plotter Loop @ 06:23:40 ---
[site_daily] After append (Offset 162935)
  Latest Date in Deques: 2022-05-23
[site_daily] Before json.loads (Offset 162936)
  Last Plotted Date: 2022-05-23
  Building Deque Size: 200
[site_daily] After json.loads (Offset 162936)
[site_daily] Before append (Offset 162936)
  Site Deque Size: 200
[site_daily] After append (Offset 162936)
[site_daily] Before json.loads (Offset 162937)
[site_daily] After json.loads (Offset 162937)
[site_daily] Before append (Offset 162937)
[site_daily] After append (Offset 162937)
[site_dai

[site_daily] Before append (Offset 162978)
[site_daily] After append (Offset 162978)
[site_daily] Before json.loads (Offset 162979)
[site_daily] After json.loads (Offset 162979)
[site_daily] Before append (Offset 162979)
[site_daily] After append (Offset 162979)
[site_daily] Before json.loads (Offset 162980)
[site_daily] After json.loads (Offset 162980)
[site_daily] Before append (Offset 162980)
[site_daily] After append (Offset 162980)
[site_daily] Before json.loads (Offset 162981)
[site_daily] After json.loads (Offset 162981)
[site_daily] Before append (Offset 162981)
[site_daily] After append (Offset 162981)
[site_daily] Before json.loads (Offset 162982)
[site_daily] After json.loads (Offset 162982)
[site_daily] Before append (Offset 162982)
[site_daily] After append (Offset 162982)
[site_daily] Before json.loads (Offset 162983)
[site_daily] After json.loads (Offset 162983)
[site_daily] Before append (Offset 162983)
[site_daily] After append (Offset 162983)
[site_daily] Before json.

  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)
  if sorted_items: labels, values = zip(*sorted_items); ax.bar(labels, values); ax.set_xticklabels(labels, rotation=75, ha='right'); max_val_found_bldg = max(max_val_found_bldg, max(values) if values else 0)


--- Plots updated for date: 2022-05-25 ---

--- Plotter Loop @ 06:24:21 ---
  Latest Date in Deques: 2022-05-25
  Last Plotted Date: 2022-05-25
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:24:31 ---
  Latest Date in Deques: 2022-05-25
  Last Plotted Date: 2022-05-25
  Building Deque Size: 200
  Site Deque Size: 200

--- Plotter Loop @ 06:24:41 ---
  Latest Date in Deques: 2022-05-25
  Last Plotted Date: 2022-05-25
  Building Deque Size: 200
  Site Deque Size: 200
[site_daily] Before json.loads (Offset 163004)
[site_daily] After json.loads (Offset 163004)
[site_daily] Before append (Offset 163004)
[site_daily] After append (Offset 163004)
[site_daily] Before json.loads (Offset 163005)
[site_daily] After json.loads (Offset 163005)
[site_daily] Before append (Offset 163005)
[site_daily] After append (Offset 163005)
[site_daily] Before json.loads (Offset 163006)
[site_daily] After json.loads (Offset 163006)
[site_daily] Before append (Offset 163006)
[site_daily]

[site_daily] After json.loads (Offset 163047)
[site_daily] Before append (Offset 163047)
[site_daily] After append (Offset 163047)
[site_daily] Before json.loads (Offset 163048)
[site_daily] After json.loads (Offset 163048)
[site_daily] Before append (Offset 163048)
[site_daily] After append (Offset 163048)
[site_daily] Before json.loads (Offset 163049)
[site_daily] After json.loads (Offset 163049)
[site_daily] Before append (Offset 163049)
[site_daily] After append (Offset 163049)
[site_daily] Before json.loads (Offset 163050)
[site_daily] After json.loads (Offset 163050)
[site_daily] Before append (Offset 163050)
[site_daily] After append (Offset 163050)
[site_daily] Before json.loads (Offset 163051)
[site_daily] After json.loads (Offset 163051)
[site_daily] Before append (Offset 163051)
[site_daily] After append (Offset 163051)
[site_daily] Before json.loads (Offset 163052)
[site_daily] After json.loads (Offset 163052)
[site_daily] Before append (Offset 163052)
[site_daily] After ap

[site_daily] Before append (Offset 163093)
[site_daily] After append (Offset 163093)
[site_daily] Before json.loads (Offset 163094)
[site_daily] After json.loads (Offset 163094)
[site_daily] Before append (Offset 163094)
[site_daily] After append (Offset 163094)
[site_daily] Before json.loads (Offset 163095)
[site_daily] After json.loads (Offset 163095)
[site_daily] Before append (Offset 163095)
[site_daily] After append (Offset 163095)
[site_daily] Before json.loads (Offset 163096)
[site_daily] After json.loads (Offset 163096)
[site_daily] Before append (Offset 163096)
[site_daily] After append (Offset 163096)
[site_daily] Before json.loads (Offset 163097)
[site_daily] After json.loads (Offset 163097)
[site_daily] Before append (Offset 163097)
[site_daily] After append (Offset 163097)
[site_daily] Before json.loads (Offset 163098)
[site_daily] After json.loads (Offset 163098)
[site_daily] Before append (Offset 163098)
[site_daily] After append (Offset 163098)
[site_daily] Before json.

[site_daily] After json.loads (Offset 163139)
[site_daily] Before append (Offset 163139)
[site_daily] After append (Offset 163139)
[site_daily] Before json.loads (Offset 163140)
[site_daily] After json.loads (Offset 163140)
[site_daily] Before append (Offset 163140)
[site_daily] After append (Offset 163140)
[site_daily] Before json.loads (Offset 163141)
[site_daily] After json.loads (Offset 163141)
[site_daily] Before append (Offset 163141)
[site_daily] After append (Offset 163141)
[site_daily] Before json.loads (Offset 163142)
[site_daily] After json.loads (Offset 163142)
[site_daily] Before append (Offset 163142)
[site_daily] After append (Offset 163142)
[site_daily] Before json.loads (Offset 163143)
[site_daily] After json.loads (Offset 163143)
[site_daily] Before append (Offset 163143)
[site_daily] After append (Offset 163143)
[site_daily] Before json.loads (Offset 163144)
[site_daily] After json.loads (Offset 163144)
[site_daily] Before append (Offset 163144)
[site_daily] After ap

[site_daily] Before append (Offset 163185)
[site_daily] After append (Offset 163185)
[site_daily] Before json.loads (Offset 163186)
[site_daily] After json.loads (Offset 163186)
[site_daily] Before append (Offset 163186)
[site_daily] After append (Offset 163186)
[site_daily] Before json.loads (Offset 163187)
[site_daily] After json.loads (Offset 163187)
[site_daily] Before append (Offset 163187)
[site_daily] After append (Offset 163187)
[site_daily] Before json.loads (Offset 163188)
[site_daily] After json.loads (Offset 163188)
[site_daily] Before append (Offset 163188)
[site_daily] After append (Offset 163188)
--- Plots updated for date: 2022-05-26 ---
[site_daily] Before json.loads (Offset 163189)

--- Plotter Loop @ 06:24:50 ---
[site_daily] After json.loads (Offset 163189)
  Latest Date in Deques: 2022-05-26
  Last Plotted Date: 2022-05-26
[site_daily] Before append (Offset 163189)
[site_daily] After append (Offset 163189)
  Building Deque Size: 200
  Site Deque Size: 200[site_dail

In [None]:
# --- 1. Configuration & Global State ---
# (Keep HOST_IP, TOPIC_BUILDING, TOPIC_SITE, stores, lock, yaxis_max, consumers list, current_target_plot_date as before)
import collections # Make sure this is imported
from kafka3 import KafkaConsumer
import json
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
from threading import Thread, Lock
import time
import atexit
import traceback # Import traceback for detailed errors

# This magic command is necessary for live plotting in Jupyter
%matplotlib notebook

# --- CONFIGURATION ---
HOST_IP = "192.168.0.6" # Use your machine's IP
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"

# Thread-safe global data stores using defaultdict for easier handling
building_data_store = collections.defaultdict(lambda: collections.defaultdict(dict))
site_data_store = collections.defaultdict(lambda: {str(i): 0 for i in range(16)})

data_lock = Lock()
yaxis_max_building = 500.0
yaxis_max_site = 5000.0
consumers = [] # Renamed from consumers_list for consistency
# current_target_plot_date = date.today() - timedelta(days=7)


# --- UPDATED connect_kafka_consumer ---
def connect_kafka_consumer(topic):
    """Connects to a specific Kafka topic with more logging."""
    consumer = None
    group_id = f'{topic}-plotter-{datetime.now()}'
    print(f"[{topic}] Attempting connection to bootstrap server {HOST_IP}:9092...", flush=True)
    try:
        consumer = KafkaConsumer(
            topic,
            bootstrap_servers=[f'{HOST_IP}:9092'],
            auto_offset_reset='latest', # Start reading only NEW messages
            group_id=group_id,          # Use the unique group ID
            # ** Temporarily change deserializer to raw bytes/string **
            value_deserializer=lambda x: x.decode('utf-8', errors='ignore'), # Decode bytes to string safely
            api_version=(0, 10), # Keep explicit api_version if needed
            # Add some network timeouts which might help diagnose connection hangs
            request_timeout_ms=65000, # e.g., 15 seconds
            session_timeout_ms=60000, # e.g., 30 seconds
            heartbeat_interval_ms=15000, # Set to 15 seconds (should be < session_timeout / 3)
            # Can keep auto-commit enabled for simplicity here
            enable_auto_commit=True
        )
        # Check if the consumer actually connected by trying to access partitions
        partitions = consumer.partitions_for_topic(topic)
        if partitions is None or len(partitions) == 0:
             print(f"[{topic}] Warning: Connected but no partitions found for topic.", flush=True)
        else:
            print(f"[{topic}] Connection SUCCESSFUL. Partitions found: {partitions}", flush=True)

        consumers.append(consumer) # Add to list for cleanup only on success
        return consumer
    except Exception as e:
        print(f"[{topic}] FAILED to connect or fetch partitions: {e}", flush=True)
        traceback.print_exc() # Print full stack trace
        if consumer:
            consumer.close() # Attempt to close if partially created
        return None

# --- 2. Consumer Thread Functions (MORE DEBUGGING & FLUSHING) ---

def consumer_thread_building():
    """Consumes 'building_6h' data and populates the global store."""
    topic = TOPIC_BUILDING
    print(f"[{topic}] Consumer thread started.", flush=True)
    consumer = connect_kafka_consumer(topic)
    if consumer is None:
        print(f"[{topic}] Thread exiting due to connection failure.", flush=True)
        return

    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    message_count = 0
    parse_success_count = 0
    parse_error_count = 0
    print(f"[{topic}] Entering message loop...", flush=True) # Confirm loop entry

    try:
        # ** Add try/except directly around the loop **
        for message in consumer:
            message_count += 1
            raw_value_str = message.value # Now it's a string from deserializer

            # --- DEBUG: Print raw string value ---
#             print(f"[{topic}] Raw message received (Offset {message.offset}): {raw_value_str}", flush=True)

            try:
                # ** Manual JSON loading **
                msg = json.loads(raw_value_str)

                if not isinstance(msg, dict):
                    print(f"[{topic}] Error: Decoded JSON is not a dictionary: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                # --- Extract data ---
                bldg_id = msg.get('building_id')
                time_bucket = msg.get('time')
                val_raw = msg.get('total_power_6h')
                date_str = msg.get('date')

                # --- Validate required fields ---
                if not all([bldg_id is not None, time_bucket, val_raw is not None, date_str]):
                    print(f"[{topic}] Skipping msg (Offset {message.offset}) with missing fields: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                # --- Validate data types ---
                try:
                    val = float(val_raw) # Directly try float conversion
                except (ValueError, TypeError):
                    print(f"[{topic}] Invalid value type for total_power_6h: '{val_raw}' in msg: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                if time_bucket not in time_buckets:
                    print(f"[{topic}] Invalid time bucket: '{time_bucket}' in msg: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                # --- DEBUG: Confirm successful parsing ---
#                 print(f"[{topic}] Parsed OK (Offset {message.offset}): Date={date_str}, Bldg={bldg_id}, Val={val}", flush=True)
                parse_success_count += 1

                # --- Add to store ---
                with data_lock:
                    building_data_store[date_str][time_bucket][str(bldg_id)] = val

            except json.JSONDecodeError as json_e:
                print(f"[{topic}] JSON Decode Error (Offset {message.offset}): {json_e}. Raw data: {raw_value_str}", flush=True)
                parse_error_count += 1
            except Exception as e_parse:
                print(f"[{topic}] Error parsing message content (Offset {message.offset}): {e_parse}. Message: {msg}", flush=True)
                traceback.print_exc()
                parse_error_count += 1

            # Print stats occasionally
            if message_count % 2000 == 0: # More frequent stats
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {parse_success_count}, Failed: {parse_error_count}", flush=True)

    except Exception as e_outer_loop:
        # Catch errors occurring during iteration (e.g., connection lost)
        print(f"[{topic}] Error IN message loop: {e_outer_loop}", flush=True)
        traceback.print_exc()
    finally:
        # Consumer closing is handled by atexit, just log counts
        print(f"[{topic}] Exiting message loop. Final Counts - Processed: {message_count}, Succeeded: {parse_success_count}, Failed: {parse_error_count}", flush=True)


def consumer_thread_site():
    """Consumes 'site_daily' data and populates the global store."""
    topic = TOPIC_SITE
    print(f"[{topic}] Consumer thread started.", flush=True)
    consumer = connect_kafka_consumer(topic)
    if consumer is None:
        print(f"[{topic}] Thread exiting due to connection failure.", flush=True)
        return

    all_sites = [str(i) for i in range(16)]
    message_count = 0
    parse_success_count = 0
    parse_error_count = 0
    print(f"[{topic}] Entering message loop...", flush=True) # Confirm loop entry

    try:
        # ** Add try/except directly around the loop **
        for message in consumer:
            message_count += 1
            raw_value_str = message.value # Now it's a string

            # --- DEBUG: Print raw string value ---
#             print(f"[{topic}] Raw message received (Offset {message.offset}): {raw_value_str}", flush=True)

            try:
                # ** Manual JSON loading **
                msg = json.loads(raw_value_str)

                if not isinstance(msg, dict):
                    print(f"[{topic}] Error: Decoded JSON is not a dictionary: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                # --- Extract data ---
                site_id_raw = msg.get('site_id')
                val_raw = msg.get('total_power_day')
                date_str = msg.get('date')

                # --- Validate required fields ---
                if site_id_raw is None or val_raw is None or date_str is None:
                    print(f"[{topic}] Skipping msg (Offset {message.offset}) with missing fields: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                # --- Validate data types ---
                try:
                    val = float(val_raw) # Directly try float conversion
                except (ValueError, TypeError):
                    print(f"[{topic}] Invalid value type for total_power_day: '{val_raw}' in msg: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                site_id_str = str(site_id_raw)
                if site_id_str not in all_sites:
                    print(f"[{topic}] Invalid site_id: {site_id_str} in msg: {msg}", flush=True)
                    parse_error_count += 1
                    continue

                # --- DEBUG: Confirm successful parsing ---
#                 print(f"[{topic}] Parsed OK (Offset {message.offset}): Date={date_str}, Site={site_id_str}, Val={val}", flush=True)
                parse_success_count += 1

                # --- Add to store ---
                with data_lock:
                    site_data_store[date_str][site_id_str] = val

            except json.JSONDecodeError as json_e:
                 print(f"[{topic}] JSON Decode Error (Offset {message.offset}): {json_e}. Raw data: {raw_value_str}", flush=True)
                 parse_error_count += 1
            except Exception as e_parse:
                print(f"[{topic}] Error parsing message content (Offset {message.offset}): {e_parse}. Message: {msg}", flush=True)
                traceback.print_exc()
                parse_error_count += 1

            # Print stats occasionally
            if message_count % 2000 == 0: # More frequent stats
                 print(f"[{topic}] Stats - Processed: {message_count}, Succeeded: {parse_success_count}, Failed: {parse_error_count}", flush=True)

    except Exception as e_outer_loop:
        # Catch errors occurring during iteration
        print(f"[{topic}] Error IN message loop: {e_outer_loop}", flush=True)
        traceback.print_exc()
    finally:
        # Consumer closing handled by atexit
        print(f"[{topic}] Exiting message loop. Final Counts - Processed: {message_count}, Succeeded: {parse_success_count}, Failed: {parse_error_count}", flush=True)

# --- Plotting Functions and Main Block ---
# (draw_building_plot, draw_site_plot, is_data_ready_for_date,
#  plotter_and_manager_thread, cleanup_consumers, and the main try/except block
#  remain unchanged from the previous refined version)

# --- 3. Plotting Functions ---
# (draw_building_plot and draw_site_plot remain the same as previous version)
def draw_building_plot(fig, axes, target_date_str):
    """Draws the 2x2 building plot for a specific target date."""
    global yaxis_max_building
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    ax_map = {"0-6h": axes[0, 0], "6-12h": axes[0, 1], "12-18h": axes[1, 0], "18-24h": axes[1, 1]}
    max_val_found = 0.0

    # Use .get() on the main store to handle missing dates gracefully
    data_for_day = building_data_store.get(target_date_str, {})

    for bucket in time_buckets:
        ax = ax_map[bucket]
        ax.cla()
        # Use .get() on the date's data to handle missing buckets gracefully
        bucket_data = data_for_day.get(bucket, {})
        sorted_items = sorted(bucket_data.items(), key=lambda item: item[1], reverse=True)
        top_8_items = sorted_items[:8]

        if top_8_items:
            labels, values = zip(*top_8_items)
            ax.bar(labels, values)
            ax.set_xticklabels(labels, rotation=75, ha='right') # Ensure labels align well
            max_val_found = max(max_val_found, max(values) if values else 0) # Handle empty values list

        ax.set_title(f"Time: {bucket}")
        ax.set_ylabel("Total Power (6h)")

    yaxis_max_building = max(yaxis_max_building, max_val_found, 500)
    for ax in axes.flatten():
        ax.set_ylim(bottom=0, top=yaxis_max_building * 1.1)

    fig.suptitle(f"Building Top 8 Power (Date: {target_date_str})", y=1.02)
    fig.tight_layout(rect=[0, 0.03, 1, 0.97]) # Adjust rect slightly for better layout

def draw_site_plot(fig, axes, target_date_str, prev_date_str):
    """Draws the 2x1 site plot for a target date and previous date."""
    global yaxis_max_site
    ax_prev, ax_curr = axes[0], axes[1]
    all_sites = [str(i) for i in range(16)]

    # Use .get() with a default for missing dates
    default_day_data = {site: 0 for site in all_sites}
    prev_data = site_data_store.get(prev_date_str, default_day_data)
    curr_data = site_data_store.get(target_date_str, default_day_data)

    prev_values = [prev_data.get(site, 0) for site in all_sites]
    curr_values = [curr_data.get(site, 0) for site in all_sites]

    max_val_found = 0.0
    if prev_values: max_val_found = max(max_val_found, max(prev_values))
    if curr_values: max_val_found = max(max_val_found, max(curr_values))

    yaxis_max_site = max(yaxis_max_site, max_val_found, 5000)

    ax_prev.cla()
    ax_prev.bar(all_sites, prev_values, color='gray')
    ax_prev.set_title(f"Previous Day (Date: {prev_date_str})")
    ax_prev.set_ylabel("Total Power (Daily)")
    ax_prev.set_ylim(bottom=0, top=yaxis_max_site * 1.1)
    # Ensure all x-ticks are shown
    ax_prev.set_xticks(range(len(all_sites)))
    ax_prev.set_xticklabels(all_sites)


    ax_curr.cla()
    ax_curr.bar(all_sites, curr_values, color='blue')
    ax_curr.set_title(f"Target Day (Date: {target_date_str})")
    ax_curr.set_xlabel("Site ID")
    ax_curr.set_ylabel("Total Power (Daily)")
    ax_curr.set_ylim(bottom=0, top=yaxis_max_site * 1.1)
    # Ensure all x-ticks are shown
    ax_curr.set_xticks(range(len(all_sites)))
    ax_curr.set_xticklabels(all_sites)


    fig.suptitle("Daily Site Power Usage Comparison", y=1.0) # Add overall title
    fig.tight_layout(rect=[0, 0.03, 1, 0.97]) # Adjust rect


# --- 4. Plotter & Manager Thread (Modified Logic) ---

def is_data_ready_for_date(target_date_str):
    """
    Checks if data for the target date seems reasonably complete.
    Heuristic:
    - Site Daily: Checks if we have received *any* data for that date.
                  (Since defaultdict initializes, checking > 0 values is better)
    - Building 6h: Checks if all 4 time bucket keys exist for that date.
    """
    with data_lock:
        site_data = site_data_store.get(target_date_str)
        building_data = building_data_store.get(target_date_str)

        # Check site data: At least one site reported non-zero value
        site_ready = site_data is not None and any(v > 0 for v in site_data.values())

        # Check building data: All 4 time buckets ("0-6h", "6-12h", "12-18h", "18-24h") are present as keys
        building_ready = building_data is not None and all(b in building_data for b in ["0-6h", "6-12h", "12-18h", "18-24h"])

    # For now, let's prioritize having *some* site data and all building time buckets
    # Adjust this logic if needed based on data flow observation
    return site_ready and building_ready

# --- Plotter & Manager Thread (Plot Oldest Ready Date) ---

def plotter_and_manager_thread(fig_building, axes_building, fig_site, axes_site):
    """Plots the OLDEST date in the store that is ready, then removes it."""
    print("[Plotter] Plotter thread started.", flush=True)
    last_plotted_date_str = None # Keep track

    while True:
        try:
            plot_occurred = False
            target_str = None # Reset target for this cycle

            with data_lock:
                # --- Find Oldest Ready Date ---
                # Get common dates present in both stores (or adjust if only one needed)
                common_dates = sorted(list(set(building_data_store.keys()) & set(site_data_store.keys())))

                if not common_dates:
                    # print("[Plotter] No common dates found in stores.", flush=True) # Optional debug
                    pass # Wait for data to arrive
                else:
                    # Check the oldest common date first
                    oldest_date_str = common_dates[0]
                    if is_data_ready_for_date(oldest_date_str):
                        target_str = oldest_date_str
                        prev_str = (datetime.strptime(target_str, '%Y-%m-%d').date() - timedelta(days=1)).isoformat()

                        print(f"\n--- Plotting oldest ready date: {target_str} ---", flush=True)
                        # --- A. Plotting ---
                        draw_building_plot(fig_building, axes_building, target_str)
                        draw_site_plot(fig_site, axes_site, target_str, prev_str)

                        # --- B. Redraw Canvas ---
                        fig_building.canvas.draw_idle()
                        fig_site.canvas.draw_idle()

                        # --- C. Mark as Plotted & Remove Data ---
                        plot_occurred = True
                        last_plotted_date_str = target_str
                        # Remove the plotted date's data
                        if target_str in building_data_store: del building_data_store[target_str]
                        if target_str in site_data_store: del site_data_store[target_str]
                        print(f"--- Plotted and removed data for {target_str} ---", flush=True)
                    #else: # Optional: Check next oldest if needed, but let's do one per cycle
                        # print(f"[Plotter] Oldest date {oldest_date_str} not ready.", flush=True)


                # --- D. Debugger (runs every 5s regardless of plotting) ---
                ready_status = "Yes" if plot_occurred else "No (or No Data)"
                display_target = target_str if plot_occurred else (common_dates[0] if common_dates else "N/A")
                print(f"--- DEBUGGER @ {datetime.now().strftime('%H:%M:%S')} (Checked: {display_target}, Plotted This Cycle: {ready_status})---", flush=True)

                print("[Site Daily Counts (Recent 10 in Store)]:", flush=True)
                sorted_site_keys = sorted(site_data_store.keys())[:10] # Show oldest 10
                if not sorted_site_keys: print("  (No data)", flush=True)
                else:
                    for date_str in sorted_site_keys:
                        count = sum(1 for v in site_data_store[date_str].values() if v > 0)
                        print(f"  {date_str}: {count}/16 sites > 0", flush=True)

                print("[Building 6h Counts (Recent 10 in Store)]:", flush=True)
                sorted_bldg_keys = sorted(building_data_store.keys())[:10] # Show oldest 10
                if not sorted_bldg_keys: print("  (No data)", flush=True)
                else:
                    for date_str in sorted_bldg_keys:
                        b_data = building_data_store[date_str]
                        c0 = len(b_data.get("0-6h", {}))
                        c1 = len(b_data.get("6-12h", {}))
                        c2 = len(b_data.get("12-18h", {}))
                        c3 = len(b_data.get("18-24h", {}))
                        all_buckets_present = all(b in b_data for b in ["0-6h", "6-12h", "12-18h", "18-24h"])
                        all_buckets_sym = "âœ“" if all_buckets_present else "âœ—"
                        print(f"  {date_str}: Buckets:{all_buckets_sym} [0-6h:{c0}, 6-12h:{c1}, 12-18h:{c2}, 18-24h:{c3}] buildings", flush=True)

                # --- E. Pruning (Optional - less critical now we remove plotted dates) ---
                # Let's keep pruning based on today just to prevent truly ancient data buildup if consumers stop/start
                prune_threshold_date = date.today() - timedelta(days=20) # Keep ~20 days relative to *now*
                prune_threshold_str = prune_threshold_date.isoformat()

                prune_keys_b = [k for k in building_data_store if k < prune_threshold_str]
                prune_keys_s = [k for k in site_data_store if k < prune_threshold_str]
                # (Keep the deletion loop from previous version)
                pruned_b_count = 0
                for key in prune_keys_b:
                    if key in building_data_store: del building_data_store[key]; pruned_b_count += 1
                pruned_s_count = 0
                for key in prune_keys_s:
                    if key in site_data_store: del site_data_store[key]; pruned_s_count += 1

                if pruned_b_count > 0 or pruned_s_count > 0:
                     print(f"Pruned {pruned_b_count} bldg / {pruned_s_count} site days (older than {prune_threshold_str}).", flush=True)


            # --- Wait ---
            plt.pause(5.0) # Keep the 5-second interval

        except Exception as e:
            print(f"[Plotter Error] An error occurred: {e}. Retrying in 5s.", flush=True)
            traceback.print_exc()
            time.sleep(5) # Use time.sleep if plt.pause causes issues after error

# --- 5. Main Execution Block ---
def cleanup_consumers():
    """Function to close all Kafka consumers on exit."""
    print("Shutting down all Kafka consumers...", flush=True)
    for c in consumers:
        try:
            c.close()
        except Exception as e:
            print(f"Error closing consumer: {e}", flush=True)

atexit.register(cleanup_consumers)

try:
    print("Initializing plots...", flush=True)
    fig_building, axes_building = plt.subplots(2, 2, figsize=(12, 9))
    fig_building.show()

    fig_site, axes_site = plt.subplots(2, 1, figsize=(12, 9))
    fig_site.show()
    print("Plots initialized.", flush=True)

    thread_building = Thread(target=consumer_thread_building, daemon=True)
    thread_site = Thread(target=consumer_thread_site, daemon=True)
    thread_plotter = Thread(
        target=plotter_and_manager_thread,
        args=(fig_building, axes_building, fig_site, axes_site),
        daemon=True
    )

    print("Starting all threads...", flush=True)
    thread_building.start()
    thread_site.start()
    thread_plotter.start()

    print("--- All consumer and plotter threads are running. ---", flush=True)
    print("--- Plotter will update every 5s if data for the target date (7 days ago) is ready. ---", flush=True)
    print("--- Interrupt the kernel (press 'i' twice or stop button) to end. ---", flush=True)
    while True: time.sleep(10)

except Exception as e:
    print(f"Failed to start threads: {e}", flush=True)
    cleanup_consumers()

In [2]:
# Gemini v3 clean slate
# --- Cell 1: Imports ---
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from kafka3 import KafkaConsumer
import json
import time
import atexit
import threading
from collections import deque
from datetime import datetime, timedelta
import warnings

# Suppress harmless warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

print("Libraries imported successfully.")
# --- Cell 2: Enable Interactive Plotting ---
# Use 'ipympl' for a better interactive experience (requires 'pip install ipympl')
# Or use 'notebook' for the classic interactive backend.
%matplotlib ipympl
# %matplotlib notebook

# --- Cell 3: Configuration ---
HOST_IP = "192.168.0.6"  # <<< SET YOUR KAFKA SERVER IP HERE
TOPIC_BUILDING = "building_6h"
TOPIC_SITE = "site_daily"
DATA_BUFFER_SIZE = 1000 # Hold up to 1000 data points per buffer

# File paths for Task 3.1
METERS_FILE = "data/new_meters.csv"
BUILDING_INFO_FILE = "data/new_building_information.csv"

print(f"Configuration set. Host: {HOST_IP}")

# --- Cell 4: Task 3.1 - Load and Pre-process Actual Data ---

def load_actual_data(meters_file, building_file):
    """
    Loads and pre-processes actual meter and building data to create a
    lookup table for daily energy consumption per site.
    """
    try:
        # Load static datasets [cite: 45, 150]
        print(f"Loading {meters_file}...")
        meters_df = pd.read_csv(meters_file)
        
        print(f"Loading {building_file}...")
        buildings_df = pd.read_csv(building_file)

        # --- Pre-process Meter Data ---
        # Convert 'ts' to datetime objects and extract date [cite: 231]
        meters_df['ts'] = pd.to_datetime(meters_df['ts'])
        meters_df['date'] = meters_df['ts'].dt.date
        
        # Aggregate to get total power per building per day
        # This sums up all meter types ('e', 'c', 's', 'h') for each building [cite: 231]
        daily_building_actual = meters_df.groupby(['building_id', 'date'])['value'].sum().reset_index()

        # --- Pre-process Building Data ---
        # Ensure correct column names (Metadata has 'site id' with a space) [cite: 234]
        if 'site id' in buildings_df.columns:
            buildings_df = buildings_df.rename(columns={'site id': 'site_id'})
        
        # Keep only necessary columns for merging
        building_site_map = buildings_df[['building_id', 'site_id']]

        # --- Merge and Final Aggregation ---
        # Merge actual building data with site map
        daily_site_actual_merged = daily_building_actual.merge(building_site_map, on='building_id')
        
        # Aggregate by site_id and date to get the final "actual" value
        actual_data_lookup_df = daily_site_actual_merged.groupby(['site_id', 'date'])['value'].sum().reset_index()
        
        # Ensure date column is a date object for proper comparison
        actual_data_lookup_df['date'] = pd.to_datetime(actual_data_lookup_df['date']).dt.date
        
        print("Successfully loaded and pre-processed actual data lookup.")
        return actual_data_lookup_df

    except FileNotFoundError as e:
        print(f"Error: {e}. Make sure '{METERS_FILE}' and '{BUILDING_INFO_FILE}' are in the same directory.")
        return None
    except Exception as e:
        print(f"An error occurred during data loading: {e}")
        return None

# Load the data
actual_data_lookup = load_actual_data(METERS_FILE, BUILDING_INFO_FILE)
if actual_data_lookup is not None:
    print(f"Loaded {len(actual_data_lookup)} actual daily site records.")
    
# --- Cell 5: Global State and Thread Management ---

# Thread-safe data buffers
# INCREASED MAXLEN: 1000 is too small for the stream's throughput.
DATA_BUFFER_SIZE = 50000 
building_data_buffer = deque(maxlen=DATA_BUFFER_SIZE)
site_data_buffer = deque(maxlen=DATA_BUFFER_SIZE)

# Shared state variables
latest_date_received = None 
data_lock = threading.Lock() 
stop_event = threading.Event() 
consumer_threads = []
consumers_list = [] 

# Track the last date we plotted
last_plotted_date = None

print(f"Global state variables initialized. Buffer size: {DATA_BUFFER_SIZE}")

# --- Cell 6: Kafka Consumer Thread Function ---

def connect_kafka_consumer(topic):
    """Connects to Kafka, retrying a few times."""
    consumer = None
    for i in range(3): # Retry connection
        try:
            consumer = KafkaConsumer(
                topic,
                bootstrap_servers=[f'{HOST_IP}:9092'],
                auto_offset_reset='latest', # Start from new messages
                value_deserializer=lambda x: x.decode('utf-8'),
                # Use a unique group_id to ensure we get new data
                group_id=f'task3-{topic}-{datetime.now().timestamp()}',
                enable_auto_commit=True,
                consumer_timeout_ms=5000 # Timeout to check stop_event
            )
            consumers_list.append(consumer) # Register for cleanup
            print(f"[{topic}] Consumer connected successfully.")
            return consumer
        except Exception as e:
            print(f"[{topic}] Connection attempt {i+1} failed: {e}")
            time.sleep(2)
    return None

def kafka_consumer_thread(topic, buffer_deque):
    """
    Thread function to consume from a Kafka topic and fill a shared buffer.
    """
    global latest_date_received
    
    consumer = connect_kafka_consumer(topic)
    if not consumer:
        print(f"[{topic}] Thread exiting due to connection failure.")
        return

    print(f"[{topic}] Listening for messages...")
    try:
        while not stop_event.is_set():
            try:
                # Poll for messages with a timeout
                for message in consumer:
                    if stop_event.is_set():
                        break
                        
                    # 1. Deserialize JSON
                    data = json.loads(message.value)
                    
                    # 2. Parse date string to a date object
                    current_date = datetime.strptime(data['date'], '%Y-%m-%d').date()
                    data['date_obj'] = current_date # Store the object for easy filtering
                    
                    # 3. Acquire lock to update shared state
                    with data_lock:
                        buffer_deque.append(data)
                        
                        # Update the latest date seen across *all* streams
                        if latest_date_received is None or current_date > latest_date_received:
                            latest_date_received = current_date
                            
            except StopIteration:
                # This happens when consumer_timeout_ms is reached
                # It's our chance to check the stop_event
                continue
            except json.JSONDecodeError as e:
                print(f"[{topic}] JSON Decode Error: {e} - Skipping message: {message.value}")
            except Exception as e:
                print(f"[{topic}] Error in consumer loop: {e}")
                # Brief pause to prevent rapid-fire errors
                time.sleep(1)

    except Exception as e:
        print(f"[{topic}] Consumer thread unhandled exception: {e}")
    finally:
        print(f"[{topic}] Thread shutting down...")
        consumer.close()

print("Consumer thread function defined.")

# --- Cell 7: Plotting Setup and Functions (Tasks 3.2 & 3.3) ---

# Set up the figure and subplots
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 18))
fig.tight_layout(pad=6.0) # Add padding for titles

def update_plots(plot_date, bldg_data, site_data):
    """
    Clears and redraws all three plots for the specified date.
    """
    # --- ADDED DEBUGGING ---
    print(f"--- Plotting data for date: {plot_date} ---")
    print(f"    Received {len(bldg_data)} building records.")
    print(f"    Received {len(site_data)} site records.")
    # --- END DEBUGGING ---
    
    # --- Clear all axes ---
    ax1.clear()
    ax2.clear()
    ax3.clear()
    
    # Update main title with the date being plotted
    fig.suptitle(f"Energy Consumption Dashboard - Showing Data for: {plot_date}", fontsize=16)

    # === Plot 1 (Task 3.2a - Building 6h) ===
    if not bldg_data:
        ax1.set_title("Predicted Power Distribution (All Buildings) per 6h Interval")
        ax1.text(0.5, 0.5, "No building data to plot", horizontalalignment='center', transform=ax1.transAxes)
    else:
        bldg_df = pd.DataFrame(bldg_data)
        time_buckets = ['0-6h', '6-12h', '12-18h', '18-24h']
        bldg_df['time'] = pd.Categorical(bldg_df['time'], categories=time_buckets, ordered=True)
        
        sns.boxplot(x='time', y='total_power_6h', data=bldg_df, ax=ax1, order=time_buckets)
        
        ax1.set_title("Predicted Power Distribution (All Buildings) per 6h Interval")
        ax1.set_xlabel("Time Bucket")
        ax1.set_ylabel("Predicted Power (total_power_6h)")
        ax1.grid(axis='y', linestyle='--', alpha=0.7)

    # === Plot 2 (Task 3.2b - Site Daily) ===
    if not site_data:
        ax2.set_title("Predicted Daily Total Power per Site")
        ax2.text(0.5, 0.5, "No site data to plot", horizontalalignment='center', transform=ax2.transAxes)
    else:
        site_df = pd.DataFrame(site_data).sort_values('site_id')
        
        ax2.bar(site_df['site_id'], site_df['total_power_day'], color='tab:blue')
        
        ax2.set_title("Predicted Daily Total Power per Site")
        ax2.set_xlabel("Site ID")
        ax2.set_ylabel("Predicted Power (total_power_day)")
        ax2.set_xticks(range(16)) 
        ax2.grid(axis='y', linestyle='--', alpha=0.7)

    # === Plot 3 (Task 3.3 - Shortfall/Excess) ===
    if not site_data or actual_data_lookup is None:
        ax3.set_title("Predicted vs. Actual Power (Shortfall/Excess)")
        ax3.text(0.5, 0.5, "No predicted or actual data to plot", horizontalalignment='center', transform=ax3.transAxes)
    else:
        pred_df = pd.DataFrame(site_data)
        actual_df = actual_data_lookup[actual_data_lookup['date'] == plot_date]
        
        if actual_df.empty:
            ax3.set_title("Predicted vs. Actual Power (Shortfall/Excess)")
            ax3.text(0.5, 0.5, f"No *actual* data found for {plot_date}", horizontalalignment='center', transform=ax3.transAxes)
        else:
            merged_df = pred_df.merge(actual_df, on='site_id', suffixes=('_pred', '_actual'))
            merged_df['shortfall'] = merged_df['total_power_day'] - merged_df['value']
            
            colors = merged_df['shortfall'].apply(lambda x: 'red' if x > 0 else 'green')
            ax3.bar(merged_df['site_id'], merged_df['shortfall'], color=colors)
            ax3.axhline(0, color='black', linestyle='--') 
            
            ax3.set_title("Predicted vs. Actual Power (Shortfall/Excess)")
            ax3.set_xlabel("Site ID")
            ax3.set_ylabel("Shortfall (Predicted - Actual)")
            ax3.legend([
                plt.Rectangle((0,0),1,1, color='red'),
                plt.Rectangle((0,0),1,1, color='green')
            ], ['Shortfall (Over-predicted)', 'Excess (Under-predicted)'])
            ax3.set_xticks(range(16))
            ax3.grid(axis='y', linestyle='--', alpha=0.7)

    # --- Redraw the canvas ---
    fig.canvas.draw()
    
    # --- ADDED RENDER FAILSAFE ---
    # Force the interactive backend to flush the draw events
    fig.canvas.flush_events() 
    # --- END FAILSAFE ---
    
    print(f"--- Plot update for {plot_date} complete ---")


print("Plotting functions defined. Figure created.")
# Display the figure object once
display(fig)

# --- Cell 8: Main Application Loop (Start Threads and Plot) ---

def main_plot_loop():
    """
    Main loop to check for new data and trigger plots.
    Runs in the main thread.
    """
    global last_plotted_date
    
    print("Main plotting loop starting... Press 'Interrupt Kernel' to stop.")
    while not stop_event.is_set():
        try:
            current_latest_date = None
            plot_date = None
            
            # --- Check for new date to plot ---
            with data_lock:
                if latest_date_received:
                    current_latest_date = latest_date_received
            
            if current_latest_date:
                # YOUR LOGIC: Plot data for day X when data for day X+2 arrives
                plot_date = current_latest_date - timedelta(days=2)
            
            # --- Check if we already plotted this date ---
            if plot_date and plot_date != last_plotted_date:
                
                # --- Get data for the plot_date ---
                # We must do this in a thread-safe way
                with data_lock:
                    # Filter the buffers for the specific date
                    # This copies the data, so we can release the lock
                    building_data_to_plot = [
                        d for d in building_data_buffer 
                        if d['date_obj'] == plot_date
                    ]
                    site_data_to_plot = [
                        d for d in site_data_buffer 
                        if d['date_obj'] == plot_date
                    ]
                
                # --- Check if data is present ---
                if not building_data_to_plot and not site_data_to_plot:
                    # We saw day X+2, but we don't have data for day X
                    # This might happen if data is sparse.
                    # We'll mark it as "plotted" to avoid checking again.
                    print(f"No data found for {plot_date}, skipping.")
                    last_plotted_date = plot_date # Mark as done
                else:
                    # --- We have a new date and new data! ---
                    # Call update_plots (which is thread-safe)
                    update_plots(plot_date, building_data_to_plot, site_data_to_plot)
                    last_plotted_date = plot_date # Mark as done
            
            # Sleep to yield control and not busy-wait
            time.sleep(1) # Check for new data every second
            
        except KeyboardInterrupt:
            print("\nKeyboardInterrupt received. Shutting down...")
            break
        except Exception as e:
            print(f"Error in main_plot_loop: {e}")
            break
    
    print("Main plotting loop finished.")

# --- Cleanup function to run at script exit ---
def cleanup_all():
    """Signals all threads to stop and closes consumers."""
    print("\n--- Initiating shutdown ---")
    if not stop_event.is_set():
        stop_event.set()
        
        print("Waiting for consumer threads to join...")
        for t in consumer_threads:
            t.join(timeout=2) # Wait max 2s for thread
        
        print("Closing Kafka consumers...")
        for c in consumers_list:
            try:
                c.close()
            except Exception as e:
                print(f"Error closing consumer: {e}")
        
        plt.close(fig) # Close the plot window
        print("--- Cleanup complete ---")

# Register the cleanup function
atexit.register(cleanup_all)

# --- Start the application ---
if actual_data_lookup is not None:
    try:
        # Start consumer threads
        thread_bldg = threading.Thread(
            target=kafka_consumer_thread, 
            args=(TOPIC_BUILDING, building_data_buffer), 
            daemon=True
        )
        thread_site = threading.Thread(
            target=kafka_consumer_thread, 
            args=(TOPIC_SITE, site_data_buffer), 
            daemon=True
        )
        
        consumer_threads.extend([thread_bldg, thread_site])
        
        print("Starting consumer threads...")
        thread_bldg.start()
        thread_site.start()
        
        # Run the plotting loop in the main thread
        main_plot_loop()
        
    except Exception as e:
        print(f"An error occurred during startup: {e}")
    finally:
        # Ensure cleanup runs if the loop exits unexpectedly
        cleanup_all()
else:
    print("Application cannot start because actual data failed to load.")

print("Application has stopped.")

Libraries imported successfully.
Configuration set. Host: 192.168.0.6
Loading data/new_meters.csv...
Loading data/new_building_information.csv...
Successfully loaded and pre-processed actual data lookup.
Loaded 5781 actual daily site records.
Global state variables initialized. Buffer size: 50000
Consumer thread function defined.


<IPython.core.display.Javascript object>

Plotting functions defined. Figure created.


<IPython.core.display.Javascript object>

Starting consumer threads...
Main plotting loop starting... Press 'Interrupt Kernel' to stop.
[site_daily] Consumer connected successfully.
[site_daily] Listening for messages...
[building_6h] Consumer connected successfully.
[building_6h] Listening for messages...
--- Plotting data for date: 2022-06-03 ---
    Received 991 building records.
    Received 0 site records.
--- Plot update for 2022-06-03 complete ---
--- Plotting data for date: 2022-06-04 ---
    Received 3186 building records.
    Received 13 site records.
--- Plot update for 2022-06-04 complete ---
--- Plotting data for date: 2022-06-07 ---
    Received 91 building records.
    Received 0 site records.
--- Plot update for 2022-06-07 complete ---
--- Plotting data for date: 2022-06-08 ---
    Received 3143 building records.
    Received 39 site records.
--- Plot update for 2022-06-08 complete ---
--- Plotting data for date: 2022-06-10 ---
    Received 131 building records.
    Received 16 site records.
--- Plot update for

--- Plot update for 2022-09-02 complete ---
--- Plotting data for date: 2022-09-05 ---
    Received 223 building records.
    Received 0 site records.
--- Plot update for 2022-09-05 complete ---
--- Plotting data for date: 2022-09-06 ---
    Received 2838 building records.
    Received 24 site records.
--- Plot update for 2022-09-06 complete ---
--- Plotting data for date: 2022-09-08 ---
    Received 679 building records.
    Received 12 site records.
--- Plot update for 2022-09-08 complete ---
--- Plotting data for date: 2022-09-09 ---
    Received 3006 building records.
    Received 11 site records.
--- Plot update for 2022-09-09 complete ---
--- Plotting data for date: 2022-09-11 ---
    Received 308 building records.
    Received 32 site records.
--- Plot update for 2022-09-11 complete ---
--- Plotting data for date: 2022-09-12 ---
    Received 2944 building records.
    Received 25 site records.
--- Plot update for 2022-09-12 complete ---
--- Plotting data for date: 2022-09-15 ---

Starting consumer threads...
Main plotting loop starting... Press 'Interrupt Kernel' to stop.
[site_daily] Consumer connected successfully.
[site_daily] Listening for messages...
[building_6h] Consumer connected successfully.
[building_6h] Listening for messages...

KeyboardInterrupt received. Shutting down...
Main plotting loop finished.

--- Initiating shutdown ---
Waiting for consumer threads to join...


In [None]:
# Gemini attempt This is your corrected Task 3 cell with a Jupyter Log Widget
from kafka3 import KafkaConsumer
import json
import matplotlib.pyplot as plt
from datetime import datetime
from threading import Thread, Lock
import time
import copy
import ipywidgets as widgets
from IPython.display import display

# This magic command is necessary for live plotting in Jupyter
%matplotlib notebook

# --- 1. Configuration ---
hostip = "192.168.0.6"
topic_building = "building_6h"
topic_site = "site_daily"
PLOT_UPDATE_INTERVAL = 5 # 5 seconds
DATA_BUFFER_DAYS = 7 # Store 7 days of data

# --- 2. Shared Data Stores, Lock, and Max Y-Values ---
data_lock = Lock()
data_store_building = {}
data_store_site = {}
max_y_building = 500
max_y_site = 5000

# --- 3. Kafka Connection Function (Unchanged) ---
def connect_kafka_consumer(topic):
    """Connects to a specific Kafka topic."""
    return KafkaConsumer(
        topic,
        bootstrap_servers=[f'{hostip}:9092'],
        auto_offset_reset='earliest', 
        value_deserializer=lambda x: json.loads(x.decode('utf-8')),
        request_timeout_ms=10000 
    )

# --- 4. Consumer Functions (Data Collection ONLY) ---
# --- NOTICE: All print calls are now 'with log_widget: print(...)' ---

def consume_building_6h(consumer, log_widget):
    """
    Consumes from 'building_6h'.
    This function ONLY updates the shared data_store_building.
    """
    global max_y_building
    with log_widget:
        print(f"[{topic_building}] Starting consumer thread...")
    
    try:
        with log_widget:
            print(f"[{topic_building}] Consumer connected. Waiting for messages...")
        
        msg_count = 0
        for message in consumer:
            msg_count += 1
            if msg_count % 100 == 0: 
                with log_widget:
                    print(f"[{topic_building}] ... {msg_count} messages received ...")

            msg = message.value
            bldg_id = msg.get('building_id')
            time_bucket = msg.get('time')
            val = msg.get('total_power_6h') # This can be 0.0
            date_str = msg.get('date', "Unknown Date") 
            
            # --- THIS IS THE FIX ---
            # We now check for 'None' explicitly, just like in consume_site_daily.
            # This prevents 0.0 values from being filtered out.
            if bldg_id is None or time_bucket is None or val is None or date_str == "Unknown Date":
                continue
            # --- END OF FIX ---
                
            bldg_id_str = str(bldg_id)

            with data_lock:
                if date_str not in data_store_building:
                    data_store_building[date_str] = {
                        "0-6h": {}, "6-12h": {}, "12-18h": {}, "18-24h": {}
                    }
                data_store_building[date_str][time_bucket][bldg_id_str] = val
                if val > max_y_building:
                    max_y_building = val

    except Exception as e:
        with log_widget:
            print(f"CRITICAL Error in consumer for {topic_building}: {e}")
    finally:
        with log_widget:
            print(f"Closing consumer for {topic_building}")
        consumer.close()


def consume_site_daily(consumer, log_widget):
    """
    Consumes from 'site_daily'.
    This function ONLY updates the shared data_store_site.
    """
    global max_y_site
    with log_widget:
        print(f"[{topic_site}] Starting consumer thread...")
    all_sites = [str(i) for i in range(16)]
    
    try:
        with log_widget:
            print(f"[{topic_site}] Consumer connected. Waiting for messages...")
        
        msg_count = 0
        for message in consumer:
            msg_count += 1
            if msg_count % 20 == 0: # Print a heartbeat every 20 messages
                 with log_widget:
                    print(f"[{topic_site}] ... {msg_count} messages received ...")
            
            msg = message.value
            site_id = msg.get('site_id')
            val = msg.get('total_power_day')
            date_str = msg.get('date', "Unknown Date") 
            
            if site_id is None or val is None or date_str == "Unknown Date":
                continue
                
            site_id_str = str(site_id)

            with data_lock:
                if date_str not in data_store_site:
                    data_store_site[date_str] = {site: 0 for site in all_sites}
                data_store_site[date_str][site_id_str] = val
                if val > max_y_site:
                    max_y_site = val

    except Exception as e:
        with log_widget:
            print(f"CRITICAL Error in consumer for {topic_site}: {e}")
    finally:
        with log_widget:
            print(f"Closing consumer for {topic_site}")
        consumer.close()

# --- 5. Plotting Function (Runs in its own Thread) ---

def redraw_plots(fig_building, axes_building, fig_site, axes_site, log_widget):
    """
    This function runs in a loop, redrawing all plots
    every PLOT_UPDATE_INTERVAL seconds.
    """
    with log_widget:
        print(f"Starting redraw thread. Updates every {PLOT_UPDATE_INTERVAL}s.")
        
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    ax_map = {
        "0-6h": axes_building[0, 0],
        "6-12h": axes_building[0, 1],
        "12-18h": axes_building[1, 0],
        "18-24h": axes_building[1, 1]
    }
    all_sites = [str(i) for i in range(16)]
    
    while True:
        try:
            time.sleep(PLOT_UPDATE_INTERVAL)
            with log_widget:
                print(f"\n[{datetime.now()}] Plotter waking up...")
            
            local_store_building = {}
            local_store_site = {}
            sorted_dates = []
            current_max_y_bldg = 500
            current_max_y_site = 5000
            
            with data_lock:
                all_dates_set = set(data_store_building.keys()) | set(data_store_site.keys())
                sorted_dates_all = sorted(list(all_dates_set))
                
                if len(sorted_dates_all) > DATA_BUFFER_DAYS:
                    date_to_drop = sorted_dates_all[0]
                    with log_widget:
                        print(f"Data buffer full ({len(sorted_dates_all)} days). Dropping oldest: {date_to_drop}")
                    data_store_building.pop(date_to_drop, None)
                    data_store_site.pop(date_to_drop, None)
                
                local_store_building = copy.deepcopy(data_store_building)
                local_store_site = copy.deepcopy(data_store_site)
                current_max_y_bldg = max_y_building
                current_max_y_site = max_y_site
                
                final_dates_set = set(local_store_building.keys()) | set(local_store_site.keys())
                sorted_dates = sorted(list(final_dates_set))

                # --- Debugger Print ---
                with log_widget:
                    print("--- DEBUGGER: Data Points Per Date ---")
                    if not sorted_dates:
                        print("  (No data collected yet)")
                    for date in sorted_dates:
                        site_count = len(local_store_site.get(date, {}))
                        bldg_buckets = local_store_building.get(date, {})
                        bldg_count = sum(len(bldg_buckets.get(bucket, {})) for bucket in bldg_buckets)
                        print(f"  {date}: Site_Daily_Points={site_count}, Building_6h_Points={bldg_count}")
                    print(f"Total unique dates stored: {len(sorted_dates)}")
                    print("----------------------------------------")

            # --- Plotting Gate ---
            if len(sorted_dates) < DATA_BUFFER_DAYS:
                with log_widget:
                    print(f"Waiting for {DATA_BUFFER_DAYS} days of data. Collected {len(sorted_dates)}/{DATA_BUFFER_DAYS}.")
                
                # Clear plots to show we are waiting
                fig_building.suptitle(f"Waiting for {DATA_BUFFER_DAYS} days of data...", y=1.02)
                for ax in axes_building.flat: 
                    ax.cla()
                    ax.set_title("Waiting for data...")
                    ax.set_ylim(bottom=0, top=current_max_y_bldg * 1.1)
                fig_building.canvas.draw()
                
                ax_prev, ax_curr = axes_site[0], axes_site[1]
                ax_prev.cla()
                ax_prev.set_title(f"Waiting for {DATA_BUFFER_DAYS} days of data...")
                ax_prev.set_ylim(bottom=0, top=current_max_y_site * 1.1)
                ax_curr.cla()
                ax_curr.set_title("Waiting for data...")
                ax_curr.set_ylim(bottom=0, top=current_max_y_site * 1.1)
                fig_site.canvas.draw()
                
                continue 

            # --- Plotting Logic (Buffer is full) ---
            with log_widget:
                print("Buffer full. Plotting most recent data.")
            
            building_plot_date_str = sorted_dates[-1]
            current_plot_date_str = sorted_dates[-1]
            previous_plot_date_str = sorted_dates[-2]
            
            # --- Plot Building Data ---
            data_for_date = local_store_building.get(building_plot_date_str, {})
            fig_building.suptitle(f"Building Top 8 Power Consumption (Date: {building_plot_date_str})", 
                                 y=1.02)
            ylim_bldg = current_max_y_bldg * 1.1

            for bucket in time_buckets:
                ax = ax_map[bucket]
                bucket_data = data_for_date.get(bucket, {})
                sorted_items = sorted(bucket_data.items(), key=lambda item: item[1], reverse=True)
                top_8_items = sorted_items[:8]

                ax.cla()
                if top_8_items:
                    labels, values = zip(*top_8_items)
                    ax.bar(labels, values)
                    ax.set_xticklabels(labels, rotation=75)
                
                ax.set_title(f"Time Bucket: {bucket}")
                ax.set_ylabel("Total Power (6h)")
                ax.set_ylim(bottom=0, top=ylim_bldg)
            
            fig_building.tight_layout(rect=[0, 0, 1, 0.96])
            fig_building.canvas.draw()

            # --- Plot Site Data ---
            ylim_site = current_max_y_site * 1.1
            default_site_data = {site: 0 for site in all_sites}
            current_day_data = local_store_site.get(current_plot_date_str, default_site_data)
            previous_day_data = local_store_site.get(previous_plot_date_str, default_site_data)
            
            curr_values = [current_day_data.get(site, 0) for site in all_sites]
            prev_values = [previous_day_data.get(site, 0) for site in all_sites]

            # Plot Previous Day
            ax_prev = axes_site[0]
            ax_prev.cla()
            ax_prev.bar(all_sites, prev_values, color='gray')
            ax_prev.set_title(f"Previous Day's Total Usage (Date: {previous_plot_date_str})")
            ax_prev.set_ylabel("Total Power (Daily)")
            ax_prev.set_ylim(bottom=0, top=ylim_site)

            # Plot Current Day
            ax_curr = axes_site[1]
            ax_curr.cla()
            ax_curr.bar(all_sites, curr_values, color='blue')
            ax_curr.set_title(f"Current Day's Total Usage (Date: {current_plot_date_str})")
            ax_curr.set_xlabel("Site ID")
            ax_curr.set_ylabel("Total Power (Daily)")
            ax_curr.set_ylim(bottom=0, top=ylim_site)

            fig_site.tight_layout()
            fig_site.canvas.draw()
            
            with log_widget:
                print(f"[{datetime.now()}] Plot redraw complete.")
        
        except Exception as e:
            with log_widget:
                print(f"CRITICAL Error in redraw thread: {e}")
            time.sleep(PLOT_UPDATE_INTERVAL)


# --- 6. Main execution block ---
try:
    # --- NEW: Create and display the Log Output Widget ---
    print("Main: Creating log output widget...")
    log_output_widget = widgets.Output()
    display(log_output_widget)
    
    with log_output_widget:
        print("Log widget initialized. Thread output will appear here.")
    # ---
    
    with log_output_widget:
        print("Main: Connecting to Kafka consumers...")
    consumer_building = connect_kafka_consumer(topic_building)
    consumer_site = connect_kafka_consumer(topic_site)
    with log_output_widget:
        print("Main: Kafka consumers connected.")

    # Initialize plots
    print("Main: Initializing plots...") # This print is fine, it's in the main cell
    fig_building, axes_building = plt.subplots(2, 2, figsize=(10, 8))
    fig_building.show()
    
    fig_site, axes_site = plt.subplots(2, 1, figsize=(10, 8))
    fig_site.show()
    print("Main: Plot figures created.")

    # Create and start consumer threads, passing the log widget
    thread_building = Thread(target=consume_building_6h, 
                             args=(consumer_building, log_output_widget)) # Pass widget
    thread_site = Thread(target=consume_site_daily, 
                         args=(consumer_site, log_output_widget)) # Pass widget
    
    # Create and start the single plotting thread
    thread_plotter = Thread(target=redraw_plots, 
                            args=(fig_building, axes_building, fig_site, axes_site, log_output_widget)) # Pass widget
    
    # Set threads as daemons
    thread_building.daemon = True
    thread_site.daemon = True
    thread_plotter.daemon = True
    
    with log_output_widget:
        print("Main: Starting all threads...")
    thread_building.start()
    thread_site.start()
    thread_plotter.start()
    
    with log_output_widget:
        print(f"Main: All consumer and plotter threads started.")

except Exception as e:
    # If setup fails, print to both the widget and the main cell
    print(f"Main: CRITICAL Failed to start threads: {e}")
    with log_output_widget:
        print(f"Main: CRITICAL Failed to start threads: {e}")

# Note: The threads will run in the background. 
# You will need to "Interrupt" or "Restart" the kernel to stop them.

In [None]:
## Original that worked, but lots of flickering
# This is your corrected Task 3 cell
from kafka3 import KafkaConsumer
import json
import matplotlib.pyplot as plt
from datetime import datetime
from threading import Thread
import time

# This magic command is necessary for live plotting in Jupyter
%matplotlib notebook

# --- 1. Configuration ---
hostip = "192.168.0.6"
topic_building = "building_6h"
topic_site = "site_daily"

def connect_kafka_consumer(topic):
    """Connects to a specific Kafka topic."""
    return KafkaConsumer(
        topic,
        bootstrap_servers=[f'{hostip}:9092'],
        auto_offset_reset='earliest', # Start from the beginning
        value_deserializer=lambda x: json.loads(x.decode('utf-8'))
    )

# --- 2. Plotting Function for "building_6h" ---

def consume_building_6h(consumer, fig, axes):
    """
    Consumes from 'building_6h' and updates a 2x2 grid plot 
    showing the Top 8 buildings for each 6-hour time bucket.
    """
    # 4 time buckets, one for each subplot
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    
    # Map time buckets to the 2x2 grid of axes
    ax_map = {
        "0-6h": axes[0, 0],
        "6-12h": axes[0, 1],
        "12-18h": axes[1, 0],
        "18-24h": axes[1, 1]
    }

    # Data store: { "0-6h": {"bldg_1": 10, "bldg_2": 20}, "6-12h": {...}, ... }
    data_store = {bucket: {} for bucket in time_buckets}
    current_date = "..."
    
    print(f"Starting consumer for topic: {topic_building}")
    
    try:
        for message in consumer:
            msg = message.value
            
            # Extract data from the message
            bldg_id = msg.get('building_id')
            time_bucket = msg.get('time')
            val = msg.get('total_power_6h')
            # The 'date' field you added from the window
            date_str = msg.get('date', "Unknown Date") 
            
            if not all([bldg_id, time_bucket, val]):
                print(f"[{topic_building}] Skipping malformed message: {msg}")
                continue
                
            bldg_id_str = str(bldg_id)

            # --- Date Change Logic ---
            if date_str != current_date:
                print(f"[{topic_building}] New Date detected: {date_str}. Clearing data.")
                current_date = date_str
                # Reset data for the new day
                data_store = {bucket: {} for bucket in time_buckets}
                # Update the main figure title
                fig.suptitle(f"Building Top 8 Power Consumption (Date: {current_date})", 
                             y=1.02)
            
            # --- Update Data Store ---
            data_store[time_bucket][bldg_id_str] = val
            
            # --- Redraw the specific subplot that changed ---
            ax = ax_map[time_bucket]
            bucket_data = data_store[time_bucket]
            
            # Sort by value (highest first) and take Top 8
            sorted_items = sorted(bucket_data.items(), 
                                  key=lambda item: item[1], 
                                  reverse=True)
            top_8_items = sorted_items[:8]

            # Clear this specific subplot
            ax.cla()

            if top_8_items:
                # Unzip the (key, value) pairs
                labels, values = zip(*top_8_items)
                
                # Plot the new bars
                ax.bar(labels, values)
                ax.set_xticklabels(labels, rotation=75) # Rotate for readability
            
            ax.set_title(f"Time Bucket: {time_bucket}")
            ax.set_ylabel("Total Power (6h)")
            
            # Redraw the canvas
            fig.tight_layout(rect=[0, 0, 1, 0.96]) # Adjust for suptitle
            fig.canvas.draw()
            plt.pause(0.01)

    except Exception as e:
        print(f"Error in consumer for {topic_building}: {e}")
    finally:
        print(f"Closing consumer for {topic_building}")
        consumer.close()


# --- 3. Plotting Function for "site_daily" ---

def consume_site_daily(consumer, fig, axes):
    """
    Consumes from 'site_daily' and updates a 2x1 grid plot 
    showing current vs. previous day's usage by site.
    """
    ax_prev, ax_curr = axes[0], axes[1] # Top and bottom plots
    
    # Static x-axis with all site IDs (0-15)
    all_sites = [str(i) for i in range(16)]
    
    # Data stores, initialized to 0 for all sites
    current_day_data = {site: 0 for site in all_sites}
    previous_day_data = {site: 0 for site in all_sites}
    current_date = None
    
    print(f"Starting consumer for topic: {topic_site}")
    
    try:
        for message in consumer:
            msg = message.value
            
            site_id = msg.get('site_id')
            val = msg.get('total_power_day')
            # The 'date' field from the window
            date_str = msg.get('date', "Unknown Date") 
            
            if site_id is None or val is None:
                print(f"[{topic_site}] Skipping malformed message: {msg}")
                continue
                
            site_id_str = str(site_id)

            # --- Date Change Logic ---
            if current_date is None:
                current_date = date_str
            
            if date_str != current_date:
                print(f"[{topic_site}] New Date detected: {date_str}. Shifting data.")
                # The "current" day becomes the "previous" day
                previous_day_data = current_day_data.copy()
                # Start a new "current" day
                current_day_data = {site: 0 for site in all_sites}
                current_date = date_str

            # --- Update Data Store ---
            # Update the value for the specific site
            current_day_data[site_id_str] = val
            
            # --- Redraw BOTH subplots ---
            
            # 1. Previous Day Plot (Top)
            ax_prev.cla()
            prev_values = [previous_day_data[site] for site in all_sites]
            ax_prev.bar(all_sites, prev_values, color='gray')
            ax_prev.set_title("Previous Day's Total Usage")
            ax_prev.set_ylabel("Total Power (Daily)")
            ax_prev.set_ylim(bottom=0) # Keep y-axis from 0

            # 2. Current Day Plot (Bottom)
            ax_curr.cla()
            curr_values = [current_day_data[site] for site in all_sites]
            ax_curr.bar(all_sites, curr_values, color='blue')
            ax_curr.set_title(f"Current Day's Total Usage (Date: {current_date})")
            ax_curr.set_xlabel("Site ID")
            ax_curr.set_ylabel("Total Power (Daily)")
            ax_curr.set_ylim(bottom=0) # Keep y-axis from 0

            # Redraw the canvas
            fig.tight_layout()
            fig.canvas.draw()
            plt.pause(0.01)

    except Exception as e:
        print(f"Error in consumer for {topic_site}: {e}")
    finally:
        print(f"Closing consumer for {topic_site}")
        consumer.close()


# --- 4. Main execution block ---
try:
    # Connect to Kafka
    consumer_building = connect_kafka_consumer(topic_building)
    consumer_site = connect_kafka_consumer(topic_site)

    # Initialize plots
    # Plot 1: 2x2 grid for Building data
    fig_building, axes_building = plt.subplots(2, 2, figsize=(10, 8))
    fig_building.show()
    
    # Plot 2: 2x1 grid for Site data
    fig_site, axes_site = plt.subplots(2, 1, figsize=(10, 8))
    fig_site.show()

    # Create and start threads
    thread_building = Thread(target=consume_building_6h, 
                             args=(consumer_building, fig_building, axes_building))
    thread_site = Thread(target=consume_site_daily, 
                         args=(consumer_site, fig_site, axes_site))
    
    thread_building.start()
    thread_site.start()
    
    print(f"All consumer threads started.")

except Exception as e:
    print(f"Failed to start consumers: {e}")

# Note: The threads will run in the background. 
# You will need to "Interrupt" or "Restart" the kernel to stop them.

In [None]:
topic_site = "site_daily"

consumer_site = KafkaConsumer(
    topic_site,
    bootstrap_servers=[f'{hostip}:9092'],
    auto_offset_reset='earliest',
    value_deserializer=lambda x: json.loads(x.decode('utf-8'))
)

# Shared Data
site_current = {str(i): 0 for i in range(16)}
site_previous = {str(i): 0 for i in range(16)}
current_date_site = None
lock_site = threading.Lock()

def kafka_site_thread():
    global current_date_site, site_current, site_previous
    for msg in consumer_site:
        data = msg.value
        site_id = data.get('site_id')
        val = data.get('total_power_day')
        date_str = data.get('date', "Unknown Date")
        if site_id is None or val is None:
            continue
        site_id = str(site_id)

        with lock_site:
            if current_date_site is None:
                current_date_site = date_str
            elif date_str != current_date_site:
                site_previous = site_current.copy()
                site_current = {str(i): 0 for i in range(16)}
                current_date_site = date_str
            site_current[site_id] = val

threading.Thread(target=kafka_site_thread, daemon=True).start()

# --- Matplotlib Figure ---
fig_s, axes_s = plt.subplots(2, 1, figsize=(10, 8))
ax_prev, ax_curr = axes_s
fig_s.suptitle("Site Daily Power Consumption", y=1.02)
all_sites = [str(i) for i in range(16)]
min_ylim_site = 5000

# Initialize with placeholders
for ax, title in zip([ax_prev, ax_curr], ["Previous Day", "Current Day"]):
    ax.set_title(f"{title} (waiting for data...)")
    ax.set_ylim(0, min_ylim_site)
    ax.set_ylabel("Total Power (Daily)")
    ax.set_xlabel("Site ID")

def update_site_plot(_):
    with lock_site:
        prev_vals = np.array([site_previous[s] for s in all_sites])
        curr_vals = np.array([site_current[s] for s in all_sites])
        ymax = max(prev_vals.max(), curr_vals.max(), min_ylim_site) * 1.1

        ax_prev.clear()
        ax_prev.bar(all_sites, prev_vals, color='gray')
        ax_prev.set_title("Previous Day")
        ax_prev.set_ylim(0, ymax)
        ax_prev.set_ylabel("Total Power (Daily)")

        ax_curr.clear()
        ax_curr.bar(all_sites, curr_vals, color='blue')
        ax_curr.set_title(f"Current Day ({current_date_site or 'waiting...'})")
        ax_curr.set_ylim(0, ymax)
        ax_curr.set_xlabel("Site ID")
        ax_curr.set_ylabel("Total Power (Daily)")

    fig_s.tight_layout(rect=[0, 0, 1, 0.96])

ani_site = FuncAnimation(
    fig_s,
    update_site_plot,
    interval=100,
    cache_frame_data=False
)
plt.show()


In [None]:
# This is your corrected Task 3 cell
from kafka3 import KafkaConsumer
import json
import matplotlib.pyplot as plt
from datetime import datetime
from threading import Thread
import time

# This magic command is necessary for live plotting in Jupyter
%matplotlib notebook

# --- 1. Configuration ---
hostip = "192.168.0.6"
topic_building = "building_6h"
topic_site = "site_daily"

def connect_kafka_consumer(topic):
    """Connects to a specific Kafka topic."""
    return KafkaConsumer(
        topic,
        bootstrap_servers=[f'{hostip}:9092'],
        auto_offset_reset='earliest', # Start from the beginning
        value_deserializer=lambda x: json.loads(x.decode('utf-8'))
    )

# --- 2. Plotting Function for "building_6h" ---

def consume_building_6h(consumer, fig, axes):
    """
    Consumes from 'building_6h' and updates a 2x2 grid plot 
    showing the Top 8 buildings for each 6-hour time bucket.
    """
    # 4 time buckets, one for each subplot
    time_buckets = ["0-6h", "6-12h", "12-18h", "18-24h"]
    
    # Map time buckets to the 2x2 grid of axes
    ax_map = {
        "0-6h": axes[0, 0],
        "6-12h": axes[0, 1],
        "12-18h": axes[1, 0],
        "18-24h": axes[1, 1]
    }

    # Data store: { "0-6h": {"bldg_1": 10, "bldg_2": 20}, "6-12h": {...}, ... }
    data_store = {bucket: {} for bucket in time_buckets}
    current_date = "..."
    
    print(f"Starting consumer for topic: {topic_building}")
    
    try:
        for message in consumer:
            msg = message.value
            
            # Extract data from the message
            bldg_id = msg.get('building_id')
            time_bucket = msg.get('time')
            val = msg.get('total_power_6h')
            # The 'date' field you added from the window
            date_str = msg.get('date', "Unknown Date") 
            
            if not all([bldg_id, time_bucket, val]):
                print(f"[{topic_building}] Skipping malformed message: {msg}")
                continue
                
            bldg_id_str = str(bldg_id)

            # --- Date Change Logic ---
            if date_str != current_date:
                print(f"[{topic_building}] New Date detected: {date_str}. Clearing data.")
                current_date = date_str
                # Reset data for the new day
                data_store = {bucket: {} for bucket in time_buckets}
                # Update the main figure title
                fig.suptitle(f"Building Top 8 Power Consumption (Date: {current_date})", 
                             y=1.02)
            
            # --- Update Data Store ---
            data_store[time_bucket][bldg_id_str] = val
            
            # --- Redraw the specific subplot that changed ---
            ax = ax_map[time_bucket]
            bucket_data = data_store[time_bucket]
            
            # Sort by value (highest first) and take Top 8
            sorted_items = sorted(bucket_data.items(), 
                                  key=lambda item: item[1], 
                                  reverse=True)
            top_8_items = sorted_items[:8]

            # Clear this specific subplot
            ax.cla()

            if top_8_items:
                # Unzip the (key, value) pairs
                labels, values = zip(*top_8_items)
                
                # Plot the new bars
                ax.bar(labels, values)
                ax.set_xticklabels(labels, rotation=75) # Rotate for readability
            
            ax.set_title(f"Time Bucket: {time_bucket}")
            ax.set_ylabel("Total Power (6h)")
            
            # Redraw the canvas
            fig.tight_layout(rect=[0, 0, 1, 0.96]) # Adjust for suptitle
            fig.canvas.draw()
            plt.pause(1)

    except Exception as e:
        print(f"Error in consumer for {topic_building}: {e}")
    finally:
        print(f"Closing consumer for {topic_building}")
        consumer.close()


# --- 3. Plotting Function for "site_daily" ---

def consume_site_daily(consumer, fig, axes):
    """
    Consumes from 'site_daily' and updates a 2x1 grid plot 
    showing current vs. previous day's usage by site.
    """
    ax_prev, ax_curr = axes[0], axes[1] # Top and bottom plots
    
    # Static x-axis with all site IDs (0-15)
    all_sites = [str(i) for i in range(16)]
    
    # Data stores, initialized to 0 for all sites
    current_day_data = {site: 0 for site in all_sites}
    previous_day_data = {site: 0 for site in all_sites}
    current_date = None
    
    print(f"Starting consumer for topic: {topic_site}")
    
    try:
        for message in consumer:
            msg = message.value
            
            site_id = msg.get('site_id')
            val = msg.get('total_power_day')
            # The 'date' field from the window
            date_str = msg.get('date', "Unknown Date") 
            
            if site_id is None or val is None:
                print(f"[{topic_site}] Skipping malformed message: {msg}")
                continue
                
            site_id_str = str(site_id)

            # --- Date Change Logic ---
            if current_date is None:
                current_date = date_str
            
            if date_str != current_date:
                print(f"[{topic_site}] New Date detected: {date_str}. Shifting data.")
                # The "current" day becomes the "previous" day
                previous_day_data = current_day_data.copy()
                # Start a new "current" day
                current_day_data = {site: 0 for site in all_sites}
                current_date = date_str

            # --- Update Data Store ---
            # Update the value for the specific site
            current_day_data[site_id_str] = val
            
            # --- Redraw BOTH subplots ---
            
            # 1. Previous Day Plot (Top)
            ax_prev.cla()
            prev_values = [previous_day_data[site] for site in all_sites]
            ax_prev.bar(all_sites, prev_values, color='gray')
            ax_prev.set_title("Previous Day's Total Usage")
            ax_prev.set_ylabel("Total Power (Daily)")
            ax_prev.set_ylim(bottom=0) # Keep y-axis from 0

            # 2. Current Day Plot (Bottom)
            ax_curr.cla()
            curr_values = [current_day_data[site] for site in all_sites]
            ax_curr.bar(all_sites, curr_values, color='blue')
            ax_curr.set_title(f"Current Day's Total Usage (Date: {current_date})")
            ax_curr.set_xlabel("Site ID")
            ax_curr.set_ylabel("Total Power (Daily)")
            ax_curr.set_ylim(bottom=0) # Keep y-axis from 0

            # Redraw the canvas
            fig.tight_layout()
            fig.canvas.draw()
            plt.pause(1)

    except Exception as e:
        print(f"Error in consumer for {topic_site}: {e}")
    finally:
        print(f"Closing consumer for {topic_site}")
        consumer.close()


# --- 4. Main execution block ---
try:
    # Connect to Kafka
    consumer_building = connect_kafka_consumer(topic_building)
    consumer_site = connect_kafka_consumer(topic_site)

    # Initialize plots
    # Plot 1: 2x2 grid for Building data
    fig_building, axes_building = plt.subplots(2, 2, figsize=(10, 8))
    fig_building.show()
    
    # Plot 2: 2x1 grid for Site data
    fig_site, axes_site = plt.subplots(2, 1, figsize=(10, 8))
    fig_site.show()

    # Create and start threads
    thread_building = Thread(target=consume_building_6h, 
                             args=(consumer_building, fig_building, axes_building))
    thread_site = Thread(target=consume_site_daily, 
                         args=(consumer_site, fig_site, axes_site))
    
    thread_building.start()
    thread_site.start()
    
    print(f"All consumer threads started.")

except Exception as e:
    print(f"Failed to start consumers: {e}")

# Note: The threads will run in the background. 
# You will need to "Interrupt" or "Restart" the kernel to stop them.

1.	Load the new meters CSV file into a data frame.

In [None]:
from pyspark.sql.types import (
    StructType, StructField,
    IntegerType, StringType, DecimalType, TimestampType, DateType, DoubleType
)
# 1. Meters Table
meters_schema = StructType([
    StructField("building_id", IntegerType(), False),
    StructField("meter_type", StringType(), False),   # Char(1) -> StringType
    StructField("ts", TimestampType(), False),
    StructField("value", DecimalType(15, 4), False),
    StructField("row_id", IntegerType(), False)
])

new_meters_df = spark.read.csv(
    "data/new_meters.csv",
    header=True,
    schema=meters_schema
)

2.	Plot two diagrams to show data from 6b and 6c. You are free to choose the type of plot.

3.	Plot a diagram to visualise the daily shortfall/excess energy in each site. The shortfall/excess energy is defined as the predicted total sum of energy in each site, minus the metered data (the value can be positive or negative, depending on the model and data quality).