# EcoHome Database Setup
## Initialize SQLite database with sample energy and solar data

This notebook:
1. Creates the database schema
2. Generates realistic sample data for energy usage
3. Generates realistic sample data for solar generation
4. Verifies the data was inserted correctly

In [None]:
# Import required libraries
import sys
import os
from datetime import datetime, timedelta
import random
import math

# Add parent directory to path
sys.path.append(os.path.dirname(os.getcwd()))

from models.energy import EnergyUsage, SolarGeneration, init_db, get_session
print("✓ Imports successful")

## Step 1: Initialize Database Schema

Create the SQLite database and tables if they don't exist.

In [None]:
# Initialize database: hand the SQLite file name to init_db() and keep the
# returned engine, then report the resolved location of the file.
db_path = "ecohome.db"
engine = init_db(db_path)
print(
    f"✓ Database initialized at: {os.path.abspath(db_path)}",
    "✓ Tables created: energy_usage, solar_generation",
    sep="\n",
)

## Step 2: Generate Sample Energy Usage Data

Generate realistic energy usage patterns for the past 90 days with:
- Day-to-day random variation in every category (records are daily totals, so hour-by-hour patterns within a day are not modeled)
- Seasonal patterns (higher HVAC in summer/winter)
- Weekly patterns (different weekday vs weekend usage)
- Realistic breakdown by category (HVAC, appliances, EV, other)

In [None]:
def generate_energy_usage_data(
    days: int = 90,
    end_date: "datetime | None" = None,
    seed: "int | None" = None,
) -> list:
    """Generate synthetic daily household energy-usage records.

    Exactly ``days`` records are produced, one per calendar day, ending on
    (and including) the calendar day of ``end_date``.

    Args:
        days: Number of daily records to generate. Values <= 0 yield ``[]``.
        end_date: Last day of the window; its time-of-day is discarded.
            Defaults to today (``datetime.now()``).
        seed: If given, a private ``random.Random(seed)`` is used so the
            output is reproducible. If ``None``, the module-level ``random``
            generator is used (so a prior ``random.seed(...)`` still applies).

    Returns:
        A list of dicts ordered oldest-first with keys ``timestamp``
        (midnight ``datetime``), ``total_kwh``, ``hvac_kwh``,
        ``appliances_kwh``, ``ev_charging_kwh`` and ``other_kwh``. All kWh
        values are rounded to 2 decimals and ``total_kwh`` is the sum of the
        four rounded categories.
    """
    if days <= 0:
        return []

    rng = random.Random(seed) if seed is not None else random

    if end_date is None:
        end_date = datetime.now()
    last_day = end_date.replace(hour=0, minute=0, second=0, microsecond=0)
    # days - 1 so that the inclusive window [first_day, last_day] holds
    # exactly `days` entries (an inclusive `<=` loop from `last_day - days`
    # would produce days + 1).
    first_day = last_day - timedelta(days=days - 1)

    usage_data = []
    for offset in range(days):
        current_date = first_day + timedelta(days=offset)

        # Seasonal factor (higher in summer and winter)
        month = current_date.month
        if month in (6, 7, 8):  # Summer
            seasonal_factor = 1.3
        elif month in (12, 1, 2):  # Winter
            seasonal_factor = 1.2
        else:  # Spring/Fall
            seasonal_factor = 0.9

        # Weekend vs weekday (Saturday=5, Sunday=6)
        weekend_factor = 1.1 if current_date.weekday() >= 5 else 1.0

        # Base daily usage in kWh
        base_hvac = 12.0 * seasonal_factor  # HVAC usage varies by season
        base_appliances = 8.0 * weekend_factor  # Slightly higher on weekends
        base_ev = 10.0  # EV charging relatively constant
        base_other = 5.0

        # Add random variation, then round each category once so the stored
        # parts and the stored total cannot disagree.
        hvac = round(base_hvac * rng.uniform(0.8, 1.2), 2)
        appliances = round(base_appliances * rng.uniform(0.7, 1.3), 2)
        ev = round(base_ev * rng.uniform(0.5, 1.5), 2)  # More variation for EV
        other = round(base_other * rng.uniform(0.8, 1.2), 2)

        usage_data.append({
            "timestamp": current_date,
            "total_kwh": round(hvac + appliances + ev + other, 2),
            "hvac_kwh": hvac,
            "appliances_kwh": appliances,
            "ev_charging_kwh": ev,
            "other_kwh": other,
        })

    return usage_data

# Build the usage dataset and report how many daily records came back
print("Generating energy usage data...")
usage_records = generate_energy_usage_data(90)
print(f"✓ Generated {len(usage_records)} days of energy usage data")

# Show the first (oldest) record as a quick sanity check
first_usage = usage_records[0]
print("\nSample record:")
print(f"  Date: {first_usage['timestamp'].strftime('%Y-%m-%d')}")
print(f"  Total: {first_usage['total_kwh']} kWh")
print(f"  HVAC: {first_usage['hvac_kwh']} kWh")
print(f"  Appliances: {first_usage['appliances_kwh']} kWh")
print(f"  EV: {first_usage['ev_charging_kwh']} kWh")
print(f"  Other: {first_usage['other_kwh']} kWh")

## Step 3: Insert Energy Usage Data

Save the generated energy usage data to the database.

In [None]:
# Insert data into database
session = get_session()

try:
    print("Inserting energy usage records...")

    # Re-running this cell must not store the same day twice, so only days
    # whose timestamp is not yet in the table are added.
    # NOTE(review): assumes EnergyUsage.timestamp round-trips as a datetime
    # equal to the generated midnight value; if it does not, nothing is
    # skipped and every record is inserted.
    existing_timestamps = {row[0] for row in session.query(EnergyUsage.timestamp).all()}
    new_usage_rows = [
        EnergyUsage(**record)
        for record in usage_records
        if record["timestamp"] not in existing_timestamps
    ]

    session.add_all(new_usage_rows)
    session.commit()
    print(f"✓ Successfully inserted {len(new_usage_rows)} energy usage records")

    skipped_usage = len(usage_records) - len(new_usage_rows)
    if skipped_usage:
        print(f"  Skipped {skipped_usage} records whose timestamp is already stored")

except Exception as e:
    # Undo the partial transaction, report, and stop the notebook run
    session.rollback()
    print(f"✗ Error inserting data: {e}")
    raise
finally:
    session.close()

## Step 4: Generate Sample Solar Generation Data

Generate realistic solar generation patterns with:
- Daily generation totals (one record per day; the hour-by-hour sun curve is not modeled)
- Seasonal variations (more in summer, less in winter)
- Weather effects (cloudy days reduce generation)
- Realistic distribution between self-consumption, export, and battery storage

In [None]:
def generate_solar_generation_data(
    days: int = 90,
    end_date: "datetime | None" = None,
    seed: "int | None" = None,
) -> list:
    """Generate synthetic daily solar-generation records.

    Exactly ``days`` records are produced, one per calendar day, ending on
    (and including) the calendar day of ``end_date``.

    Args:
        days: Number of daily records to generate. Values <= 0 yield ``[]``.
        end_date: Last day of the window; its time-of-day is discarded.
            Defaults to today (``datetime.now()``).
        seed: If given, a private ``random.Random(seed)`` is used so the
            output is reproducible. If ``None``, the module-level ``random``
            generator is used (so a prior ``random.seed(...)`` still applies).

    Returns:
        A list of dicts ordered oldest-first with keys ``timestamp``
        (midnight ``datetime``), ``generated_kwh``, ``self_consumed_kwh``,
        ``exported_kwh`` and ``battery_stored_kwh``. Values are rounded to
        2 decimals and the three destinations sum to ``generated_kwh``.
    """
    if days <= 0:
        return []

    rng = random.Random(seed) if seed is not None else random

    if end_date is None:
        end_date = datetime.now()
    last_day = end_date.replace(hour=0, minute=0, second=0, microsecond=0)
    # days - 1 so that the inclusive window [first_day, last_day] holds
    # exactly `days` entries (an inclusive `<=` loop from `last_day - days`
    # would produce days + 1).
    first_day = last_day - timedelta(days=days - 1)

    # "sunny" appears three times so it is drawn 3 times out of 5
    weather_pool = ("sunny", "sunny", "sunny", "partly_cloudy", "cloudy")
    weather_factors = {"sunny": 1.0, "partly_cloudy": 0.7, "cloudy": 0.3}

    # Base generation (assuming 6kW system)
    base_generation = 25.0  # kWh per day at optimal conditions

    solar_data = []
    for offset in range(days):
        current_date = first_day + timedelta(days=offset)

        # Seasonal factor (more in summer, less in winter)
        month = current_date.month
        if month in (5, 6, 7):  # Peak summer
            seasonal_factor = 1.4
        elif month in (8, 9):  # Late summer/early fall
            seasonal_factor = 1.2
        elif month in (4, 10):  # Spring/fall
            seasonal_factor = 1.0
        elif month in (3, 11):  # Early spring/late fall
            seasonal_factor = 0.8
        else:  # Winter
            seasonal_factor = 0.6

        # Weather factor (random cloudy days)
        weather_factor = weather_factors[rng.choice(weather_pool)]

        # Calculate actual generation
        generated = base_generation * seasonal_factor * weather_factor * rng.uniform(0.9, 1.1)

        # Distribute generation: self-consumption 40-60%, battery 20-30%
        self_consumed = generated * rng.uniform(0.4, 0.6)
        battery_stored = generated * rng.uniform(0.2, 0.3)

        # Round once, then derive the export as the remainder of the rounded
        # figures so the stored parts always add up to the stored total.
        generated_kwh = round(generated, 2)
        self_consumed_kwh = round(self_consumed, 2)
        battery_stored_kwh = round(battery_stored, 2)
        exported_kwh = max(0.0, round(generated_kwh - self_consumed_kwh - battery_stored_kwh, 2))

        solar_data.append({
            "timestamp": current_date,
            "generated_kwh": generated_kwh,
            "self_consumed_kwh": self_consumed_kwh,
            "exported_kwh": exported_kwh,
            "battery_stored_kwh": battery_stored_kwh,
        })

    return solar_data

# Build the solar dataset and report how many daily records came back
print("Generating solar generation data...")
solar_records = generate_solar_generation_data(90)
print(f"✓ Generated {len(solar_records)} days of solar generation data")

# Show the first (oldest) record as a quick sanity check
first_solar = solar_records[0]
print("\nSample record:")
print(f"  Date: {first_solar['timestamp'].strftime('%Y-%m-%d')}")
print(f"  Generated: {first_solar['generated_kwh']} kWh")
print(f"  Self-Consumed: {first_solar['self_consumed_kwh']} kWh")
print(f"  Exported: {first_solar['exported_kwh']} kWh")
print(f"  Stored: {first_solar['battery_stored_kwh']} kWh")

## Step 5: Insert Solar Generation Data

Save the generated solar data to the database.

In [None]:
# Insert data into database
session = get_session()

try:
    print("Inserting solar generation records...")

    # Re-running this cell must not store the same day twice, so only days
    # whose timestamp is not yet in the table are added.
    # NOTE(review): assumes SolarGeneration.timestamp round-trips as a
    # datetime equal to the generated midnight value; if it does not, nothing
    # is skipped and every record is inserted.
    existing_timestamps = {row[0] for row in session.query(SolarGeneration.timestamp).all()}
    new_solar_rows = [
        SolarGeneration(**record)
        for record in solar_records
        if record["timestamp"] not in existing_timestamps
    ]

    session.add_all(new_solar_rows)
    session.commit()
    print(f"✓ Successfully inserted {len(new_solar_rows)} solar generation records")

    skipped_solar = len(solar_records) - len(new_solar_rows)
    if skipped_solar:
        print(f"  Skipped {skipped_solar} records whose timestamp is already stored")

except Exception as e:
    # Undo the partial transaction, report, and stop the notebook run
    session.rollback()
    print(f"✗ Error inserting data: {e}")
    raise
finally:
    session.close()

## Step 6: Verify Data

Query the database to verify data was inserted correctly and view summary statistics.

In [None]:
from sqlalchemy import func


def _fmt_kwh(value) -> str:
    """Render an average with 2 decimals, or 'n/a' when the query returned None."""
    return "n/a" if value is None else f"{value:.2f}"


def _fmt_share(part, whole) -> str:
    """Render part/whole as a 1-decimal percentage, or 'n/a' when undefined."""
    if part is None or not whole:
        return "n/a"
    return f"{part / whole * 100:.1f}%"


session = get_session()

try:
    # Count records
    usage_count = session.query(EnergyUsage).count()
    solar_count = session.query(SolarGeneration).count()

    print("Database Summary:")
    print("=" * 50)
    print(f"Total energy usage records: {usage_count}")
    print(f"Total solar generation records: {solar_count}")
    print()

    # Energy usage statistics. On an empty table every average is None,
    # which the _fmt_* helpers render as 'n/a' instead of raising.
    usage_stats = session.query(
        func.avg(EnergyUsage.total_kwh).label("avg_total"),
        func.avg(EnergyUsage.hvac_kwh).label("avg_hvac"),
        func.avg(EnergyUsage.appliances_kwh).label("avg_appliances"),
        func.avg(EnergyUsage.ev_charging_kwh).label("avg_ev"),
        func.avg(EnergyUsage.other_kwh).label("avg_other")
    ).first()

    print("Energy Usage Statistics (Daily Averages):")
    print("-" * 50)
    print(f"  Total: {_fmt_kwh(usage_stats.avg_total)} kWh/day")
    for label, avg in (
        ("HVAC", usage_stats.avg_hvac),
        ("Appliances", usage_stats.avg_appliances),
        ("EV Charging", usage_stats.avg_ev),
        ("Other", usage_stats.avg_other),
    ):
        print(f"  {label}: {_fmt_kwh(avg)} kWh/day ({_fmt_share(avg, usage_stats.avg_total)})")
    print()

    # Solar generation statistics
    solar_stats = session.query(
        func.avg(SolarGeneration.generated_kwh).label("avg_generated"),
        func.avg(SolarGeneration.self_consumed_kwh).label("avg_consumed"),
        func.avg(SolarGeneration.exported_kwh).label("avg_exported"),
        func.avg(SolarGeneration.battery_stored_kwh).label("avg_stored")
    ).first()

    print("Solar Generation Statistics (Daily Averages):")
    print("-" * 50)
    print(f"  Generated: {_fmt_kwh(solar_stats.avg_generated)} kWh/day")
    for label, avg in (
        ("Self-Consumed", solar_stats.avg_consumed),
        ("Exported", solar_stats.avg_exported),
        ("Stored", solar_stats.avg_stored),
    ):
        print(f"  {label}: {_fmt_kwh(avg)} kWh/day ({_fmt_share(avg, solar_stats.avg_generated)})")
    print()

    # Calculate solar offset (only defined when both averages exist and
    # average usage is non-zero)
    if solar_stats.avg_generated is not None and usage_stats.avg_total:
        solar_offset = (solar_stats.avg_generated / usage_stats.avg_total) * 100
        print(f"Solar Offset: {solar_offset:.1f}% of daily usage")
    else:
        solar_offset = None
        print("Solar Offset: n/a (no usage or solar data)")
    print()

    # Sample recent records
    print("Most Recent Records:")
    print("-" * 50)

    recent_usage = session.query(EnergyUsage).order_by(EnergyUsage.timestamp.desc()).limit(3).all()
    print("Energy Usage:")
    for usage in recent_usage:
        print(f"  {usage.timestamp.strftime('%Y-%m-%d')}: {usage.total_kwh} kWh")

    print()

    recent_solar = session.query(SolarGeneration).order_by(SolarGeneration.timestamp.desc()).limit(3).all()
    print("Solar Generation:")
    for solar in recent_solar:
        print(f"  {solar.timestamp.strftime('%Y-%m-%d')}: {solar.generated_kwh} kWh generated")

    # Only claim success when both tables actually hold data
    if usage_count and solar_count:
        print("\n✓ Database setup complete!")
    else:
        print("\n✗ Database setup incomplete: at least one table is empty (re-run Steps 3 and 5)")

finally:
    session.close()

## Summary

Database has been successfully initialized with:
- 90 days of energy usage data
- 90 days of solar generation data
- Realistic patterns including seasonal variations, weather effects, and usage patterns

The database is now ready for use by the EcoHome agent!

Next steps:
1. Run `02_rag_setup.ipynb` to set up the RAG knowledge base
2. Run `03_run_and_evaluate.ipynb` to test the agent