# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_paths = [os.path.join(dirname, filename) for filename in filenames]
    # Directories without files print nothing, exactly as a per-file loop would.
    if file_paths:
        print('\n'.join(file_paths))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Title & Introduction
## Smart Energy Advisor: Reducing Building Energy Costs with Generative AI
This notebook is part of the Google x Kaggle GenAI Intensive Capstone Project. It demonstrates how Generative AI can help building owners reduce energy costs through data analysis and practical recommendations.

# Project Objectives
- Analyze building energy usage using input data (location, size, age, etc.)
- Use Generative AI to suggest cost-saving strategies
- Visualize the potential impact of the suggested changes
- Use Gemini (google-genai) to simulate expert advice


# Tools & Technology
- python
- Google Gemini generative AI (latest version via 'google-genai' package)
- Pandas, Matplotlib, Seaborn (for data processing & visualization)
- Synthetic data generation

# How the AI Works in This Project
## Generative AI Capabilities Used
This notebook uses at least three Generative AI capabilities to provide solutions:
1. Text generation: Generate energy-saving recommendations and tips based on user input.
2. Code generation: Automatically generate code snippets for energy analysis and visualization.
3. Summarization: Summarize building energy reports and optimization suggestions using AI.
These capabilities are powered by Google's Gemini Generative AI via the google-genai package.

# Data Overview
The input data for this project includes the following features:
- Building location (e.g. city, region)
- Type of building (e.g. residential, commercial, recreational)
- Approximate floor area (in square meters)
- Year of construction
- Annual utility bills (e.g. electricity, gas, etc.)
- Additional features (e.g. number of floors, insulation level, etc.)
This data will be used to generate energy usage patterns and propose cost-effective energy-saving strategies using Generative AI.

# Conclusion
By utilizing Google Gemini Generative AI, this project demonstrates how AI can help optimize energy consumption based on various building features and provides actionable recommendations for cost-saving strategies.
The use of Generative AI capabilities, such as text generation, code generation, and summarization, provides building owners with practical insights to reduce energy costs effectively.

# Setup
Before running the code, make sure you have the required dependencies installed. This includes Python libraries for data manipulation, visualization, and working with Google Generative AI.

We will install the following libraries:
- Google Gemini Generative AI ('google-genai') for generating recommendations and insights.
- Pandas for data manipulation.
- Matplotlib and Seaborn for data visualization.
- NumPy for numerical operations.


In [None]:
!pip install -q google-genai pandas matplotlib seaborn numpy
!pip install -q google-generativeai

In [None]:
# Read the Gemini API key from Kaggle's secret store (secret name "GOOGLE_API_KEY").
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("GOOGLE_API_KEY")

import google.generativeai as genai

# Register the key with the google.generativeai client used by the cells below.
genai.configure(api_key=secret_value_0)

In [None]:
# Print every model the API key can see, with a flag for generateContent support.
models = genai.list_models()
for model in models:
    supports_generate = 'generateContent' in model.supported_generation_methods
    print(f"Name: {model.name}, Supports generateContent: {supports_generate}")

In [None]:
# Smoke-test the configured client with a single free-form prompt.
model = genai.GenerativeModel('models/gemini-1.5-pro-latest')

# `response` stays bound at notebook level after this cell; the CSV-conversion
# cell further down reads `response.text`.
response = model.generate_content("List five ways to reduce energy consumption in buildings.")

print(response.text)

# TODO
Create or generate a sample dataset

In [None]:
import warnings
import pandas as pd
import numpy as np

# Create a mock dataset
# Seed NumPy's global RNG so the random draws below are reproducible.
np.random.seed(42)

n_samples = 100

# One row per synthetic building; each column comes from its own np.random call.
data = pd.DataFrame({
    'Building_Type': np.random.choice(['Residential', 'Commercial', 'Industrial'], size=n_samples),
    'Region': np.random.choice(['Cold', 'Hot', 'Moderate'], size=n_samples),
    'Size_m2': np.random.randint(50, 5000, size=n_samples),
    'Insulation_Level': np.random.choice(['Poor', 'Average', 'Good'], size=n_samples),
    'HVAC_Efficiency': np.random.uniform(0.5, 1.0, size=n_samples).round(2),
    'Lighting_Type': np.random.choice(['Incandescent', 'CFL', 'LED'], size=n_samples),
    'Appliance_Efficiency': np.random.uniform(0.5, 1.0, size=n_samples).round(2),
    'Water_Heater_Type': np.random.choice(['Tank', 'Tankless'], size=n_samples),
    
})

# Suppress FutureWarning messages (the filter is installed globally)
warnings.filterwarnings("ignore", category=FutureWarning)

# Turn any +/-inf values into NaN, in place.
data.replace([np.inf, -np.inf], np.nan, inplace=True)

# Simulate energy cost based on some logic

def calculate_cost(row):
    """Simulate a monthly energy cost for one building record.

    Args:
        row: Mapping (e.g. a DataFrame row) providing 'Building_Type',
            'Insulation_Level', 'Lighting_Type', 'HVAC_Efficiency' and
            'Appliance_Efficiency'.

    Returns:
        The simulated cost rounded to 2 decimals: a base amount scaled by
        the insulation and lighting factors, divided by the product of the
        two efficiencies, plus Gaussian noise (mean 0, std 20) drawn from
        NumPy's global RNG.

    Raises:
        KeyError: If the insulation level or lighting type is not one of
            the known categories.
    """
    insulation_multipliers = {'Poor': 1.3, 'Average': 1.0, 'Good': 0.7}
    lighting_multipliers = {'Incandescent': 1.2, 'CFL': 1.0, 'LED': 0.7}

    # Residential buildings start from a lower base than every other type.
    if row['Building_Type'] == 'Residential':
        base_cost = 200
    else:
        base_cost = 500

    combined_efficiency = row['HVAC_Efficiency'] * row['Appliance_Efficiency']
    expected_cost = (
        base_cost
        * insulation_multipliers[row['Insulation_Level']]
        * lighting_multipliers[row['Lighting_Type']]
        / combined_efficiency
    )

    noisy_cost = expected_cost + np.random.normal(0, 20)
    return round(noisy_cost, 2)

# Apply calculate_cost to every row (axis=1) and store the result as a new column.
data['Monthly_Energy_Cost']= data.apply(calculate_cost, axis=1)

# Show the dataset
data.head()

In [None]:
# Basic structure and types
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
data.info()

# Summary statistics
print(data.describe())

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Distribution of Monthly Energy Cost
plt.figure(figsize=(8, 4))
sns.histplot(data['Monthly_Energy_Cost'], bins=30, kde=True, color='skyblue')
plt.title('Distribution of Monthly Energy Cost')
plt.show()

# Energy cost by Insulation Level
plt.figure(figsize=(8, 4))
sns.boxplot(data=data, x='Insulation_Level', y='Monthly_Energy_Cost', palette='coolwarm')
# The title names the variable on the x-axis of this box plot.
plt.title('Energy Cost by Insulation Level')
plt.show()

# Energy cost by HVAC efficiency, coloured by region
plt.figure(figsize=(8, 4))
sns.scatterplot(data=data, x='HVAC_Efficiency', y='Monthly_Energy_Cost', hue='Region')
plt.title('HVAC Efficiency vs Energy Cost')
plt.show()

In [None]:
# Correlation matrix over every numeric column.
# 'number' selects all numeric dtypes, so integer/float columns of any width
# (not only int64/float64) are included.
corr = data.select_dtypes(include='number').corr()

plt.figure(figsize=(6, 5))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

In [None]:
# Handle missing data: drop every row that contains a NaN (in place)
data.dropna(inplace=True)

# Encode categorical variables as one-hot columns (first level of each dropped)
data_encoded = pd.get_dummies(data, drop_first=True)

# Normalize selected numerical features to the [0, 1] range
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaled_columns = ['Monthly_Energy_Cost', 'HVAC_Efficiency']
data_encoded[scaled_columns] = scaler.fit_transform(data_encoded[scaled_columns])

# head() must be called; printing the bare attribute shows the bound method
# instead of the first rows.
print(data_encoded.head())

# Text Generation

In [None]:
# Read the Gemini API key from Kaggle's secret store and configure the client,
# so this section can run without the setup cell at the top of the notebook.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("GOOGLE_API_KEY")

import google.generativeai as genai

genai.configure(api_key=secret_value_0)

def generate_energy_recommendation(building_type, hvac_efficiency, insulation_level):
    """Ask Gemini for energy-saving recommendations for one building.

    Args:
        building_type: Building category inserted into the prompt
            (e.g. 'commercial').
        hvac_efficiency: HVAC efficiency value; rendered in the prompt
            followed by a '%' sign.
        insulation_level: Insulation description inserted into the prompt
            (e.g. 'Good').

    Returns:
        The model's response text with surrounding whitespace stripped.
    """
    # Each fact sits on its own line and the building type is followed by a
    # space, so the model does not receive fused tokens such as
    # "commercialbuilding" or "75%Insulation Level".
    prompt = f"""Generate energy-saving recommendations for a {building_type} building.
    HVAC Efficiency: {hvac_efficiency}%
    Insulation Level: {insulation_level}.
    Provide strategies for HVAC, lighting, and temperature control.
    """
    # Generate recommendations using the Generative AI model
    model = genai.GenerativeModel("gemini-1.5-pro-latest")
    response = model.generate_content(prompt)
    return response.text.strip()

recommendation = generate_energy_recommendation('commercial', 75, 'Good')
print("Generated Recommendation:\n", recommendation)

# Synthetic Data Generation for Energy Scenarios

In [None]:
def generate_synthetic_energy_data(season, occupancy, building_size, weather):
    """Ask Gemini for a synthetic CSV table of building energy consumption.

    Args:
        season: Season description inserted into the prompt (e.g. 'Winter').
        occupancy: Occupancy level inserted into the prompt (e.g. 'High').
        building_size: Size description inserted into the prompt (e.g. 'Large').
        weather: Weather description inserted into the prompt (e.g. 'snowy').

    Returns:
        The model's response text with surrounding whitespace stripped. The
        prompt requests CSV with headers, but the reply is returned as-is.
    """
    # The column names must be spelled exactly as the modelling cells select
    # them ('Lighting usage (kWh)', 'Total energy consumption (kWh)'); a typo
    # here is likely to be echoed into the CSV header by the model.
    prompt = f"""Generate a synthetic table of energy consumption data for a {building_size} building.
    Conditions:
    - Season: {season}
    - Occupancy Level: {occupancy}
    - Weather: {weather}

    Generate at least 10 rows with the following columns:
    - Date
    - HVAC usage (kWh)
    - Lighting usage (kWh)
    - Equipment usage (kWh)
    - Total energy consumption (kWh)
    - Estimated cost ($)

    Format the output as a table in CSV format (with headers).
    """
    response = genai.GenerativeModel("gemini-1.5-pro-latest").generate_content(prompt)
    return response.text.strip()


# Convert Synthetic CSV to DataFrame

In [None]:
from io import StringIO
import pandas as pd
import warnings

# Silence RuntimeWarning messages (the filter is installed globally).
warnings.filterwarnings("ignore", category=RuntimeWarning)

# NOTE(review): `response` is not assigned in this cell; it is whatever an
# earlier cell left bound (presumably the "List five ways..." reply rather than
# CSV output) — confirm this is the intended input.
csv_text = response.text

# Parse as comma-separated first; on a ParserError retry as tab-separated.
try:
    synthetic_data = pd.read_csv(StringIO(csv_text))
except pd.errors.ParserError:
    synthetic_data = pd.read_csv(StringIO(csv_text), sep='\t')

synthetic_data.head()

In [None]:
from datetime import datetime, timedelta

# Give every row a consecutive calendar day, starting on 1 January 2024.
start_date = datetime(2024, 1, 1)
synthetic_data['Date'] = [
    start_date + timedelta(days=day_offset)
    for day_offset in range(len(synthetic_data))
]
synthetic_data['Date'] = pd.to_datetime(synthetic_data['Date'])

# Extract calendar features from the datetime column
date_column = synthetic_data['Date']
synthetic_data['Month'] = date_column.dt.month
synthetic_data['DayOfWeek'] = date_column.dt.dayofweek

display(synthetic_data.head())

In [None]:
# Break the raw text into lines and keep the trimmed, non-blank ones.
lines = csv_text.strip().split('\n')

cleaned_lines = []
for raw_line in lines:
    trimmed = raw_line.strip()
    if trimmed != "":
        cleaned_lines.append(trimmed)

# One row per remaining line, in a single text column.
synthetic_data = pd.DataFrame(cleaned_lines, columns=["Energy_Saving_Tip"])

display(synthetic_data.head())

In [None]:
# List the column labels of the current frame.
print("Columns:", synthetic_data.columns)

# sum must be called to get per-column counts; the bare attribute would print
# the bound method instead of the numbers.
print("\nMissing values:\n", synthetic_data.isnull().sum())

In [None]:
# Request a synthetic CSV table from Gemini for a large, busy building in snowy winter.
csv_text = generate_synthetic_energy_data("Winter", "High", "Large", "snowy" )

from io import StringIO

# Clean the CSV text before reading it: the reply may be wrapped in a Markdown
# code fence, so remove both the opening ```csv marker and the closing ```
# marker. Parsing the raw text first could fail or produce a junk header.
cleaned_csv_text = csv_text.replace('```csv', '').replace('```', '').strip()

# Convert the cleaned CSV text to a DataFrame
synthetic_data = pd.read_csv(StringIO(cleaned_csv_text))

print("Data Info:")
# info() prints its own report and returns None, so it is not wrapped in print().
synthetic_data.info()

print("\nSample Data:")
display(synthetic_data.head())

print("\nStatistical Summary:")
print(synthetic_data.describe())


# Machine Learning: Training a Model to Predict Energy Consumption

## 1. Preprocessing the Data

In [None]:
# Report how many missing values each column contains
missing_per_column = synthetic_data.isnull().sum()
print(missing_per_column)

# Drop incomplete rows, then trim stray whitespace from the header names
synthetic_data = synthetic_data.dropna()
synthetic_data.columns = synthetic_data.columns.str.strip()

column_names = synthetic_data.columns.tolist()
print("Columns in DataFrame:", column_names)

display(synthetic_data.head())

In [None]:
# info() prints its own report and returns None; calling it directly avoids a
# stray "None" line in the output.
synthetic_data.info()
print(synthetic_data.describe())
synthetic_data.head()

In [None]:
# Derive calendar features only when a usable 'Date' column is present
if 'Date' not in synthetic_data.columns:
    # Nothing to convert: warn and leave the frame untouched
    print("The 'Date' column does not exist in the DataFrame.")
else:
    # Unparseable entries become NaT instead of raising
    synthetic_data['Date'] = pd.to_datetime(synthetic_data['Date'], errors='coerce')

    if synthetic_data['Date'].notnull().any():
        # At least one valid date: create new features from the date
        synthetic_data['Month'] = synthetic_data['Date'].dt.month
        synthetic_data['DayOfWeek'] = synthetic_data['Date'].dt.dayofweek
        display(synthetic_data.head())
    else:
        # Every value is NaT, so no features can be derived
        print("All date values were invalid or missing.")

## 2. Define Features and Target Variable

In [None]:
# Input columns used to predict the total consumption
feature_columns = [
    'HVAC usage (kWh)',
    'Lighting usage (kWh)',
    'Equipment usage (kWh)',
    'Month',
    'DayOfWeek',
]
# Column the model learns to predict
target_column = 'Total energy consumption (kWh)'

X = synthetic_data[feature_columns]
y = synthetic_data[target_column]

## 3. Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

## 4. Train the Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Initialize a 100-tree random forest with a fixed seed.
# Note: this rebinds the notebook-level name `model`, which an earlier cell
# assigned to a Gemini GenerativeModel.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model on the training split
model.fit(X_train, y_train)

## 5. Evaluate the Model

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Score the predictions with each regression metric in turn
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'Mean Absolute Error:{mae}')
print(f'Mean Squared Error:{mse}')
print(f'R-Squared:{r2}')

## 6. Visualize Predictions

In [None]:
import matplotlib.pyplot as plt

# Actual vs predicted consumption; points on the dashed line are perfect predictions
plt.figure(figsize=(8, 4))
plt.scatter(y_test, y_pred, color='skyblue')

# Diagonal reference line spanning the observed range of the target
lowest_actual, highest_actual = y_test.min(), y_test.max()
diagonal = [lowest_actual, highest_actual]
plt.plot(diagonal, diagonal, color='red', linestyle='--')

plt.title('Predicted vs Actual Energy Consumption')
plt.xlabel('Actual Energy Consumption')
plt.ylabel('Predicted Energy Consumption')
plt.show()

## 7. Fine-tuning the Model

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Hyperparameter search space sampled by RandomizedSearchCV
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Base estimator whose hyperparameters are tuned
rf = RandomForestRegressor(random_state=42)

# Try 10 sampled combinations with 3-fold cross-validation on all cores
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

# Report the winning combination
print("Best Parameters:", random_search.best_params_)

# Keep the estimator refit with the best parameters and predict on the test rows
best_model = random_search.best_estimator_
y_pred_best = best_model.predict(X_test)

# Score the tuned model with each regression metric in turn
mae_best, mse_best, r2_best = (
    metric(y_test, y_pred_best)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'Mean Absolute Error (Best Model):{mae_best}')
print(f'Mean Squared Error (Best Model):{mse_best}')
print(f'R-squared (Best Model):{r2_best}')





# Image Generation

In [None]:
# Use a different name so it doesn't conflict with ML model
genai_model = genai.GenerativeModel("gemini-1.5-pro-latest")

# Define image prompt
def generate_energy_system_visual(building_type, season, scenario):
    """Ask Gemini for a textual description of a building's energy-system layout.

    Args:
        building_type: Building category inserted into the prompt.
        season: Season inserted into the prompt.
        scenario: Free-text scenario inserted into the prompt
            (e.g. occupancy and insulation conditions).

    Returns:
        The model's response text with surrounding whitespace stripped.
    """
    prompt = f"""You are an expert energy consultant. Create a detailed visual description of a building's energy system layout.

    Building Type: {building_type}
    Season: {season}
    Scenario: {scenario}

    Include:
    - HVAC zones
    - Lighting layout
    - Equipment location
    - Areas of high and low energy use
    - Suggest where improvements could be made

    Respond in a detailed markdown-style description that could be visualized or drawn.
    """
    # The request must be issued here: without it the function would return the
    # text of whatever `response` an unrelated earlier cell left at notebook
    # level (or fail with NameError in a fresh session). This call consumes API
    # quota each time the function runs.
    response = genai_model.generate_content(prompt)
    return response.text.strip()

visual_description = generate_energy_system_visual("Commercial", "Winter", "High Occupancy, Low Insulation")
print(visual_description)

# Convert Description to Image

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches


# Canvas for the schematic floor plan
fig, ax = plt.subplots(figsize=(12, 8))

# Black-outlined zones, drawn in this order:
# (lower-left corner, width, height, fill colour, legend label or None)
zone_specs = [
    # Zone 1: perimeter offices on both sides (high energy use - light red)
    ((0, 0), 2, 8, 'lightcoral', 'Zone 1 - perimeter Office-High Use'),
    ((8, 0), 2, 8, 'lightcoral', None),
    # Zone 2: interior offices (moderate energy use - orange)
    ((2, 0), 6, 8, 'orange', 'Zone 2 - Interior Offices-Moderate Use'),
    # Zone 3: common areas (moderate use - yellow)
    ((2, 3), 6, 2, 'khaki', 'Zone 3 - Common Areas-Moderate Use'),
    # Server room (high energy use - red)
    ((6.5, 6.5), 1, 1, 'red', 'Server Room-High Use'),
]
for corner, width, height, fill, legend_label in zone_specs:
    # Only labelled zones get a legend entry
    label_kwargs = {'label': legend_label} if legend_label else {}
    ax.add_patch(patches.Rectangle(corner, width, height, linewidth=1,
                                   edgecolor='black', facecolor=fill, **label_kwargs))
ax.text(6.6, 6.8, 'Server\nRoom', fontsize=8)

# Symbolic equipment boxes: rooftop unit above the floor, boiler below it
# (lower-left corner, caption position, caption)
equipment_specs = [
    ((4.5, 8.2), (4.6, 8.3), 'RTU'),
    ((0.5, -1), (0.6, -0.9), 'Boiler'),
]
for corner, (text_x, text_y), caption in equipment_specs:
    ax.add_patch(patches.Rectangle(corner, 1, 0.4, linewidth=1,
                                   edgecolor='gray', facecolor='lightgray'))
    ax.text(text_x, text_y, caption, fontsize=8)

# Layout setting
ax.set_xlim(0, 10)
ax.set_ylim(-1.5, 9)
ax.set_title("Commercial Building Floor - Energy Zones (Winter)", fontsize=14)
ax.set_aspect('equal')
ax.axis('off')

# Legend built from the labelled patches
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper left')

plt.show()

# Capstone Summary 
## Project Title:
GenAI-Powered Energy Optimization for Buildings

## Objective:
Help building managers optimize energy consumption using real-time AI-generated recommendations, simulated data, and visual system analysis.

## AI Capabilities Used:
- Text Generation: Suggest actionable strategies based on HVAC efficiency, insulation, and building type.
- Synthetic Data Generation: Simulate energy usage under different conditions (season, occupancy, weather).
- Image Generation: Describe visual layouts of energy systems with potential improvement areas.


# Real-World Use Case Simulation

In [None]:
def generate_energy_recommendation(building_type, hvac_efficiency, insulation_level):
    """Build a rule-based (offline) recommendation sentence for one building.

    Args:
        building_type: Building category inserted into the message.
        hvac_efficiency: Numeric HVAC efficiency; values above 85 are
            described as "high-efficiency", everything else as
            "low-efficiency".
        insulation_level: Insulation level; "Good" is described as
            "well-insulated", any other value as "poorly insulated".

    Returns:
        A two-sentence recommendation string.
    """
    efficiency_msg = "high-efficiency" if hvac_efficiency > 85 else "low-efficiency"
    insulation_msg = "well-insulated" if insulation_level == "Good" else "poorly insulated"
    # A space follows "Your" and separates the two sentences so the message
    # does not read "YourCommercial ... insulated.Consider ...".
    return (f"Your {building_type} building has a {efficiency_msg} HVAC system and is {insulation_msg}. "
            f"Consider upgrading insulation or HVAC components to improve energy performance.")

def generate_energy_system_visual(building_type, season, conditions):
    """Return a one-sentence placeholder description of an energy-flow diagram.

    Args:
        building_type: Building category inserted into the sentence.
        season: Season inserted into the sentence.
        conditions: Free-text conditions appended after "with conditions:".

    Returns:
        The formatted description string.
    """
    description = f"A simplified diagram showing {building_type} building energy flow in {season} with conditions: {conditions}."
    return description

# Define a full assistant simulation function
def energy_assistant(building_type, hvac_efficiency, insulation_level, season, occupancy, weather, csv_text=None):
    """Run the assistant end to end and print each stage's output.

    Stages: (1) a recommendation from generate_energy_recommendation,
    (2) synthetic usage data as CSV, parsed with pandas and previewed,
    (3) a visual description from generate_energy_system_visual.

    Args:
        building_type: Building category used in all three stages.
        hvac_efficiency: HVAC efficiency passed to the recommendation stage.
        insulation_level: Insulation level used in the recommendation and the
            visual description.
        season: Season used in the data prompt and the visual description.
        occupancy: Occupancy level used in the data prompt and the visual
            description.
        weather: Weather description used in the data prompt.
        csv_text: Optional pre-generated CSV text. When given, no Gemini
            request is made (useful when API quota is exhausted); when None,
            the CSV is requested from Gemini.

    Returns:
        None. All results are printed or displayed.
    """
    # Local import so the function does not depend on an earlier cell's import.
    from io import StringIO

    print("----- Personalized Energy-Saving Recommendations -----")
    print(generate_energy_recommendation(building_type, hvac_efficiency, insulation_level))

    print("\n----- Synthetic Energy Usage Data -----")

    if csv_text is None:
        # Ask Gemini to generate real CSV-formatted data
        prompt_data = (
            f"Generate a synthetic table of energy consumption data for a {building_type} building.\n"
            f"Season: {season}, Occupancy: {occupancy}, Weather: {weather}.\n"
            f"Include columns: Date, HVAC usage (kWh), Lighting usage (kWh), Equipment usage (kWh), "
            f"Total energy (kWh), Estimated cost ($).\n"
            f"Format the output strictly as CSV without explanations or notes."
        )
        # The request has to be made here: reading a notebook-level `response`
        # would silently reuse the reply to an unrelated earlier prompt.
        response = genai.GenerativeModel('gemini-1.5-pro').generate_content(prompt_data)
        csv_text = response.text

    # Print the raw CSV content
    print("----- RAW CSV TEXT -----")
    print(csv_text)

    # Best-effort preview: a malformed reply is reported, not raised, so the
    # remaining stage still runs.
    try:
        synthetic_data = pd.read_csv(StringIO(csv_text))
        display(synthetic_data.head())
    except Exception as e:
        print("Error parsing CSV:")
        print(e)

    print("\n----- Visual System Description -----")

    print(generate_energy_system_visual(building_type, season, f"{occupancy}, {insulation_level}"))

# Run the full assistant for a poorly insulated commercial building with an
# HVAC efficiency of 72, under high occupancy in a snowy winter.
energy_assistant(
    building_type="Commercial",
    hvac_efficiency=72,
    insulation_level="Poor",
    season="Winter",
    occupancy="High",
    weather="Snowy"
)


# FINAL CELL: WRAP-UP

In [None]:
# Closing message for readers of the notebook, one sentence per line.
closing_lines = (
    "Thank you for exploring my Generative AI Capstone project!",
    "This assistant demonstrates how AI can simulate and visualize energy-saving strategies for buildings.",
)
print("\n".join(closing_lines))