# **Energy Efficiency Prediction in Smart Buildings**
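This notebook generates a synthetic hourly dataset for a smart building and trains a Random Forest regressor to predict the building's energy consumption from weather, occupancy, and time-of-day features.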

### **Steps:**
Setup: install the required libraries, then:

1. Generate a synthetic dataset
2. Train a Random Forest model and evaluate its performance
3. Visualize the results
---

In [None]:
!pip install pandas numpy scikit-learn matplotlib seaborn

## **Step 1: Generate Dataset**

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import random

# Build a synthetic hourly dataset for one building and save it to
# 'building_data.csv'. All randomness flows through one seeded generator so
# that Restart & Run All reproduces the same data, metrics and plots.
SEED = 42
rng = np.random.default_rng(SEED)

start_date = datetime(2022, 1, 1, 0)
end_date = datetime(2023, 12, 31, 23)
# Lowercase 'h' is the current hourly alias; uppercase 'H' is deprecated.
date_range = pd.date_range(start=start_date, end=end_date, freq='h')
n_rows = len(date_range)

# Occupancy is non-zero only between 08:00 and 18:59 (hours 8..18 inclusive).
working_hours = (date_range.hour >= 8) & (date_range.hour <= 18)

data = {
    'timestamp': date_range,
    'temperature': rng.normal(25, 5, n_rows),
    'humidity': rng.normal(60, 10, n_rows),
    'wind_speed': rng.normal(10, 2, n_rows),
    # integers(0, 101) draws from 0..100 inclusive, matching random.randint(0, 100).
    'occupancy': np.where(working_hours, rng.integers(0, 101, n_rows), 0),
}

df = pd.DataFrame(data)
# Target is a fixed linear combination of the features plus Gaussian noise.
df['energy_consumption'] = (
    df['temperature'] * 0.5 +
    df['humidity'] * 0.2 +
    df['occupancy'] * 1.5 +
    df['wind_speed'] * 0.3 +
    rng.normal(0, 5, len(df))
)

df.to_csv('building_data.csv', index=False)
print('Dataset created!')

## **Step 2: Train Machine Learning Model**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Reload the generated dataset, derive calendar features, fit a Random Forest
# regressor on an 80/20 random split and report MAE, RMSE and R2 on the
# held-out 20%.
df = pd.read_csv('building_data.csv', parse_dates=['timestamp'])

# Feature Engineering
df['hour'] = df['timestamp'].dt.hour
df['day'] = df['timestamp'].dt.dayofweek
# dayofweek is 0 (Monday) .. 6 (Sunday), so values >= 5 are Saturday/Sunday.
df['is_weekend'] = (df['day'] >= 5).astype(int)

features = ['temperature', 'humidity', 'wind_speed', 'occupancy', 'hour', 'day', 'is_weekend']
target = 'energy_consumption'

X = df[features]
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# Take the square root of the MSE explicitly: the `squared=False` argument
# was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it fails
# on current releases. This form works on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R2 Score: {r2:.2f}')

## **Step 3: Visualizing Results**

In [None]:
# Rank the input features by the importance scores of the fitted model and
# show them as a horizontal bar chart, most important first.
importances = model.feature_importances_
feature_importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(data=feature_importance_df, x='Importance', y='Feature', ax=ax)
ax.set_title('Feature Importance')
plt.show()

In [None]:
# Overlay the first 100 held-out targets and the corresponding predictions.
# The test split is shuffled, so the x-axis is a sample index, not time.
n_shown = 100

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_test.values[:n_shown], label='Actual')
ax.plot(y_pred[:n_shown], label='Predicted', linestyle='--')
ax.set_title('Actual vs Predicted Energy Consumption')
ax.set_xlabel('Samples')
ax.set_ylabel('Energy Consumption')
ax.legend()
plt.show()