In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import random

In [6]:
# Simulated dataset: energy consumption based on time, temperature, and occupancy
# A dedicated, seeded generator makes the dataset (and everything derived from it)
# identical on every Restart & Run All.
DATA_SEED = 42
n_samples = 24 * 30  # 30 days of hourly data
data_rng = np.random.default_rng(DATA_SEED)

# NOTE: 'energy_consumption' is sampled independently of the three feature columns,
# so this synthetic data carries no real feature -> target relationship.
data = {
    'hour': np.tile(np.arange(24), 30),  # 0..23 repeated once per day
    'temperature': data_rng.uniform(15, 35, n_samples),  # Temperature variation
    'occupancy': data_rng.integers(0, 100, n_samples),  # Number of people in the building (0..99)
    'energy_consumption': data_rng.uniform(50, 500, n_samples)  # Energy consumption (kWh)
}

In [7]:
# Tabular view of the simulated columns: one row per simulated hour, one column per dict key
df = pd.DataFrame.from_dict(data)

In [10]:
# Separate the predictors from the target, then hold out 20% of the rows for testing
feature_columns = ['hour', 'temperature', 'occupancy']
target_column = 'energy_consumption'
X, y = df[feature_columns], df[target_column]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed for the shuffle that selects the held-out rows
)
print(X_test.head())

# 

     hour  temperature  occupancy
340     4    33.871093         20
290     2    25.510116          2
54      6    33.999701         39
198     6    29.503034         97
453    21    26.640147         12


In [11]:
# Fit a random-forest regressor on the training split
forest_params = {
    'n_estimators': 100,  # number of trees in the ensemble
    'random_state': 42,   # fixed seed for the forest's internal randomness
}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)


In [15]:
# Q-learning for energy optimization
# Every row of the test set is treated as one discrete state; an action is a
# multiplier applied to the model's predicted consumption for that row.
Q_SEED = 42
q_rng = random.Random(Q_SEED)  # dedicated seeded RNG so the learned Q-table is reproducible

states = X_test.values
actions = [0.8, 1.0]  # 0.8: Reduce consumption, 1.0: Keep normal
q_table = np.zeros((len(states), len(actions)))  # one row per state, one column per action
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration rate

# The model does not change during Q-learning, so predict once for all states
# instead of calling model.predict on a one-row DataFrame in every iteration.
predicted_energy = model.predict(X_test)
actual_energy = y_test.to_numpy()

for episode in range(1000):  # Training iterations
    # Sample a state uniformly at random
    state_index = q_rng.randrange(len(states))

    # Epsilon-greedy action selection
    if q_rng.random() < epsilon:
        action_index = q_rng.randrange(len(actions))  # Explore
    else:
        action_index = int(np.argmax(q_table[state_index]))  # Exploit

    action = actions[action_index]

    # Reward is the negative absolute gap between the scaled prediction and the
    # actual consumption, so it is at most 0 and largest when the two match.
    reward = -abs(predicted_energy[state_index] * action - actual_energy[state_index])

    # The successor state is simply the following row of X_test (wrapping around).
    next_state_index = (state_index + 1) % len(states)
    q_table[state_index, action_index] += alpha * (
        reward + gamma * np.max(q_table[next_state_index]) - q_table[state_index, action_index]
    )

# Apply optimized actions
# For each state take the action with the highest Q-value (ties, including states
# never sampled during training, resolve to the first action) and scale the prediction.
best_action_per_state = np.asarray(actions)[np.argmax(q_table, axis=1)]
optimized_energy = (predicted_energy * best_action_per_state).tolist()

In [16]:
# Per test row: the true consumption, the model's raw prediction, and the
# prediction after applying the action selected from the Q-table
comparison_columns = {
    'Actual': y_test,
    'Predicted': model.predict(X_test),
    'Optimized': optimized_energy,
}
df_results = pd.DataFrame(comparison_columns)
print(df_results.head())


         Actual   Predicted   Optimized
340  322.824894  320.710360  320.710360
290  417.619649  256.269963  205.015970
54   403.680218  354.888425  354.888425
198  222.536113  354.838049  354.838049
453  267.065569  347.174470  277.739576
