## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
import matplotlib.pyplot as plt
from datetime import datetime

# Reached only if every import above resolved without raising
print("Libraries imported successfully!")

## 2. Connect to Database and Extract Data

In [None]:
# Open a SQLAlchemy engine on the local SQLite file that stores the readings
engine = create_engine('sqlite:///refit_energy_data.db')

# Select the (timestamp, power_w) rows whose meter_id is 'Aggregate',
# ordered by timestamp
query = """
SELECT timestamp, power_w 
FROM energy_readings 
WHERE meter_id = 'Aggregate'
ORDER BY timestamp
"""

# Run the query and convert the timestamp column with pd.to_datetime in one
# chained expression, so later cells can use the .dt accessor on it
df = pd.read_sql_query(sql=query, con=engine).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)

# Quick sanity summary: row count and the covered time span
print(f"Loaded {df.shape[0]} records")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
df.head()

## 3. Feature Engineering

In [None]:
# Derive the calendar features from the timestamp column in a single assign()
# call. Keyword arguments are evaluated in order, so is_weekend can read the
# day_of_week column created just before it.
df = df.assign(
    hour_of_day=lambda frame: frame['timestamp'].dt.hour,
    day_of_week=lambda frame: frame['timestamp'].dt.dayofweek,  # Monday=0 ... Sunday=6
    day_of_year=lambda frame: frame['timestamp'].dt.dayofyear,
    month=lambda frame: frame['timestamp'].dt.month,
    is_weekend=lambda frame: (frame['day_of_week'] >= 5).astype(int),  # 1 on Sat/Sun
)

# Summarise the new columns alongside the power_w target
print("Feature Engineering Complete!")
print("\nFeature Statistics:")
print(
    df[
        ['hour_of_day', 'day_of_week', 'day_of_year', 'month', 'is_weekend', 'power_w']
    ].describe()
)

df.head(10)

## 4. Prepare Training and Testing Data

In [None]:
# Model inputs are the five engineered calendar columns; the target is power_w
feature_columns = ['hour_of_day', 'day_of_week', 'day_of_year', 'month', 'is_weekend']
X, y = df[feature_columns], df['power_w']

# Hold out 10% of the rows for testing. shuffle=False keeps the rows in their
# existing (timestamp-sorted) order instead of sampling them at random.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    shuffle=False,
)

# Report the resulting split sizes and the inputs the model will see
print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")
print(f"\nFeatures used: {feature_columns}")

## 5. Train the Prediction Model

In [None]:
# Fit a linear regression on the training split; fit() returns the estimator
# itself, so construction and fitting can be chained
model = LinearRegression().fit(X_train, y_train)

print("Model trained successfully!")
print("\nModel Coefficients:")
# One learned weight per entry of feature_columns, in the same order
for idx, feature in enumerate(feature_columns):
    print(f"  {feature}: {model.coef_[idx]:.4f}")
print(f"  Intercept: {model.intercept_:.4f}")

## 6. Evaluate Model Performance

In [None]:
# Score the held-out rows with the fitted model
y_pred = model.predict(X_test)

# Goodness of fit first, then the error magnitudes; RMSE is the square root
# of MSE so it is expressed in the same unit as the target
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print("Model Performance Metrics:")
print(f"  Mean Absolute Error (MAE): {mae:.2f} W")
print(f"  Root Mean Squared Error (RMSE): {rmse:.2f} W")
print(f"  R² Score: {r2:.4f}")

# Compare the mean level of the measurements with the mean of the predictions
print(f"\nAverage actual power: {y_test.mean():.2f} W")
print(f"Average predicted power: {y_pred.mean():.2f} W")

## 7. Visualize Predictions

In [None]:
# Line chart of the first 100 held-out samples: measured vs. predicted power
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test.values[:100], label='Actual', marker='o', markersize=4)
ax.plot(y_pred[:100], label='Predicted', marker='x', markersize=4)
ax.set_xlabel('Sample Index')
ax.set_ylabel('Power (W)')
ax.set_title('Actual vs Predicted Energy Usage (First 100 Test Samples)')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Scatter of predicted against actual power over the whole test split
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(y_test, y_pred, alpha=0.5)
# Dashed red y = x reference line spanning the observed range of actual values;
# points on this line are predictions that match the measurement exactly
diagonal = [y_test.min(), y_test.max()]
ax.plot(diagonal, diagonal, 'r--', lw=2)
ax.set_xlabel('Actual Power (W)')
ax.set_ylabel('Predicted Power (W)')
ax.set_title('Actual vs Predicted Power Consumption')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 8. Save the Trained Model

In [None]:
# Persist the fitted estimator to disk with joblib so it can be reloaded later
model_filename = 'energy_predictor_model.joblib'
joblib.dump(value=model, filename=model_filename)

print(f"Model saved successfully to '{model_filename}'!")

# Recap of what was written: estimator class, inputs, training size, and the
# test-set metrics computed earlier in the notebook
print("\nModel details:")
print(f"  Type: {model.__class__.__name__}")
print(f"  Features: {feature_columns}")
print(f"  Training samples: {X_train.shape[0]}")
print(f"  MAE: {mae:.2f} W")
print(f"  R² Score: {r2:.4f}")

## 9. Test Model Loading (Verification)

In [None]:
# Reload the serialized model from disk to confirm the saved artifact is usable.
# NOTE: joblib.load unpickles the file, so only load model files that come
# from a trusted source.
loaded_model = joblib.load(model_filename)

# Predict on the first held-out row with the reloaded model
test_sample = X_test.iloc[:1]
prediction = loaded_model.predict(test_sample)

# Actually verify the round trip: the reloaded model must reproduce the
# in-memory model's prediction, otherwise the success message would be
# printed for an artifact that does not match what was trained.
expected_prediction = model.predict(test_sample)
if not np.allclose(prediction, expected_prediction):
    raise RuntimeError(
        "Reloaded model disagrees with the in-memory model: "
        f"{prediction[0]!r} != {expected_prediction[0]!r}"
    )

print("Model loading test successful!")
print("\nTest input features:")
print(test_sample)
print(f"\nPredicted power: {prediction[0]:.2f} W")
print(f"Actual power: {y_test.iloc[0]:.2f} W")
print("\nThe model is ready for deployment in Flask API!")