In [1]:
import sys
import os

# Locate the project's 'src' directory: two levels above the kernel's current
# working directory, then into 'src'.
# NOTE(review): this relies on the kernel's cwd being the notebook's own folder;
# confirm if the notebook is launched from elsewhere.
notebook_dir = os.getcwd()
src_path = os.path.abspath(os.path.join(notebook_dir, "..", "..", "src"))

# Make packages under 'src' importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

In [2]:
import os
import pandas as pd
from datetime import datetime
import importlib
import prediction.estimator
# Reload the module so edits made to it on disk are picked up without
# restarting the kernel.
importlib.reload(prediction.estimator)
from prediction.estimator import train_energy_model, predict_energy

# Step 1 — Train model
# The training helper is given the processed CSV and the path where the fitted
# model should be saved; the same path is reused for prediction below.
model_path = r"E:\SustainableAiProject\model\energy_predictor\energy_predictor.pkl"
train_energy_model(r"E:\SustainableAiProject\data\processed\processed_data.csv", save_model_path=model_path)

# Step 2 — Define test feature sets (you can add multiple rows here)
# Each dict is one input row; its keys become columns of the report.
test_features_list = [
    {
        "num_layers": 12,
        "training_hours": 5,
        "flops_per_hour": 2.3e12,
        "token_count": 15000,
        "readability_score": 8.7
    },
    {
        "num_layers": 8,
        "training_hours": 3,
        "flops_per_hour": 1.5e12,
        "token_count": 12000,
        "readability_score": 7.2
    }
]

# Step 3 — Run predictions and store results
# One report row per feature set: timestamp, the input features, and the
# value returned by predict_energy.
rows = []
for features in test_features_list:
    prediction = predict_energy(model_path, features)
    rows.append({
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        **features,
        "predicted_energy": prediction
    })

# Step 4 — Save to reports/energy_estimates.csv
report_file = r"E:\SustainableAiProject\reports\energy_estimates.csv"
# Ensure the directory the CSV is actually written to exists. Previously a
# relative "reports" folder was created in the kernel's working directory,
# which is unrelated to the absolute path used for the report.
os.makedirs(os.path.dirname(report_file), exist_ok=True)
df_report = pd.DataFrame(rows)
df_report.to_csv(report_file, index=False)

print(f"✅ Report created at {report_file}")

CSV Columns: ['num_layers', 'training_hours', 'flops_per_hour', 'token_count', 'readability_score', 'energy_consumption']
✅ Model trained and saved at: E:\SustainableAiProject\model\energy_predictor\energy_predictor.pkl
✅ Report created at E:\SustainableAiProject\reports\energy_estimates.csv




In [3]:
import matplotlib.pyplot as plt
import pandas as pd
import joblib
import os

# Load model and data
# NOTE: joblib.load unpickles the file; only load model files you trust.
model = joblib.load(r"E:\SustainableAiProject\model\energy_predictor\energy_predictor.pkl")
df = pd.read_csv(r"E:\SustainableAiProject\data\processed\processed_data.csv")

features = ['num_layers', 'training_hours', 'flops_per_hour', 'token_count', 'readability_score']
# X and y are not used further in this cell; they are kept in case later
# cells read them.
X = df[features]
y = df['energy_consumption']

# Feature importance
# NOTE(review): assumes model.feature_importances_ is ordered the same way as
# `features` (i.e. the model was trained on these columns in this order) —
# confirm against the training code.
importances = model.feature_importances_
plt.figure(figsize=(8,5))
plt.bar(features, importances)
plt.title("Feature Importance for Energy Prediction")
plt.ylabel("Importance")
plt.xticks(rotation=45)
plt.tight_layout()

# Ensure the output directory exists before saving; savefig does not create
# missing parent directories and would otherwise raise FileNotFoundError.
figure_path = r"E:\SustainableAiProject\reports\visualizations\feature_importance.png"
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path)
plt.close()