# Steel Industry Energy Consumption - Model Training

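This notebook trains the energy consumption prediction model end to end: it loads and preprocesses the steel industry dataset, splits it into training, validation, and test sets, trains an `EnergyPredictor` with early stopping, evaluates it on the test set, and saves the resulting checkpoint.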

In [None]:
import os
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

# Make the project package under ../src importable before the local imports below.
sys.path.append('../src')

from steel_energy_opt.data_processor import DataProcessor
from steel_energy_opt.model import EnergyPredictor, ModelTrainer

## 1. Load and Preprocess Data

In [None]:
# Initialize data processor
processor = DataProcessor()

# Load the raw CSV, then run the project's preprocessing step on it
data_path = '../data/raw/Steel_industry_data.csv'
df = processor.load_data(data_path)
df = processor.preprocess_data(df)

# Display basic information about the dataset.
# NOTE: assumes df is a pandas DataFrame. DataFrame.info() writes its report
# to stdout itself and returns None, so it must not be wrapped in print()
# (doing so emits a stray "None" line after the report).
print("Dataset Info:")
df.info()
print("\nSample Data:")
print(df.head())

## 2. Prepare Features and Split Data

In [None]:
# Build the feature matrix / target and let the processor split them
X, y = processor.prepare_features(df)
data_splits = processor.split_data(X, y)

# Convert every split to a float tensor; the key order below matches the
# order of the names on the left-hand side.
X_train, y_train, X_val, y_val, X_test, y_test = (
    torch.FloatTensor(data_splits[split_key])
    for split_key in ('X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test')
)

# Report the shape of each feature tensor
for split_label, features in (
    ("Training", X_train),
    ("Validation", X_val),
    ("Test", X_test),
):
    print(f"{split_label} set size: {features.shape}")

## 3. Initialize and Train Model

In [None]:
# Seed torch's global RNG before the model is built so that any torch-based
# randomness (e.g. weight initialisation) is repeatable across
# "Restart Kernel -> Run All".
# NOTE(review): if DataProcessor/ModelTrainer use numpy or `random` for
# shuffling, those generators need seeding as well — confirm in the project code.
SEED = 42
torch.manual_seed(SEED)

# Training budget. MAX_EPOCHS is an upper bound passed as `epochs`;
# EARLY_STOPPING_PATIENCE is passed as `early_stopping_patience`
# (presumably the number of non-improving epochs tolerated — verify in
# ModelTrainer.train).
MAX_EPOCHS = 10000
EARLY_STOPPING_PATIENCE = 20

# Initialize model; the input width is the number of feature columns
input_size = X_train.shape[1]
model = EnergyPredictor(input_size=input_size)
trainer = ModelTrainer(model)

# Train model; `history` is used below via its 'train_loss' / 'val_loss' entries
history = trainer.train(
    train_data=(X_train, y_train),
    val_data=(X_val, y_val),
    epochs=MAX_EPOCHS,
    early_stopping_patience=EARLY_STOPPING_PATIENCE
)

## 4. Visualize Training Results

In [None]:
# Loss curves for both splits, drawn on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (
    ('train_loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
):
    ax.plot(history[history_key], label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss Over Time')
ax.legend()
ax.grid(True)
plt.show()

## 5. Evaluate Model on Test Set

In [None]:
# Run the model on the test split in eval mode with gradient tracking disabled
model.eval()
with torch.no_grad():
    test_predictions = model(X_test).squeeze()
    test_loss = trainer.validate(X_test, y_test)

print(f"Test Loss: {test_loss:.4f}")

# Actual vs predicted scatter; the dashed red diagonal spans the range of the
# actual values and marks where prediction == actual.
actual_lo, actual_hi = y_test.min(), y_test.max()
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test.numpy(), test_predictions.numpy(), alpha=0.5)
ax.plot([actual_lo, actual_hi], [actual_lo, actual_hi], 'r--')
ax.set_xlabel('Actual Energy Consumption')
ax.set_ylabel('Predicted Energy Consumption')
ax.set_title('Actual vs Predicted Energy Consumption')
ax.grid(True)
plt.show()

## 6. Save Model

In [None]:
# Save the trained model.
# `os` and `datetime` are imported once in the notebook's import cell at the
# top, so this cell no longer re-imports them.
save_dir = '../models'
os.makedirs(save_dir, exist_ok=True)  # create the output directory on first run

# Timestamped file name so repeated runs do not overwrite earlier checkpoints
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_path = os.path.join(save_dir, f"model_{timestamp}.pt")

# Bundle the weights, optimizer state, loss history and test loss in one file
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': trainer.optimizer.state_dict(),
    'history': history,
    'test_loss': test_loss
}, save_path)

print(f"Model saved to {save_path}")