In [1]:
import pandas as pd

# Load the dataset
file_path = 'resource/dataset.csv'  # Update with the actual path
data = pd.read_csv(file_path)

# Display the first few rows to understand the structure
data.head()

Unnamed: 0,Year,Month,Sector,Hydroelectric Power,Geothermal Energy,Solar Energy,Wind Energy,Wood Energy,Waste Energy,"Fuel Ethanol, Excluding Denaturant",Biomass Losses and Co-products,Biomass Energy,Total Renewable Energy,Renewable Diesel Fuel,Other Biofuels,Conventional Hydroelectric Power,Biodiesel
0,1973,1,Commerical,0.0,0.0,0.0,0.0,0.57,0.0,0.0,0.0,0.57,0.57,0.0,0.0,0.0,0.0
1,1973,1,Electric Power,0.0,0.49,0.0,0.0,0.054,0.157,0.0,0.0,0.211,89.223,0.0,0.0,88.522,0.0
2,1973,1,Industrial,1.04,0.0,0.0,0.0,98.933,0.0,0.0,0.0,98.933,99.973,0.0,0.0,0.0,0.0
3,1973,1,Residential,0.0,0.0,0.0,0.0,30.074,0.0,0.0,0.0,0.0,30.074,0.0,0.0,0.0,0.0
4,1973,1,Transportation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [2]:
# Display column names and data types
print(data.info())

# Show basic statistics
print(data.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3065 entries, 0 to 3064
Data columns (total 17 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Year                                3065 non-null   int64  
 1   Month                               3065 non-null   int64  
 2   Sector                              3065 non-null   object 
 3   Hydroelectric Power                 3065 non-null   float64
 4   Geothermal Energy                   3065 non-null   float64
 5   Solar Energy                        3065 non-null   float64
 6   Wind Energy                         3065 non-null   float64
 7   Wood Energy                         3065 non-null   float64
 8   Waste Energy                        3065 non-null   float64
 9   Fuel Ethanol, Excluding Denaturant  3065 non-null   float64
 10  Biomass Losses and Co-products      3065 non-null   float64
 11  Biomass Energy                      3065 no

In [3]:
numeric_cols = data.select_dtypes(include=['number']).columns
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())

In [4]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Drop irrelevant columns (keeping only needed features)
data_cleaned = data[['Year', 'Month', 'Sector', 'Solar Energy', 'Wind Energy', 'Biomass Energy', 'Total Renewable Energy']]

# One-hot encode the 'Sector' column
# The 'sparse' argument has been replaced with 'sparse_output' in newer versions of scikit-learn
encoder = OneHotEncoder(sparse_output=False, drop='first')  
sector_encoded = encoder.fit_transform(data_cleaned[['Sector']])
sector_encoded_df = pd.DataFrame(sector_encoded, columns=encoder.get_feature_names_out(['Sector']))

# Combine encoded data with the original data
data_cleaned = pd.concat([data_cleaned.drop('Sector', axis=1), sector_encoded_df], axis=1)

# Normalize the data using MinMaxScaler
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data_cleaned)

# Convert back to DataFrame
data_scaled = pd.DataFrame(data_scaled, columns=data_cleaned.columns)

# Display the cleaned and scaled data
data_scaled.head()

Unnamed: 0,Year,Month,Solar Energy,Wind Energy,Biomass Energy,Total Renewable Energy,Sector_Electric Power,Sector_Industrial,Sector_Residential,Sector_Transportation
0,0.0,0.0,0.0,0.0,0.002444,0.00185,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.000905,0.289521,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.424241,0.324403,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.097587,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [5]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

# Define input and target variables
X = data_scaled[['Year', 'Month', 'Solar Energy', 'Wind Energy', 'Biomass Energy', 
                 'Sector_Electric Power', 'Sector_Industrial', 'Sector_Residential', 'Sector_Transportation']]
y = data_scaled['Total Renewable Energy']

# Split into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to tensors
X_train_tensor = torch.FloatTensor(X_train.values)
y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1)
X_test_tensor = torch.FloatTensor(X_test.values)
y_test_tensor = torch.FloatTensor(y_test.values).reshape(-1, 1)

# Create DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define the neural network model
class EnergyModel(nn.Module):
    def __init__(self):
        super(EnergyModel, self).__init__()
        self.layer1 = nn.Linear(X_train_tensor.shape[1], 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        
    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = self.layer3(x)
        return x

# Initialize model, loss, and optimizer
model = EnergyModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
for epoch in range(epochs):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
with torch.no_grad():
    y_pred = model(X_test_tensor)
    mse = nn.functional.mse_loss(y_pred, y_test_tensor).item()

mse

Epoch [10/100], Loss: 0.0005
Epoch [20/100], Loss: 0.0007
Epoch [30/100], Loss: 0.0003
Epoch [40/100], Loss: 0.0001
Epoch [50/100], Loss: 0.0003
Epoch [60/100], Loss: 0.0003
Epoch [70/100], Loss: 0.0004
Epoch [80/100], Loss: 0.0002
Epoch [90/100], Loss: 0.0002
Epoch [100/100], Loss: 0.0001


0.00027989791124127805

In [6]:
with torch.no_grad():
    y_pred = model(X_test_tensor)
    mse = nn.functional.mse_loss(y_pred, y_test_tensor).item()
print(f'Test MSE: {mse:.4f}')

Test MSE: 0.0003


In [7]:
new_data = torch.FloatTensor([[2025, 5, 200, 150, 100, 300, 400, 250, 150]])
predicted_energy = model(new_data).item()
print(f'Predicted Total Renewable Energy: {predicted_energy:.2f}')

Predicted Total Renewable Energy: 214.96


In [8]:
torch.save(model.state_dict(), 'energy_model.pth')

In [9]:
model = EnergyModel()
model.load_state_dict(torch.load('energy_model.pth'))
model.eval()

  model.load_state_dict(torch.load('energy_model.pth'))


EnergyModel(
  (layer1): Linear(in_features=9, out_features=64, bias=True)
  (layer2): Linear(in_features=64, out_features=32, bias=True)
  (layer3): Linear(in_features=32, out_features=1, bias=True)
)

In [10]:
from sklearn.metrics import r2_score, mean_absolute_error

# Convert tensors to numpy arrays for scikit-learn compatibility
y_test_np = y_test_tensor.numpy()
y_pred_np = y_pred.numpy()

r2 = r2_score(y_test_np, y_pred_np)
mae = mean_absolute_error(y_test_np, y_pred_np)

print(f'R² Score: {r2:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')

R² Score: 0.9947
Mean Absolute Error: 0.0094
