<a href="https://colab.research.google.com/github/Sabelz/Master_Thesis_Alexander/blob/main/utils/PowerGPs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Gaussian Processes on the Power Plant dataset

In [1]:
# Mount Google Drive so the project folder is reachable from this Colab session.
from google.colab import drive
drive.mount('/content/drive')
# Work from the project folder; the %run calls in the imports cell use relative paths.
%cd /content/drive/MyDrive/Master_Thesis_Alexander
# Set the global git identity for this session.
# NOTE(review): a personal e-mail address is hard-coded here and gets committed with
# the notebook — consider reading it from an environment variable or a Colab secret.
!git config --global user.email "alexander.sabelstrom.1040@student.uu.se"
!git config --global user.name "Sabelz"

Mounted at /content/drive
/content/drive/MyDrive/Master_Thesis_Alexander


# Imports

In [45]:
import numpy as np
import matplotlib.pyplot as plt
import torch
#!pip install gpytorch > /dev/null # Suppress prints
import gpytorch
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# Notebook configuration: render figures inline and enable IPython's autoreload
# extension in mode 2.
%matplotlib inline
%load_ext autoreload
%autoreload 2
# Execute the helper notebooks inside this kernel. Later cells rely on names they
# provide: df_PowerPlant (datasets/power.ipynb), KISSGP_NDim, InducingGP and device
# (utils/models.ipynb), and train, predict and error_metrics (utils/functions.ipynb).
# The paths are relative, so the working directory has to be the project folder.
%run "datasets/power.ipynb" # Run the Power notebook
%run "utils/models.ipynb" # Run the models notebook
%run "utils/functions.ipynb" # Run the functions notebook

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Master_Thesis_Alexander
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      9568 non-null   float64
 1   V       9568 non-null   float64
 2   AP      9568 non-null   float64
 3   RH      9568 non-null   float64
 4   PE      9568 non-null   float64
dtypes: float64(5)
memory usage: 373.9 KB
None

AT    False
V     False
AP    False
RH    False
PE    False
dtype: bool
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Master_Thesis_Alexander
----------------------------------------------------------
ALL MODELS: 
KIS

# Prepare The Data

In [79]:
# df_PowerPlant is created by datasets/power.ipynb.
powerPlant_data = df_PowerPlant
# Every column except the last is a feature; the last column (PE, the net hourly
# electrical energy output of the plant) is the regression target.
x_power = powerPlant_data.iloc[:, :-1].to_numpy()
y_power = powerPlant_data.iloc[:, -1].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train_power, x_test_power, y_train_power, y_test_power = train_test_split(
    x_power, y_power, test_size=0.2, random_state=666
)

# NumPy arrays -> float tensors.
x_train_power = torch.from_numpy(x_train_power).float()
x_test_power = torch.from_numpy(x_test_power).float()
y_train_power = torch.from_numpy(y_train_power).float()
y_test_power = torch.from_numpy(y_test_power).float()

# Standardized copies of the features. The scaler is fitted on the training split
# only and then applied, unchanged, to the test split.
scaler = StandardScaler()
x_train_power_standardized = torch.from_numpy(scaler.fit_transform(x_train_power)).float()
x_test_power_standardized = torch.from_numpy(scaler.transform(x_test_power)).float()

# Move every tensor to the GPU when one is available.
if torch.cuda.is_available():
    x_train_power = x_train_power.cuda()
    y_train_power = y_train_power.cuda()
    x_test_power = x_test_power.cuda()
    y_test_power = y_test_power.cuda()
    x_train_power_standardized = x_train_power_standardized.cuda()
    x_test_power_standardized = x_test_power_standardized.cuda()

# Batched access to the (non-standardized) training data, reshuffled on every pass.
train_dataset_power = TensorDataset(x_train_power, y_train_power)
train_loader_power = DataLoader(train_dataset_power, shuffle=True, batch_size=500)

# Batched access to the (non-standardized) test data, kept in its original order.
test_dataset_power = TensorDataset(x_test_power, y_test_power)
test_loader_power = DataLoader(test_dataset_power, shuffle=False, batch_size=500)

# Define the KISS-GP Model for 1D-4D data

In [95]:
# KISS-GP on the raw features. KISSGP_NDim comes from utils/models.ipynb.
KISSGP_likelihood_power = gpytorch.likelihoods.GaussianLikelihood()
KISSGP_mean_power = gpytorch.means.ConstantMean()
KISSGP_kernel_power = gpytorch.kernels.RBFKernel()
KISSGP_model_power = KISSGP_NDim(
    x_train_power,
    y_train_power,
    KISSGP_likelihood_power,
    KISSGP_mean_power,
    KISSGP_kernel_power,
)

# A second model with its own likelihood, mean and kernel objects, built on the
# standardized features (the targets are the same, unscaled y_train_power).
KISSGP_likelihood_power_standardized = gpytorch.likelihoods.GaussianLikelihood()
KISSGP_mean_power_standardized = gpytorch.means.ConstantMean()
KISSGP_kernel_power_standardized = gpytorch.kernels.RBFKernel()
KISSGP_model_power_standardized = KISSGP_NDim(
    x_train_power_standardized,
    y_train_power,
    KISSGP_likelihood_power_standardized,
    KISSGP_mean_power_standardized,
    KISSGP_kernel_power_standardized,
)

# Train and Evaluate the KISS-GP Model

In [96]:
# train comes from utils/functions.ipynb; its return value is what the evaluation
# cell reports as the training time.
KISSGP_power_time = train(
    KISSGP_model_power,
    KISSGP_likelihood_power,
    x_train_power,
    y_train_power,
    training_iter=40,
)

# Same number of iterations for the model built on standardized features.
KISSGP_power_time_standardized = train(
    KISSGP_model_power_standardized,
    KISSGP_likelihood_power_standardized,
    x_train_power_standardized,
    y_train_power,
    training_iter=40,
)

In [97]:
# predict and error_metrics come from utils/functions.ipynb. Only the model calls
# need to run without gradient tracking and with fast predictive variances; the
# report is printed after the block.
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    # Model trained on the raw features.
    predictions_KISSGP_power = predict(
        KISSGP_model_power,
        KISSGP_likelihood_power,
        x_test_power,
    )
    RMSE_KISSGP_power, NLPD_KISSGP_power = error_metrics(
        x_test_power,
        y_test_power,
        predictions_KISSGP_power.mean,
        model=KISSGP_model_power,
        likelihood=KISSGP_likelihood_power,
    )

    # Model trained on the standardized features (evaluated on standardized test inputs).
    predictions_KISSGP_power_standardized = predict(
        KISSGP_model_power_standardized,
        KISSGP_likelihood_power_standardized,
        x_test_power_standardized,
    )
    RMSE_KISSGP_power_standardized, NLPD_KISSGP_power_standardized = error_metrics(
        x_test_power_standardized,
        y_test_power,
        predictions_KISSGP_power_standardized.mean,
        model=KISSGP_model_power_standardized,
        likelihood=KISSGP_likelihood_power_standardized,
    )

# Report: error metrics and training time for each variant.
print("RMSE KISS-GP: ",RMSE_KISSGP_power)
print("NLPD KISS-GP: ",NLPD_KISSGP_power)
print("Time KISS-GP: ", KISSGP_power_time)

print("RMSE KISS-GP Standardized: ",RMSE_KISSGP_power_standardized)
print("NLPD KISS-GP Standardized: ",NLPD_KISSGP_power_standardized)
print("Time KISS-GP Standardized: ", KISSGP_power_time_standardized)



RMSE KISS-GP:  4.3208427
NLPD KISS-GP:  tensor(8.0668, device='cuda:0')
Time KISS-GP:  23.90242838859558
RMSE KISS-GP Standardized:  4.9411435
NLPD KISS-GP Standardized:  tensor(5.4955, device='cuda:0')
Time KISS-GP Standardized:  11.596442222595215


# Inducing Points Model

In [None]:
# Components of the inducing-point GP.
inducing_likelihood_power = gpytorch.likelihoods.GaussianLikelihood()
inducing_mean_power = gpytorch.means.ConstantMean()
inducing_kernel_power = gpytorch.kernels.RBFKernel()
n_inducing_points = 150

# Pick the initial inducing points as a random subset of the training rows.
# The permutation is drawn from a dedicated, seeded generator so the same rows are
# selected on every run (the train/test split above is seeded with the same value)
# without disturbing the global RNG state used elsewhere.
inducing_rng = torch.Generator().manual_seed(666)
indices = torch.randperm(x_train_power.size(0), generator=inducing_rng)
# Slice the indices first so only n_inducing_points rows are gathered, instead of
# materializing a permuted copy of the whole training set.
inducing_points = x_train_power[indices[:n_inducing_points]]

# InducingGP and device are defined in utils/models.ipynb.
inducing_model_power = InducingGP(
    inducing_likelihood_power,
    inducing_mean_power,
    inducing_kernel_power,
    inducing_points,
)
inducing_model_power = inducing_model_power.to(device)  # Move the model to the selected device

# Train and Evaluate Inducing Points Model

In [None]:
# train_loader_power is handed to train_ELBO alongside the full training tensors.
# NOTE(review): train_ELBO is presumably defined in utils/functions.ipynb — confirm.
inducing_power_time = train_ELBO(
    inducing_model_power,
    inducing_likelihood_power,
    x_train_power,
    y_train_power,
    training_iter=1000,
    train_loader=train_loader_power,
)

In [None]:
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    # predict is defined in utils/functions.ipynb
    predictions_inducing_power = predict(inducing_model_power, inducing_likelihood_power, x_test_power)

    # Root Mean Square Error (RMSE)
    # scikit-learn/NumPy can only handle CPU tensors
    y_test_power_cpu = y_test_power.cpu()
    predictions_inducing_power_cpu = predictions_inducing_power.mean.cpu()
    # Take the square root of the MSE explicitly: the `squared=False` shortcut of
    # mean_squared_error is deprecated in scikit-learn 1.4 and removed in 1.6.
    RMSE_inducing_power = float(np.sqrt(mean_squared_error(y_test_power_cpu, predictions_inducing_power_cpu)))
    print("RMSE Inducing Model: ",RMSE_inducing_power)
    # The training time is stored in inducing_power_time; the previous spelling
    # (lower-case "l" instead of "i") raised a NameError.
    print("Time Inducing Model: ", inducing_power_time)

RMSE Inducing Model:  4.354558303746978
