<a href="https://colab.research.google.com/github/Sabelz/Master_Thesis_Alexander/blob/main/GPs/ConcreteGPs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Gaussian Processes on the Concrete dataset

In [1]:
# Mount Google Drive at /content/drive so the project folder is reachable from this runtime
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder on Drive
%cd /content/drive/MyDrive/Master_Thesis_Alexander
# Configure the global git identity (e-mail and user name) for this runtime
!git config --global user.email "alexander.sabelstrom.1040@student.uu.se"
!git config --global user.name "Sabelz"

Mounted at /content/drive
/content/drive/MyDrive/Master_Thesis_Alexander


# Imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
!pip install gpytorch > \dev\null # Suppress prints
import gpytorch
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from torch.utils.data import TensorDataset, DataLoader

%matplotlib inline
%load_ext autoreload
%autoreload 2
%run "datasets/concrete.ipynb" # Run the Concrete notebook
%run "utils/models.ipynb" # Run the models notebook
%run "utils/functions.ipynb" # Run the functions notebook

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Master_Thesis_Alexander
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   Cement (component 1)(kg in a m^3 mixture)              1030 non-null   float64
 1   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  1030 non-null   float64
 2   Fly Ash (component 3)(kg in a m^3 mixture)             1030 non-null   float64
 3   Water  (component 4)(kg in a m^3 mixture)              1030 non-null   float64
 4   Superplasticizer (component 5)(kg in a m^3 mixture)    1030 non-null   float64
 5   Coarse Aggregate  (component 6)(kg in a m^3 mixture)   1030 non-null   float64
 6   Fine Aggregate (component 7)(kg in a m^3 

# Prepare The Data

In [6]:
# df_Concrete is defined in ../datasets/concrete.ipynb
concrete_data = df_Concrete

# Regression target: concrete compressive strength (last column); all other columns are inputs
x_concrete = concrete_data.iloc[:, :-1].to_numpy()
y_concrete = concrete_data.iloc[:, -1].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
x_train_concrete, x_test_concrete, y_train_concrete, y_test_concrete = train_test_split(
    x_concrete, y_concrete, test_size=0.2, random_state=666
)

# Convert each NumPy array into a float tensor
x_train_concrete = torch.from_numpy(x_train_concrete).float()
x_test_concrete = torch.from_numpy(x_test_concrete).float()
y_train_concrete = torch.from_numpy(y_train_concrete).float()
y_test_concrete = torch.from_numpy(y_test_concrete).float()

# Move all four tensors to the GPU when one is available
if torch.cuda.is_available():
    x_train_concrete = x_train_concrete.cuda()
    y_train_concrete = y_train_concrete.cuda()
    x_test_concrete = x_test_concrete.cuda()
    y_test_concrete = y_test_concrete.cuda()

# Mini-batches of the training data (shuffled, up to 500 rows per batch)
train_dataset_concrete = TensorDataset(x_train_concrete, y_train_concrete)
train_loader_concrete = DataLoader(
    train_dataset_concrete,
    batch_size=500,
    shuffle=True,
)

# Mini-batches of the test data (original order, up to 500 rows per batch)
test_dataset_concrete = TensorDataset(x_test_concrete, y_test_concrete)
test_loader_concrete = DataLoader(
    test_dataset_concrete,
    batch_size=500,
    shuffle=False,
)

# Define the KISS-GP Model for higher dimensions

In [7]:
# Components of the KISS-GP model: Gaussian likelihood, constant mean, RBF kernel
KISSGP_likelihood_concrete = gpytorch.likelihoods.GaussianLikelihood()
KISSGP_mean_concrete = gpytorch.means.ConstantMean()
KISSGP_kernel_concrete = gpytorch.kernels.RBFKernel()

# KISSGP_NDim is defined in utils/models.ipynb
KISSGP_model_concrete = KISSGP_NDim(
    x_train_concrete,
    y_train_concrete,
    KISSGP_likelihood_concrete,
    KISSGP_mean_concrete,
    KISSGP_kernel_concrete,
)

# Train and Evaluate the KISS-GP Model

In [8]:
# train is defined in utils/functions.ipynb
train(
    KISSGP_model_concrete,
    KISSGP_likelihood_concrete,
    x_train_concrete,
    y_train_concrete,
    training_iter=40,
)

In [9]:
# Predict on the test inputs without tracking gradients
with torch.no_grad(), gpytorch.settings.fast_pred_var():
  predictions_KISSGP_concrete = predict(KISSGP_model_concrete, KISSGP_likelihood_concrete, x_test_concrete) # predict is defined in utils/functions.ipynb

# Root Mean Square Error (RMSE)
# scikit-learn converts its inputs to NumPy, which only handles CPU tensors, so both tensors
# are copied to the CPU first (a no-op when they already live there).
# The square root is taken explicitly because the `squared` argument of mean_squared_error
# is no longer accepted by recent scikit-learn releases.
RMSE_KISSGP_concrete = np.sqrt(
    mean_squared_error(y_test_concrete.cpu(), predictions_KISSGP_concrete.mean.cpu())
)
print("RMSE KISS-GP: ",RMSE_KISSGP_concrete)

RMSE KISS-GP:  7.270204


# Inducing Point Model

In [10]:
# Components of the inducing-point model: Gaussian likelihood, constant mean, RBF kernel
inducing_likelihood_concrete = gpytorch.likelihoods.GaussianLikelihood()
inducing_mean_concrete = gpytorch.means.ConstantMean()
inducing_kernel_concrete = gpytorch.kernels.RBFKernel()
n_inducing_points = 150
# Random permutation of the training-row indices, drawn from a dedicated seeded generator so
# the same inducing points are selected on every run (the seed value matches the random_state
# passed to train_test_split in this notebook).
indices = torch.randperm(
    x_train_concrete.size(0),
    generator=torch.Generator().manual_seed(666),
)
# Keep the first n_inducing_points permuted indices and gather only those rows, instead of
# materialising the whole permuted training matrix and slicing it afterwards
inducing_points = x_train_concrete[indices[:n_inducing_points]]
inducing_model_concrete = InducingGP(inducing_likelihood_concrete, inducing_mean_concrete, inducing_kernel_concrete, inducing_points) # InducingGP defined in utils/models.ipynb
inducing_model_concrete = inducing_model_concrete.to(device) # Move model to device, defined in utils/models.ipynb

# Train and Evaluate Inducing Point Model

In [11]:
# Fit the inducing-point model, feeding the training data through train_loader_concrete.
# train_ELBO is presumably defined in utils/functions.ipynb alongside train -- verify there.
train_ELBO(
    inducing_model_concrete,
    inducing_likelihood_concrete,
    x_train_concrete,
    y_train_concrete,
    training_iter=1000,
    train_loader=train_loader_concrete,
)

In [12]:
# Predict on the test inputs and score the predictions without tracking gradients
with torch.no_grad(), gpytorch.settings.fast_pred_var():
  # predict is defined in utils/functions.ipynb
  predictions_inducing_concrete = predict(inducing_model_concrete, inducing_likelihood_concrete, x_test_concrete)
  # Root Mean Square Error (RMSE)
  # NumPy can only handle CPU tensors
  y_test_concrete_cpu = y_test_concrete.cpu()
  predictions_inducing_concrete_cpu = predictions_inducing_concrete.mean.cpu()
  # Take the square root explicitly: the `squared` argument of mean_squared_error is no
  # longer accepted by recent scikit-learn releases
  RMSE_inducing_concrete = np.sqrt(
      mean_squared_error(y_test_concrete_cpu, predictions_inducing_concrete_cpu)
  )
  print("RMSE Inducing Model: ",RMSE_inducing_concrete)

RMSE Inducing Model:  6.313922460669976
