In [1]:
import numpy as np
import pandas as pd
import torch
import gpytorch
import math
import matplotlib.pyplot as plt
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import UnwhitenedVariationalStrategy
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from gpytorch.utils.quadrature import GaussHermiteQuadrature1D


class GPRegressionModel(ApproximateGP):
    
    
    def __init__(self,inducing_points):
        
        
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
        variational_strategy = UnwhitenedVariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super(GPRegressionModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
       

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
        return latent_pred


In [2]:
import math
import warnings
from copy import deepcopy
from typing import Any, Optional

import torch
from torch import Tensor

from gpytorch.distributions import MultivariateNormal
from gpytorch.lazy import ZeroLazyTensor
from gpytorch.utils.warnings import GPInputWarning
from gpytorch.likelihoods import Likelihood


In [11]:
df = pd.read_csv("./data/winequality-red.csv",sep=";")

X = torch.tensor(df.iloc[:,:-1].values,dtype=torch.float32)
y = torch.tensor(df.iloc[:,-1].values,dtype=torch.float32)


In [22]:
torch.manual_seed(321)
np.random.seed(321)

folds = KFold(10)
splits = folds.split(X)

i = 0

lls = []
rmses = []

for train, test in splits:
    
    Xtrain = X[train,:]
    
    Xtrain_mean = np.mean(Xtrain.numpy(),0)
    Xtrain_std = np.std(Xtrain.numpy(),0)
    Xtrain = (Xtrain - Xtrain_mean) / Xtrain_std
    
    
    Xtest = X[test,:]
    Xtest = (Xtest - Xtrain_mean) / Xtrain_std
    
    ytrain = y[train]
    ym = np.mean(ytrain.numpy())
    ys = np.std(ytrain.numpy())
    ytrain = (ytrain-ym)/ys

    
    ytest = y[test]
    ytest = (ytest-ym)/ys
    
    Xm = np.mean(X.numpy(),0)
    Xs = np.std(X.numpy(),0)

    model = GPRegressionModel((X[:30,:]-Xm)/Xs)


    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model.train()
    likelihood.train()
        
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
    
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(Xtrain))
    
    for j in range(350):
        optimizer.zero_grad()

        samp = np.random.choice(len(Xtrain),100,replace=True)
        Xsamp = Xtrain[samp,:]
        ysamp = ytrain[samp]
        
        output = model(Xsamp)
        loss = -mll(output, ysamp)
        loss.backward()
    
        optimizer.step()
    
    

    ll = torch.mean(torch.distributions.normal.Normal(model(Xtest).mean,likelihood.noise).log_prob(ytest)).detach().numpy()
    lls.append(ll)
    
    rmse = np.sqrt(np.mean((ytest.detach().numpy()-model(Xtest).mean.detach().numpy())**2))
    rmses.append(rmse)


    i += 1
    print(i)

1
2
3
4
5
6
7
8
9
10


In [18]:
np.mean(lls)

-1.2260442

In [19]:
np.std(lls)

0.1066122

In [20]:
np.mean(rmses)

0.8020487

In [21]:
np.std(rmses)

0.06364155

[0.46849775,
 0.46849775,
 0.46849775,
 0.46849775,
 0.46849775,
 0.46849775,
 0.46849775,
 0.46849775,
 0.46849775,
 0.46849775]