In [1]:
import pandas as pd
import torch
import gpytorch
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load and prepare your dataset
dataset = pd.read_csv('dataset.csv')
dataset['Date'] = pd.to_datetime(dataset['Date'])

# Subsample the dataset
dataset = dataset.sample(frac=0.1, random_state=42)

# Select relevant features
X = dataset.drop(columns=['Date', 'PM2.5', 'Nombre_Estacion', 'Clave_Estacion'])  # Adjust columns as needed
y = dataset['PM2.5']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
train_x = torch.tensor(X_train, dtype=torch.float32)
train_y = torch.tensor(y_train.values, dtype=torch.float32)
test_x = torch.tensor(X_test, dtype=torch.float32)
test_y = torch.tensor(y_test.values, dtype=torch.float32)

In [2]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())  # RBF kernel is common for continuous data

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# Initialize the model and likelihood
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x, train_y, likelihood)


In [3]:
model.train()
likelihood.train()

# Use the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Marginal Log Likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# Training loop
training_iter = 500
for i in range(training_iter):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    print(f'Iter {i + 1}/{training_iter} - Loss: {loss.item():.3f}')
    optimizer.step()


Iter 1/500 - Loss: 192.997
Iter 2/500 - Loss: 192.710
Iter 3/500 - Loss: 192.422
Iter 4/500 - Loss: 192.133
Iter 5/500 - Loss: 191.846
Iter 6/500 - Loss: 191.562
Iter 7/500 - Loss: 191.273
Iter 8/500 - Loss: 190.988
Iter 9/500 - Loss: 190.701
Iter 10/500 - Loss: 190.418
Iter 11/500 - Loss: 190.134
Iter 12/500 - Loss: 189.848
Iter 13/500 - Loss: 189.566
Iter 14/500 - Loss: 189.285
Iter 15/500 - Loss: 189.001
Iter 16/500 - Loss: 188.720
Iter 17/500 - Loss: 188.439
Iter 18/500 - Loss: 188.159
Iter 19/500 - Loss: 187.879
Iter 20/500 - Loss: 187.599
Iter 21/500 - Loss: 187.321
Iter 22/500 - Loss: 187.042
Iter 23/500 - Loss: 186.764
Iter 24/500 - Loss: 186.488
Iter 25/500 - Loss: 186.210
Iter 26/500 - Loss: 185.935


KeyboardInterrupt: 