In [39]:
import torch
from sklearn.datasets import fetch_california_housing
from data.low_dimensional import RegressionDataset
from torch.utils.data import DataLoader, random_split
from utils import train_utils
from models import MLP
from torch import optim
import logging

In [30]:
if torch.cuda.is_available():
    print('Using cuda')
    device = 'cuda'
else:
    print('Using cpu')
    device = 'cpu'

Using cuda


In [132]:
housing = fetch_california_housing()
images = torch.tensor(housing.target, dtype=torch.float32).unsqueeze(-1)
labels = torch.tensor(housing.data, dtype=torch.float32)

image_dim, label_dim = 1, labels.shape[1]

# dataset = TensorDataset(images, labels)
dataset = RegressionDataset(images=images, labels=labels, standardize=True)

# Load dataset
# data = fetch_california_housing()
# X, y = data.data, data.target

# # Split into train and test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Normalize features
# # scaler = StandardScaler()
# # X_train = scaler.fit_transform(X_train)
# # X_test = scaler.transform(X_test)

# # Convert to PyTorch tensors
# X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
# X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
# y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# # Create data loaders
# train_dataset = TensorDataset(y_train_tensor, X_train_tensor)
# test_dataset = TensorDataset(y_test_tensor, X_test_tensor)
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [133]:
next(iter(dataset))

(tensor([2.1296]),
 tensor([-0.2648, -0.2123, -0.2670, -0.2766,  0.2397, -0.2741, -0.2173, -0.4748]))

In [134]:
hidden_dim = 128
n_layers = 2
dropout = 0.0

report_every = 10
early_stopping = 100
weight_decay = 0.0
learning_rate = 1e-4

n_epochs = 100

In [135]:
criterion = torch.nn.MSELoss()

model = MLP(target_dim=image_dim, conditioning_dim=label_dim, hidden_dim=hidden_dim, layers=n_layers, dropout=dropout).to(device)

optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
    weight_decay=weight_decay)

report_every = report_every
early_stopper = train_utils.EarlyStopper(
    patience=int(early_stopping / report_every),
    min_delta=0.0001,
)
running_loss = 0

training_loss_list = []
validation_loss_list = []
validation_loss_list_ema = []
epochs = []

best_loss = torch.inf

batch_size = 64

training_dataset, validation_dataset, test_dataset = random_split(dataset, [0.8, 0.1, 0.1])
train_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)

In [136]:
for epoch in range(n_epochs):
    # logging.info(using("At the start of the epoch"))

    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad(set_to_none=True)
    
        predicted_images = model(labels)
        loss = criterion(images, predicted_images)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    if epoch % report_every == report_every - 1:
        epochs.append(epoch)
        model.eval()

        validation_loss = 0
        validation_loss_ema = 0
        
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                
                sampled_images = model(labels)
                
                validation_loss += criterion(sampled_images, images).item()
        
        validation_loss_list.append(
            validation_loss / len(val_loader)
        )
        validation_loss_list_ema.append(
            validation_loss_ema / len(val_loader)
        )
        training_loss_list.append(
            running_loss / report_every / (len(train_loader))
        )
        running_loss = 0.0

    
            
        if early_stopper.early_stop(validation_loss):
                print(f"EP {epoch}: Early stopping")
                break
        
        print(
                f"[{epoch + 1:5d}] Training loss: {training_loss_list[-1]:.8f}, Validation loss: "
                f"{validation_loss_list[-1]:.8f}"
            )

[   10] Training loss: 0.99828591, Validation loss: 0.98028085
[   20] Training loss: 0.99310509, Validation loss: 0.97209858
[   30] Training loss: 0.96868437, Validation loss: 0.92027974
[   40] Training loss: 0.90877140, Validation loss: 0.84273174
[   50] Training loss: 0.78278273, Validation loss: 0.70300977
[   60] Training loss: 0.61729401, Validation loss: 0.53115863
[   70] Training loss: 0.53582132, Validation loss: 0.47654445
[   80] Training loss: 0.50662096, Validation loss: 0.48233858
[   90] Training loss: 0.49378373, Validation loss: 0.47523250
[  100] Training loss: 0.48650760, Validation loss: 0.45663595


In [119]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Load dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

batch_size = 256

# Create data loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the MLP model
# class MLP(nn.Module):
#     def __init__(self, input_dim):
#         super(MLP, self).__init__()
#         self.model = nn.Sequential(
#             nn.Linear(input_dim, 64),
#             nn.ReLU(),
#             nn.Linear(64, 64),
#             nn.ReLU(),
#             nn.Linear(64, 1)
#         )
    
#     def forward(self, x):
#         return self.model(x)

# Initialize model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = MLP(X_train.shape[1]).to(device)
model = MLP(target_dim=image_dim, conditioning_dim=label_dim, hidden_dim=hidden_dim, layers=n_layers, dropout=dropout).to(device)


criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
first_time = True
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # X_batch, y_batch = y_batch.to(device), X_batch.to(device)
        if first_time:
            print(f'X_batch.shape: {X_batch.shape}')
            first_time = False
        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

# Evaluate the model
model.eval()
total_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        # X_batch, y_batch = y_batch.to(device), X_batch.to(device)
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        total_loss += loss.item()

print(f"Test MSE: {total_loss / len(test_loader):.4f}")


X_batch.shape: torch.Size([256, 8])
Epoch 10/100, Loss: 1.4868
Epoch 20/100, Loss: 1.4235
Epoch 30/100, Loss: 1.1153
Epoch 40/100, Loss: 1.0950
Epoch 50/100, Loss: 0.7108
Epoch 60/100, Loss: 0.6226
Epoch 70/100, Loss: 0.6128
Epoch 80/100, Loss: 0.6201
Epoch 90/100, Loss: 0.5969
Epoch 100/100, Loss: 0.6481
Test MSE: 0.6072


In [None]:
X_train_tensor.shape, y_train_tensor.shape

(torch.Size([16512]), torch.Size([132096, 1]))

In [None]:
X_train.shape, y_train.shape, X.shape, y.shape

((16512,), (16512, 8), (20640,), (20640, 8))

In [110]:
TensorDataset(X_train_tensor, y_train_tensor)

AssertionError: Size mismatch between tensors

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Load dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create data loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the Diffusion Model
class DiffusionModel(nn.Module):
    def __init__(self, input_dim, timesteps=100):
        super(DiffusionModel, self).__init__()
        self.timesteps = timesteps
        self.model = nn.Sequential(
            nn.Linear(input_dim + 1, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, input_dim)
        )
    
    def forward(self, x, t):
        t = t.view(-1, 1).expand(-1, x.shape[1])
        x_t = torch.cat([x, t], dim=1)
        return self.model(x_t)

# Diffusion process functions
def forward_diffusion(x, noise, alpha_bar):
    return torch.sqrt(alpha_bar) * x + torch.sqrt(1 - alpha_bar) * noise

def generate_alpha_bar(timesteps):
    beta = torch.linspace(0.0001, 0.02, timesteps)
    alpha = 1 - beta
    alpha_bar = torch.cumprod(alpha, dim=0)
    return alpha_bar

# Initialize model and training components
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
timesteps = 100
alpha_bar = generate_alpha_bar(timesteps).to(device)

model = DiffusionModel(X_train.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Training loop
epochs = 100
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        t = torch.randint(0, timesteps, (X_batch.shape[0],), device=device)
        noise = torch.randn_like(X_batch)
        X_noisy = forward_diffusion(X_batch, noise, alpha_bar[t])
        
        optimizer.zero_grad()
        predicted_noise = model(X_noisy, t.float())
        loss = criterion(predicted_noise, noise)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

# Evaluate model
model.eval()
total_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        t = torch.randint(0, timesteps, (X_batch.shape[0],), device=device)
        noise = torch.randn_like(X_batch)
        X_noisy = forward_diffusion(X_batch, noise, alpha_bar[t])
        predicted_noise = model(X_noisy, t.float())
        loss = criterion(predicted_noise, noise)
        total_loss += loss.item()

print(f"Test Loss: {total_loss / len(test_loader):.4f}")

RuntimeError: The size of tensor a (32) must match the size of tensor b (8) at non-singleton dimension 1