In [3]:
import numpy as np
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
import torch
import matplotlib.pyplot as plt


Looking in indexes: https://download.pytorch.org/whl/cu118Note: you may need to restart the kernel to use updated packages.

Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.4.1%2Bcu118-cp312-cp312-win_amd64.whl (2695.4 MB)
     ---------------------------------------- 0.0/2.7 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.7 GB 1.7 MB/s eta 0:27:12
     ---------------------------------------- 0.0/2.7 GB 2.5 MB/s eta 0:18:01
     ---------------------------------------- 0.0/2.7 GB 2.7 MB/s eta 0:16:37
     ---------------------------------------- 0.0/2.7 GB 3.7 MB/s eta 0:12:01
     ---------------------------------------- 0.0/2.7 GB 3.7 MB/s eta 0:12:12
     ---------------------------------------- 0.0/2.7 GB 4.5 MB/s eta 0:10:04
     ---------------------------------------- 0.0/2.7 GB 4.6 MB/s eta 0:09:52
     ---------------------------------------- 0.0/2.7 GB 4.6 MB/s eta 0:09:52
     ---------------------------------------- 0.0/2.

In [10]:
# Synthetic 1-D regression problem: y = 3*x + 4 + standard-normal noise.
num_samples = 40
np.random.seed(45)

# Draw order matters for reproducibility: inputs first, then the noise.
x1 = np.random.uniform(low=-1, high=1, size=num_samples)
eps = np.random.randn(num_samples)
f_x = 4 + 3 * x1
y = eps + f_x

# Design matrix with a leading column of ones (bias term) -> (num_samples, 2).
ones = np.ones_like(x1)
X = torch.from_numpy(np.column_stack([ones, x1])).float()
y = torch.from_numpy(y).float()

torch.float32

In [106]:
def true_gradient_descent(X,y,theta,learning_rate,epsilon,max_epochs):
    """
    Full-batch gradient descent on the mean-squared error of X @ theta vs y.

    X: torch.Tensor, design matrix multiplied with theta
    y: torch.Tensor, targets compared against X @ theta
    theta: leaf torch.Tensor with requires_grad=True; updated in place
    learning_rate: float, step size applied to the gradient
    epsilon: float, stop as soon as the gradient norm falls below this value
    max_epochs: int, upper bound on the number of epochs

    return: (theta_evolution, epoch_count, loss_tgd)
        theta_evolution: list of detached copies of theta, one per evaluated
            epoch, taken before that epoch's update
        epoch_count: number of epochs evaluated (max_epochs if the gradient
            norm never dropped below epsilon)
        loss_tgd: list of float losses, one per evaluated epoch
    """
    theta_evolution = []
    loss_tgd = []
    epoch_count = max_epochs

    # backward() accumulates into theta.grad, so a gradient left over from
    # earlier use of the same tensor would corrupt the first step.
    if theta.grad is not None:
        theta.grad.zero_()

    for epoch in range(max_epochs):
        y_pred = torch.matmul(X, theta)
        loss = torch.mean((y_pred - y)**2)

        # Record loss and theta together so both histories always have one
        # entry per evaluated epoch, including the epoch that triggers the stop.
        loss_tgd.append(loss.item())
        theta_evolution.append(theta.clone().detach())

        loss.backward()
        converged = torch.norm(theta.grad) < epsilon

        with torch.no_grad():
            if not converged:
                theta -= theta.grad*learning_rate
            # Always clear the gradient, also on the converged exit, so that
            # theta can be reused without a stale gradient attached.
            theta.grad.zero_()

        if converged:
            epoch_count = epoch+1
            break

    return theta_evolution,epoch_count,loss_tgd

In [107]:
# Hyper-parameters for the full-batch gradient-descent run.
learning_rate = 0.5
epsilon = 0.001
max_epochs = 15

# Integer-valued random start in [2, 10). No torch seed is set in this cell,
# so the starting point is not pinned by the NumPy seed used for the data.
theta = torch.randint(low=2, high=10, size=(2,), dtype=torch.float32, requires_grad=True)

theta_evolution, eps_count_true_gradient, loss_tgd = true_gradient_descent(
    X,
    y,
    theta,
    learning_rate=learning_rate,
    epsilon=epsilon,
    max_epochs=max_epochs,
)
theta_evolution

[tensor([4., 9.]),
 tensor([4.4857, 7.0088]),
 tensor([4.3171, 5.6877]),
 tensor([4.2052, 4.7695]),
 tensor([4.1275, 4.1319]),
 tensor([4.0735, 3.6891]),
 tensor([4.0360, 3.3815]),
 tensor([4.0099, 3.1680]),
 tensor([3.9918, 3.0196]),
 tensor([3.9793, 2.9166]),
 tensor([3.9705, 2.8451]),
 tensor([3.9645, 2.7954]),
 tensor([3.9603, 2.7609]),
 tensor([3.9573, 2.7369]),
 tensor([3.9553, 2.7203])]

In [3]:
from einops import rearrange
def create_coordinate_map(height, width, device, verbose=True):
    """
    Build the (row, column) coordinate of every cell in a height x width grid.

    height: int, number of rows in the grid
    width: int, number of columns in the grid
    device: torch.device the returned tensor is moved to
    verbose: bool, print the intermediate coordinate tensors; the default
        (True) keeps the output this function has always produced

    return: torch.Tensor of shape (height * width, 2), dtype float32; each row
        is (h, w) and the w value changes fastest
    """
    # Two (height, width) grids: the column index along every row, and the
    # row index repeated across every row.
    w_coords = torch.arange(width).repeat(height, 1)
    h_coords = torch.arange(height).repeat(width, 1).t()
    if verbose:
        print(w_coords)

    # Flatten row by row, so width values change faster than height values.
    w_coords = w_coords.reshape(-1)
    h_coords = h_coords.reshape(-1)
    if verbose:
        print(h_coords)

    # Combine the row and column coordinates into a single float tensor.
    X = torch.stack([h_coords, w_coords], dim=1).float()
    if verbose:
        print(X)

    return X.to(device)

In [2]:
def create_rff_features(X, num_features, sigma, device, random_state=None):
    """
    Map X to random Fourier features using sklearn's RBFSampler.

    X: torch.Tensor, one sample per row (converted to a NumPy array for sklearn)
    num_features: int, number of random features (RBFSampler n_components)
    sigma: float, bandwidth; the sampler is built with gamma = 1 / (2 * sigma**2)
    device: torch.device the returned tensor is moved to
    random_state: optional seed forwarded to RBFSampler. The default (None)
        keeps the previous behaviour, where every call draws new random
        weights; pass a fixed value to get the same feature map on every call
        (needed when several inputs must share one feature space).

    return: torch.Tensor of dtype float32 holding the transformed samples
    """
    # Imported here so that defining the function does not require sklearn.
    from sklearn.kernel_approximation import RBFSampler

    rff = RBFSampler(
        n_components=num_features,
        gamma=1/(2 * sigma**2),
        random_state=random_state,
    )
    # sklearn operates on CPU NumPy arrays; detach() lets tensors that track
    # gradients be converted as well.
    features = rff.fit_transform(X.detach().cpu().numpy())
    return torch.tensor(features, dtype=torch.float32).to(device)

In [4]:
# Pixel coordinates of a 300 x 300 grid, min-max scaled to [-1, 1] and then
# expanded into random Fourier features.
from sklearn import preprocessing
import torch

device = torch.device("cpu")
dog_X = create_coordinate_map(300, 300, device)

# Fit the scaler on the raw coordinates (handed to sklearn from the CPU).
scaler_X = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler_X.fit(dog_X.cpu())

# Scale, then turn the result back into a float32 tensor on `device`.
dog_X_scaled = torch.tensor(scaler_X.transform(dog_X.cpu())).to(device).float()

X_rff = create_rff_features(dog_X_scaled, num_features=5000, sigma=0.008, device=device)
X_rff




tensor([[  0,   1,   2,  ..., 297, 298, 299],
        [  0,   1,   2,  ..., 297, 298, 299],
        [  0,   1,   2,  ..., 297, 298, 299],
        ...,
        [  0,   1,   2,  ..., 297, 298, 299],
        [  0,   1,   2,  ..., 297, 298, 299],
        [  0,   1,   2,  ..., 297, 298, 299]])
tensor([  0,   0,   0,  ..., 299, 299, 299])
tensor([[  0.,   0.],
        [  0.,   1.],
        [  0.,   2.],
        ...,
        [299., 297.],
        [299., 298.],
        [299., 299.]])


tensor([[ 0.0044, -0.0200,  0.0107,  ..., -0.0150,  0.0063,  0.0172],
        [-0.0076, -0.0200,  0.0100,  ..., -0.0179,  0.0004,  0.0200],
        [-0.0168, -0.0200,  0.0093,  ..., -0.0196, -0.0056,  0.0177],
        ...,
        [-0.0158,  0.0140,  0.0186,  ...,  0.0195,  0.0181, -0.0200],
        [-0.0200,  0.0145,  0.0188,  ...,  0.0177,  0.0198, -0.0175],
        [-0.0169,  0.0149,  0.0191,  ...,  0.0147,  0.0197, -0.0106]])

In [8]:
class LinearModel(torch.nn.Module):
    """A single affine layer mapping in_features inputs to out_features outputs."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = torch.nn.Linear(in_features=in_features, out_features=out_features)

    def forward(self, x):
        """Apply the affine layer to x and return its output."""
        outputs = self.linear(x)
        return outputs


In [11]:
def train(net, lr, X, Y, epochs, verbose=True):
    """
    Fit net to (X, Y) with full-batch Adam on the mean-squared error.

    net: torch.nn.Module
    lr: float, Adam learning rate
    X: torch.Tensor of shape (num_samples, in_features), fed to net as one batch
    Y: torch.Tensor of targets compared against net(X),
        e.g. shape (num_samples, 3) for a three-output model
    epochs: int, number of optimisation steps
    verbose: bool, print the loss every 100 epochs

    return: float, the loss measured in the last epoch (before that epoch's
        parameter update). If epochs <= 0 no step is taken and the current
        loss of net on (X, Y) is returned.
    """
    # Referenced through `torch.nn`: the notebook cells shown import `torch`
    # but never bind a bare `nn` name, so `nn.MSELoss()` raised NameError.
    criterion = torch.nn.MSELoss()

    # Without any epoch the loop below would never assign `loss`.
    if epochs <= 0:
        with torch.no_grad():
            return criterion(net(X), Y).item()

    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = net(X)

        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()
        if verbose and epoch % 100 == 0:
            print(f"Epoch {epoch} loss: {loss.item():.6f}")
    return loss.item()

In [12]:
# Linear read-out on top of the random Fourier features, three outputs per sample.
net = LinearModel(X_rff.shape[1], 3).to(device)

# NOTE(review): `dog_Y` is not assigned in any cell visible here; the targets
# for X_rff have to be defined before this call can run.
train(net, lr=0.005, X=X_rff, Y=dog_Y, epochs=2500)

NameError: name 'dog_Y' is not defined