In [1]:
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader

import time

In [2]:
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [[3], [7], [11], [15]]

In [3]:
X = torch.tensor(x).float()
Y = torch.tensor(y).float()

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
X = X.to(device)
Y = Y.to(device)

In [5]:
class MyDataset(Dataset):
    def __init__(self, x, y):
        self.x = x.detach().clone()
        self.y = y.detach().clone()
    
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, ix):
        """
        Function to fetch a specific row.
        :param ix: index of the row that is to be fetched from the dataset.
        """
        return self.x[ix], self.y[ix]

## Important
`.clone()` returns a copy of data and it keeps the computation graph.   
`.detach()` returns a new view of the tensor without computation graph. (it doesn't allocate new memory for the new tensor since it's just a view)

It appears that `.detach().clone()` is more efficient than `.clone().detach()`. Because by utilizing the second approach, at first we duplicate computation graph and later abandon it which is pointless.

In [6]:
dataset = MyDataset(X, Y)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True) # fetch random sample of two data points

In [7]:
for x, y in dataloader:
    print(f"x: {x}")
    print(f"y: {y}")
    print('---')

x: tensor([[7., 8.],
        [5., 6.]], device='cuda:0')
y: tensor([[15.],
        [11.]], device='cuda:0')
---
x: tensor([[3., 4.],
        [1., 2.]], device='cuda:0')
y: tensor([[7.],
        [3.]], device='cuda:0')
---


In [8]:
class MyNeuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.input_to_hidden_layer = nn.Linear(2, 8)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(8, 1)
    
    def forward(self, x):
        x = self.input_to_hidden_layer(x)
        x = self.hidden_layer_activation(x)
        x = self.hidden_to_output_layer(x)
        return x

In [9]:
my_net = MyNeuralNet().to(device)
loss_function = nn.MSELoss()
opt = SGD(my_net.parameters(), lr=0.001)

In [10]:
loss_history = []
start = time.time()
for epoch in range(50):
    for data in dataloader:
        x, y = data
        opt.zero_grad()
        loss_value = loss_function(my_net(x), y)
        loss_value.backward()
        opt.step()
        loss_history.append(float(loss_value))
end = time.time()
end - start

0.5704784393310547

# Predicting on new data points

In [11]:
val_x = [[10, 11]]
val_x = torch.tensor(val_x).float().to(device)

In [12]:
my_net(val_x)

tensor([[20.4166]], device='cuda:0', grad_fn=<AddmmBackward>)