## Best practices using python:
- Avoid using python funtions and loops


Finding the sum of values in an array/list

In [39]:
# Find the sum of 1000000 items
import time
arr = [1] * 1000000
start_time = time.time()
sum_ = sum(arr)
print('Native python', sum_, f"{(time.time()-start_time)*1000} miliseconds")
# Now do it using numpy
import numpy as np
arr = np.ones(1000000).astype(np.int8)
start_time = time.time()
sum_ = np.sum(arr)
print('Numpy', sum_, f"{(time.time()-start_time)*1000} miliseconds")


Native python 1000000 4.724264144897461 miliseconds
Numpy 1000000 0.5729198455810547 miliseconds


Whenever you can use vectorized operations to avoid loops

In [40]:
import numpy as np
import time

# Using nested lists
nested_list = [list(range(1000)) for _ in range(1000)]
start_time = time.time()
sum_nested = [sum(i) for i in nested_list]
nested_list_time = time.time() - start_time

# Using vectorized numpy operations
arr = np.array(nested_list)
start_time = time.time()
# Specify axis to apply function along a given dimension
sum_np = np.sum(arr, axis=1)
numpy_time = time.time() - start_time


print("Sum using numpy:", sum_np[0])
print("Sum using nested lists:", sum_nested[0])
print("Execution time using numpy:", numpy_time)
print("Execution time using nested lists:", nested_list_time)

Sum using numpy: 499500
Sum using nested lists: 499500
Execution time using numpy: 0.0011365413665771484
Execution time using nested lists: 0.006396055221557617


To ensure deterministic results using torch and CUDA we have to set the seed to a few things

In [41]:
import torch
def set_torch_seed(seed: int = 42) -> None:
    """Set torch seed for reproducibility.

    :param seed: seed to set

    :return: None
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_torch_seed(42)

## Training a model using Pytorch
You need to implement the following to be able to train a model using Pytorch:
- A dataset and a dataloader
- A loss function
- A model
- An optimizer



### Dataset and dataloader

In [42]:
import torch
import torchvision
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample = self.data[index]
        label = self.labels[index]
        return sample, label

# Create an instance of the dataset
X = torch.ones(100, 32)
y = torch.ones(100, 2)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

In [43]:
import torch
from torch.utils.data import DataLoader

# Create a dataloader
batch_size = 2
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### The loss function

In [44]:
from torch import nn
criterion = nn.MSELoss()

In [45]:
class MyModel(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(32, 16)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(16, 8)
        self.layer3 = nn.Linear(8, 1)

    def forward(self, x):
        x1 = self.layer1(x)
        x2 = self.relu(x1)
        x3  = self.layer2(x2)
        x4 = self.relu(x3)
        y = self.layer3(x4)
        return y

Optimizer

In [46]:
import torch
import torch.optim as optim
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Create an instance of your model
model = MyModel()
model.to(device)
# Define the learning rate
learning_rate = 0.001

# Create the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Training loop

### Train a single epoch

In [47]:
from tqdm import tqdm

def train_one_epoch(train_dataloader):
    losses = []
    model.train()
    pbar = tqdm(train_dataloader)
    for batch in pbar:
        X_batch, y_batch = batch
        X_batch = X_batch.to(device).float()
        y_batch = y_batch.to(device).float()

        # Forward pass
        y_pred = model(X_batch).squeeze(1)
        loss = criterion(y_pred, y_batch)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print tqdm
        losses.append(loss.item())
        pbar.set_postfix(loss=sum(losses) / len(losses))

def val_one_epoch(test_dataloader):
    losses = []
    model.eval()
    pbar = tqdm(test_dataloader, unit="batch")
    with torch.no_grad():
        for batch in pbar:
            X_batch, y_batch = batch
            X_batch = X_batch.to(device).float()
            y_batch = y_batch.to(device).float()

            # Forward pass
            y_pred = model(X_batch).squeeze(1)
            loss = criterion(y_pred, y_batch)

            # Print losses
            losses.append(loss.item())
            pbar.set_postfix(loss=sum(losses) / len(losses))
    

In [49]:
num_epochs = 10
for epoch in range(num_epochs):
    train_one_epoch(train_dataloader=train_dataloader)
    val_one_epoch(test_dataloader=test_dataloader)

100%|██████████| 40/40 [00:00<00:00, 144.69it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 281.63batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 125.71it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 187.00batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 128.94it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 143.62batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 126.98it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 121.48batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 128.48it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 141.25batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 141.32it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 150.78batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 116.25it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 185.10batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 119.50it/s, loss=0]
100%|██████████| 10/10 [00:00<00:00, 596.32batch/s, loss=0]
100%|██████████| 40/40 [00:00<00:00, 118.20it/s, loss=0]
100%|██

In [51]:
import dask

@dask.delayed
def create_dummy_data():
    return np.ones(1000000)

dummy_data = create_dummy_data()
print(dummy_data)

Delayed('create_dummy_data-e6190fcb-b881-4973-abdf-3306b4573a15')


In [52]:
computed_data = dask.compute(dummy_data)
print(len(computed_data), computed_data)

(array([1., 1., 1., ..., 1., 1., 1.]),)


In [53]:
@dask.delayed
def create_dummy_data():
    return np.ones(1000000)

dummy_data_array = [create_dummy_data() for _ in range(100)]
print(dummy_data_array)

[Delayed('create_dummy_data-4283a465-6277-47f9-85ea-559869b35a9d'), Delayed('create_dummy_data-a53268d6-0f7b-4140-ab78-43db1c9eb4cf'), Delayed('create_dummy_data-1dcde7c8-346b-48af-aafb-cc4a03737a23'), Delayed('create_dummy_data-9dee5d67-8b94-4322-b058-d3817d2c3b55'), Delayed('create_dummy_data-e09304d0-54d9-4e59-8c0b-d86b5a247092'), Delayed('create_dummy_data-4e5244ff-9813-4faf-8d97-1da94d35aba0'), Delayed('create_dummy_data-f237f855-82ef-428f-ae3b-a7fedebbaf08'), Delayed('create_dummy_data-8def60db-21a9-4a5b-ae8b-1efab06103ff'), Delayed('create_dummy_data-e2ac3d1f-1f58-4891-bdf6-7fc3161b63a2'), Delayed('create_dummy_data-a1f69fca-ea0d-48b6-935a-ef75246d7174'), Delayed('create_dummy_data-3626ce26-0615-423e-8249-f8d9ec011cd6'), Delayed('create_dummy_data-08cdd904-5932-4812-a4ad-0e91e5f03716'), Delayed('create_dummy_data-02bd0721-8d68-4bea-bb9a-28c4d11884e4'), Delayed('create_dummy_data-b3108b43-3180-4988-a0d6-7397aaec6264'), Delayed('create_dummy_data-0fd49209-3696-4e0d-aad8-2302c4095c

In [60]:
computed_data_array = dask.compute(*dummy_data_array)
print(len(computed_data_array), computed_data_array[0])

100 [1. 1. 1. ... 1. 1. 1.]
