## Tip #1 🔥 - Using your GPU

In [1]:
import torch

# Prefer the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build a small 1-D tensor holding the integers 1..9 (the end bound is exclusive).
tensor = torch.arange(start=1, end=10)

# Send the tensor to whichever device was selected above.
gpu_tensor = tensor.to(device=device)

# Bring the data back to host (CPU) memory.
cpu_tensor = gpu_tensor.to("cpu")

NameError: name 'devie' is not defined

## Tip #2 🔥 - Using `.inference_mode()`

In [None]:
import torch

# Template cell: replace the two `...` placeholders with a real model and a
# real input batch before running it.
model = ... # load your model
data = ... # load your data

# torch.inference_mode() disables autograd tracking for everything computed
# inside the block, which saves memory and time when no gradients are needed.
# If the model contains dropout or batch-norm layers, call model.eval() first.
with torch.inference_mode():
    output = model(data)
    # do something cool

## Tip #3 🔥 - Using `nn.Sequential()`

In [None]:
import torch
import torch.nn as nn

# A small fully connected network. nn.Sequential runs its children in the
# order they are listed, so the feature width goes 16 -> 64 -> 32 -> 2.
model = nn.Sequential(
    nn.Linear(in_features=16, out_features=64),  # input projection
    nn.ReLU(),                                   # non-linearity between the linear layers
    nn.Linear(in_features=64, out_features=32),  # hidden layer
    nn.ReLU(),
    nn.Linear(in_features=32, out_features=2),   # output layer with 2 units
)

## Tip #4 🔥 - Data types in PyTorch

In [None]:
import numpy as np
import torch

# create a 2x2 NumPy array
# (both rows go inside ONE nested list: a second positional argument to
# np.array is interpreted as the dtype, not as another row)
numpy_array = np.array([[1, 2], [3, 4]])

# convert it to a PyTorch tensor
torch_array = torch.from_numpy(numpy_array).type(torch.float32) # torch's default data type = float32

# convert back to NumPy array
numpy_array = torch_array.numpy().astype(np.int64) # int64 is NumPy's default integer type on most platforms

## Tip #5 🔥 - Attention mechanism

Check out https://colab.research.google.com/drive/1hXIQ77A4TYS4y3UthWF-Ci7V7vVUoxmQ?usp=sharing#scrollTo=twSVFOM9SopW as well!

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Set up multi-head attention: a 32-wide embedding shared by 8 heads
multihead_attn = nn.MultiheadAttention(embed_dim=32, num_heads=8)

# Random stand-in inputs, drawn in the order query -> key -> value.
#   query: (L, N, E) - L = target sequence length, N = batch size, E = embedding dimension
#   key:   (S, N, E) - S = source sequence length
#   value: same layout as key
# Here L = S = N = 8 and E = 32.
query, key, value = (torch.rand(8, 8, 32) for _ in range(3))

# Run attention; the call returns the attended output together with the attention weights
attn_output, attn_output_weights = multihead_attn(query=query, key=key, value=value)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Convert the attention weights to a NumPy array and plot them.
# .cpu() is a no-op for CPU tensors and keeps this cell working if the
# attention module and its inputs are ever moved to the GPU.
attn_weights_np = attn_output_weights.detach().cpu().numpy()

# With the default MultiheadAttention settings the weights are averaged over
# the heads and laid out as (batch, target position, source position), so
# index 0 selects the first batch element: rows = queries, columns = keys.
fig, ax = plt.subplots()
sns.heatmap(attn_weights_np[0], annot=True, fmt=".2f", ax=ax)
ax.set(
    title="Attention weights, first batch element (averaged over heads)",
    xlabel="Key (source) position",
    ylabel="Query (target) position",
)
plt.show()

## Tip #6 🔥 - Loading models for transfer learning

In [None]:
import torch
from torchvision import models
import torch.nn as nn

class SimpleNet(nn.Module):
    """ResNet-18 with frozen weights and a fresh linear classification head.

    Every parameter of the loaded ResNet-18 has ``requires_grad`` switched off.
    The network's final ``fc`` layer is then replaced by a new ``nn.Linear``
    with ``num_classes`` outputs; it is created after the freeze, so it is not
    affected by it.
    """

    def __init__(self, num_classes):
        """Build the network.

        Args:
            num_classes: Number of output units of the replacement ``fc`` layer.
        """
        super().__init__()

        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the installed version.
        backbone = models.resnet18(pretrained=True)

        # Freeze everything that was just loaded.
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Swap in a new head sized for this task, reusing the old head's input width.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.resnet = backbone

    def forward(self, x):
        """Return the wrapped ResNet's output for input ``x``."""
        return self.resnet(x)

## Tip #7 🔥 - Torchinfo to inspect models

In [4]:
from torchvision import models
from torchinfo import summary

# Load a ResNet-18 with pretrained weights.
model = models.resnet18(pretrained=True)

# Keep summary(...) as the cell's final expression so the notebook renders
# the per-layer parameter table as the cell output.
summary(model)

Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─BasicBlock: 2-1                   --
│    │    └─Conv2d: 3-1                  36,864
│    │    └─BatchNorm2d: 3-2             128
│    │    └─ReLU: 3-3                    --
│    │    └─Conv2d: 3-4                  36,864
│    │    └─BatchNorm2d: 3-5             128
│    └─BasicBlock: 2-2                   --
│    │    └─Conv2d: 3-6                  36,864
│    │    └─BatchNorm2d: 3-7             128
│    │    └─ReLU: 3-8                    --
│    │    └─Conv2d: 3-9                  36,864
│    │    └─BatchNorm2d: 3-10            128
├─Sequential: 1-6                        --
│    └─BasicBlock: 2-3                   --
│    │    └─Conv2d: 3-11                 73,728