# Appendix A: The Little Book of Tensors
Welcome to Appendix A. This notebook contains the code listings for the appendix, which introduces the essential properties of tensors and the operations you can perform on them.

## Listing A-1: Properties of Tensors

In [None]:
import torch

# A 2×2 tensor of floats that tracks gradients; every property below is read from it.
tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)

# (label, value) pairs in display order, grouped by theme.
tensor_properties = [
    # Geometry
    ("Shape:", tensor.shape),
    ("Rank (Number of Dimensions):", tensor.dim()),
    ("Number of Elements:", tensor.numel()),
    # Element type and placement
    ("Data Type:", tensor.dtype),
    ("Device:", tensor.device),
    ("Strides:", tensor.stride()),
    # Autograd state
    ("Requires Gradient:", tensor.requires_grad),
    ("Gradient Function:", tensor.grad_fn),
    # Memory layout
    ("Is Contiguous:", tensor.is_contiguous()),
    ("Element Size (bytes):", tensor.element_size()),
    ("Storage Offset:", tensor.storage_offset()),
    ("Data Pointer:", tensor.data_ptr()),
    ("Layout:", tensor.layout),
    # Format and placement flags
    ("Is Sparse:", tensor.is_sparse),
    ("Is Quantized:", tensor.is_quantized),
    ("Is CUDA:", tensor.is_cuda),
    ("Is Pinned:", tensor.is_pinned()),
]

# One line per property, printed as "<label> <value>".
for label, value in tensor_properties:
    print(label, value)

## Listing A-2: Preparing Token Sequences with Indexing, Slicing, and Masking
This example prepares a batch of tokenized sentences for a language model that uses padding.

In [None]:
import torch

# Real-world example: preparing a batch of tokenized sentences
# for a language model that uses padding.

# Id of the padding token. Defined before the batch so the data below and the
# mask in step 3 both refer to the same constant.
PAD_ID = 0

# tokens.shape = (batch_size, seq_len) = (2, 8); trailing PAD_ID entries are padding.
# The quoted text beside each row is an illustrative gloss of the ids.
# NOTE(review): the leading 101 has no label in the gloss; it looks like a
# start-of-sequence marker (BERT-style [CLS]) — confirm against the tokenizer.
tokens = torch.tensor([
    [101, 2009, 2003, 1037, 2154,   0,   0,   0],  # "It is a nice [PAD] [PAD] [PAD]"
    [101, 1045, 2293, 3679, 3185, 102,   0,   0],  # "I love reading books [SEP] [PAD] [PAD]"
])
print("tokens:\n", tokens)

# -----------------------------------------
# 1. Indexing: pick specific samples
# -----------------------------------------
# Example: select the second sentence from the batch.
# An integer index drops that dimension: the result has shape (seq_len,).
second_sentence = tokens[1]
print("\nSecond sentence (indexing):\n", second_sentence)

# -----------------------------------------
# 2. Slicing: pick ranges (time steps)
# -----------------------------------------
# Example: model only uses the first 5 tokens of each sentence.
# Basic slicing returns a view that shares memory with `tokens`; shape (2, 5).
first_five_tokens = tokens[:, :5]
print("\nFirst 5 tokens of each sentence (slicing):\n", first_five_tokens)

# -----------------------------------------
# 3. Masking: ignore padding tokens
# -----------------------------------------
# Create a mask where True means "real token" and False means "padding".
# The mask has the same shape as `tokens` and dtype bool.
non_pad_mask = tokens != PAD_ID
print("\nNon-padding mask (masking):\n", non_pad_mask)

# Use the mask to collect all real tokens into a flat 1-D tensor.
# Unlike slicing, boolean-mask indexing returns a new tensor (a copy, not a
# view); elements appear in row-major order, so sentence boundaries are lost.
real_tokens = tokens[non_pad_mask]
print("\nAll real tokens (flattened, padding removed):\n", real_tokens)

# In practice, the non_pad_mask is also used to:
# - compute loss only on real tokens
# - build attention masks for Transformer models


## Listing A-3: Combining and Splitting Features in Real Model Workflow
In many applications, a model must use both image data and additional metadata about the same sample. For example, a medical imaging model might take both a chest X-ray and patient attributes such as age and smoking status. We can concatenate the two feature sets to form a unified input. Later, if the batch is too large for GPU memory, we can split the combined tensor into micro-batches. This listing shows an example.

In [None]:
import torch

# Sizes used in this listing, named once so the shapes below are easy to follow.
NUM_SAMPLES = 12             # samples (patients) in the batch
IMAGE_FEATURE_DIM = 128      # features per sample from the image encoder
METADATA_FEATURE_DIM = 3     # additional features per sample
MICRO_BATCH_SIZE = 4         # samples the GPU is assumed to handle at once

# Stand-in for image features extracted by a CNN.
# Shape: (batch, image_feature_dim) = (12, 128)
image_features = torch.randn(NUM_SAMPLES, IMAGE_FEATURE_DIM)

# Stand-in for metadata features (for example, age, weight, and risk score).
# Shape: (batch, metadata_feature_dim) = (12, 3)
metadata = torch.randn(NUM_SAMPLES, METADATA_FEATURE_DIM)

# --------------------------------------------------------
# STEP 1: Concatenate features from two sources
# --------------------------------------------------------

# Join the two sources along the feature axis (the last dimension), keeping
# one row per sample: 128 image features followed by 3 metadata features.
combined = torch.cat([image_features, metadata], dim=-1)
print("Combined feature shape:", combined.shape)   # torch.Size([12, 131])


# --------------------------------------------------------
# STEP 2: Split into micro-batches for memory-friendly training
# --------------------------------------------------------

# Split along the batch dimension into chunks of MICRO_BATCH_SIZE rows:
# 12 samples → 3 micro-batches of 4 samples each.
micro_batches = combined.split(MICRO_BATCH_SIZE)

print("\nNumber of micro-batches:", len(micro_batches))
for index, micro_batch in enumerate(micro_batches):
    print(f"Micro-batch {index} shape:", micro_batch.shape)


## Listing A-4: Matrix Operations in a Simple Forward Pass
This example reshapes a small image into a vector, then a linear layer performs a matrix multiplication to produce logits. This is exactly the pattern used throughout real models: data is reorganized, multiplied by learned weights, and passed forward repeatedly.

In [None]:
import torch
import torch.nn as nn

# Seed the global RNG so the random image, the randomly initialised layer,
# and therefore the printed logits are identical on every run.
torch.manual_seed(0)

# A tiny example: classify a 4×4 grayscale "image"
image = torch.randn(1, 1, 4, 4)   # (batch, channels, height, width)

# Flatten layer + linear classifier
flatten = nn.Flatten()            # keeps the batch dim: (1, 1, 4, 4) → (1, 16)
# nn.Linear(16, 3) stores a (3, 16) weight and a (3,) bias and computes
# x @ weight.T + bias, i.e. (1×16) @ (16×3) + bias → (1×3).
classifier = nn.Linear(16, 3)

# Forward pass
x = flatten(image)                # reshape to one 16-element vector per sample
logits = classifier(x)            # matrix multiplication plus bias: one score per class
print("Logits:", logits)

## Listing A-5: Fourier Transforms and Gradient Computation in Practice
This example reflects a common pattern in real systems: specialized transforms prepare data for a model, and automatic differentiation supplies the gradients that allow that model to learn.

In [None]:
import torch
import torch.nn.functional as F

# Seed the global RNG so the waveform, weights, and data, and therefore the
# printed gradient, are identical on every run.
torch.manual_seed(0)

# Example 1: Frequency analysis for an audio snippet
# Simulated 1-second mono audio at 16 kHz
waveform = torch.randn(16_000)              # time-domain signal, 16,000 real samples
# rfft is the one-sided FFT for real input: it returns n // 2 + 1 complex
# bins, so 16,000 samples produce 8,001 frequency bins.
spectrum = torch.fft.rfft(waveform)         # frequency-domain representation
print("Spectrum shape:", spectrum.shape)

# Example 2: Gradients for a tiny regression head
# requires_grad=True asks autograd to track operations on the weights.
weights = torch.randn(10, 1, requires_grad=True)

inputs = torch.randn(4, 10)                 # 4 samples, 10 features
targets = torch.randn(4, 1)                 # regression targets

preds = inputs @ weights                    # linear model: (4×10) @ (10×1) → (4×1)
loss = F.mse_loss(preds, targets)           # mean squared error over the 4 predictions
# backward() fills weights.grad with d(loss)/d(weights); for this loss it
# equals (2 / 4) * inputs.T @ (preds - targets), with shape (10, 1).
loss.backward()

print("Gradient on weights:\n", weights.grad)

## Listing A-6: Using Sparse, Quantized, and Named Tensors in a Recommendation Workflow
This listing builds a small but realistic example of a recommendation pipeline. It constructs a sparse user–item interaction matrix, retrieves the active user–item pairs, simulates dense model outputs and converts them to a quantized format suitable for deployment, and applies names to tensor dimensions for clarity. Together, these steps illustrate how advanced tensor properties support large-scale, resource-conscious systems that must remain readable and maintainable.

In [None]:
import torch

# ---------------------------------------------------------
# 1. Sparse user–item interaction matrix (user-item ratings)
# ---------------------------------------------------------
# Users: 3, Items: 5
# COO format stores only the non-zero entries: a (2, nnz) index tensor
# (row 0 = user index, row 1 = item index) plus one value per entry.
indices = torch.tensor([[0, 1, 2],     # user indices
                        [1, 3, 4]])    # item indices
values = torch.tensor([5.0, 3.0, 4.0]) # non-zero ratings

interactions = torch.sparse_coo_tensor(indices, values, size=(3, 5))
print("Is sparse:", interactions.is_sparse)

# ---------------------------------------------------------
# 2. Identify meaningful interactions from the sparse indices
#    (the sparse counterpart of calling .nonzero() on a dense matrix)
# ---------------------------------------------------------
# coalesce() merges duplicate coordinates and is required before indices()
# can be read. Transposing the (2, nnz) index tensor gives one
# (user, item) pair per row.
active_positions = interactions.coalesce().indices().t()
print("Active user–item pairs:\n", active_positions)

# ---------------------------------------------------------
# 3. Simulate a dense model output and quantize it for deployment
# ---------------------------------------------------------
dense_output = torch.randn(3, 5)               # predictions or embeddings
# Each float is stored as an 8-bit integer q = round(x / scale) + zero_point,
# so with scale=0.1 and zero_point=0 values are kept to the nearest 0.1.
qoutput = torch.quantize_per_tensor(dense_output, scale=0.1,
                                    zero_point=0, dtype=torch.qint8)
print("Is quantized:", qoutput.is_quantized)

# ---------------------------------------------------------
# 4. Use named tensors for dimension clarity
# ---------------------------------------------------------
# refine_names attaches a name to each dimension, in order, without changing the data.
# NOTE(review): PyTorch documents named tensors as a prototype feature, so
# this call may emit a UserWarning — confirm against the installed version.
named_output = dense_output.refine_names('user', 'item')
print("Named tensor dims:", named_output.names)
