In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [2]:
# Build the same 2x2 integer tensor twice: once straight from a nested
# Python list, once by wrapping a NumPy array created from that list.
data = [[1, 2], [3, 4]]
np_array = np.asarray(data)

x_data = torch.tensor(data)
x_np = torch.from_numpy(np_array)

print(x_data, x_np, sep="\n")

tensor([[1, 2],
        [3, 4]])
tensor([[1, 2],
        [3, 4]])


In [3]:
# `a` holds 1..9 row by row; `b` is the 3x3 integer identity matrix.
a = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
b = torch.eye(3, dtype=torch.int64)

# Element-wise sum.
torch.add(a, b)

tensor([[ 2,  2,  3],
        [ 4,  6,  6],
        [ 7,  8, 10]])

In [4]:
# Element-wise difference.
torch.sub(a, b)

tensor([[0, 2, 3],
        [4, 4, 6],
        [7, 8, 8]])

In [5]:
# Element-wise (Hadamard) product, not a matrix product.
torch.mul(a, b)

tensor([[1, 0, 0],
        [0, 5, 0],
        [0, 0, 9]])

In [6]:
# True matrix product; `b` is the identity, so the result equals `a`.
torch.matmul(a, b)

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [7]:
# Trailing-underscore methods mutate the tensor in place: every entry of `a`
# is clipped into the range [2, 5], and all later cells see the clamped values.
a.clamp_(2,5)
a

tensor([[2, 2, 3],
        [4, 5, 5],
        [5, 5, 5]])

In [8]:
# Concatenate along dim 1: the columns of `b` are appended to the right of `a`.
torch.cat([a, b], dim=1)

tensor([[2, 2, 3, 1, 0, 0],
        [4, 5, 5, 0, 1, 0],
        [5, 5, 5, 0, 0, 1]])

In [9]:
# Concatenate along dim 0: the rows of `b` are stacked underneath `a`.
torch.cat([a, b], dim=0)

tensor([[2, 2, 3],
        [4, 5, 5],
        [5, 5, 5],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]])

In [10]:
# Lay the nine entries out as a single row; `a` itself keeps its shape.
torch.reshape(a, (1, 9))

tensor([[2, 2, 3, 4, 5, 5, 5, 5, 5]])

In [11]:
# The reshape in the previous cell was not in place, so `a` is still 3x3.
a

tensor([[2, 2, 3],
        [4, 5, 5],
        [5, 5, 5]])

In [12]:
# In-place unsqueeze: inserts a new leading axis of size 1 into `a` itself.
a.unsqueeze_(dim=0)

tensor([[[2, 2, 3],
         [4, 5, 5],
         [5, 5, 5]]])

In [13]:
# Confirm that the in-place unsqueeze changed the shape of `a`.
a.shape

torch.Size([1, 3, 3])

In [14]:
# Add one more leading axis, then squeeze_() with no argument removes every
# size-1 axis. Both calls are in place, so `a` ends up 3x3 again.
a.unsqueeze_(dim=0).squeeze_().shape


torch.Size([3, 3])

In [15]:
# Check whether this PyTorch installation can currently use a CUDA device.
torch.cuda.is_available()

False

In [16]:
# Number of CUDA devices visible to PyTorch.
torch.cuda.device_count()

0

In [17]:
def transfer_to_gpu(data):
    """Move ``data`` to the default CUDA device when one is usable.

    Args:
        data: Any object exposing a ``.to(device)`` method, e.g. a
            ``torch.Tensor`` or a ``torch.nn.Module``.

    Returns:
        ``data.to('cuda')`` when CUDA is available and the transfer succeeds;
        otherwise ``data`` itself, unchanged.
    """
    # Check availability up front instead of relying on the exception type:
    # CPU-only PyTorch builds can fail ``.to('cuda')`` with an AssertionError
    # ("Torch not compiled with CUDA enabled"), which the RuntimeError handler
    # below would not catch.
    if not torch.cuda.is_available():
        return data
    try:
        return data.to('cuda')
    except RuntimeError:
        # Best-effort: leave the data where it is if the transfer fails.
        return data

# The availability check above reported no CUDA device, so x_gpu is expected
# to stay on the CPU in this session.
x_gpu = transfer_to_gpu(x_data)

In [28]:
# Outer product: both indices i and j survive into the output, so
# result[i, j] = x[i] * y[j] and nothing is summed.
x = torch.tensor([1, 2, 3])  # shape (3,)
y = torch.tensor([4, 5])     # shape (2,)
result = torch.einsum("i,j->ij", x, y)
result

tensor([[ 4,  5],
        [ 8, 10],
        [12, 15]])

In [22]:
# An empty right-hand side sums over every index: the total of all entries.
x = torch.tensor([[1, 2], [3, 4]])  # shape (2, 2)
result = torch.einsum("ij->", x)
print(result)

tensor(10)


In [24]:
# Dot product: the shared index i is multiplied element-wise and summed away.
x = torch.tensor([1, 2, 3])
y = torch.tensor([4, 5, 6])
result = torch.einsum("i,i->", x, y)
print(result)  # 1*4 + 2*5 + 3*6 = 32

tensor(32)


In [29]:
# Reordering the output indices permutes the axes; "ijk->kji" reverses them.
A = torch.randn(3, 4, 5)  # shape (3, 4, 5)
result = torch.einsum("ijk->kji", A)
print(result.shape)  # torch.Size([5, 4, 3])

torch.Size([5, 4, 3])


In [38]:
# Batched matrix multiplication: b indexes the batch and is kept, k is the
# contracted index, so out[b] = mat1[b] @ mat2[b].
mat1 = torch.tensor([[[1,2],[3,4]],
                    [[5,6],[7,8]]])

mat2 = torch.tensor([[[1,2],[3,4]],
                    [[5,6],[7,8]]])

out = torch.einsum('bnk,bkm->bnm', mat1, mat2)
out

tensor([[[  7,  10],
         [ 15,  22]],

        [[ 67,  78],
         [ 91, 106]]])

In [33]:
# Random tensor of shape (2, 2, 2); torch.rand samples uniformly from [0, 1).
torch.rand(2,2,2)

tensor([[[0.3493, 0.6875],
         [0.3128, 0.8187]],

        [[0.0656, 0.2721],
         [0.8771, 0.3840]]])

In [51]:
# torch.gather along dim 0: out has the same shape as `index`, and
# out[i][j][k] = src[index[i][j][k]][j][k].
dim = 0
src = torch.rand(4,4,6)
# Index values are drawn from [0, 4) so they are valid positions along dim 0 of src.
index = torch.randint(0, 4, (1, 4, 6))

out = torch.gather(src, dim, index)

print(src)
print(index)
print(out)




tensor([[[0.0732, 0.7080, 0.1491, 0.8934, 0.8501, 0.8808],
         [0.5583, 0.0190, 0.8253, 0.1859, 0.6004, 0.8518],
         [0.6320, 0.2956, 0.4388, 0.0299, 0.7550, 0.6311],
         [0.7194, 0.7896, 0.8093, 0.1154, 0.5958, 0.2232]],

        [[0.8171, 0.9269, 0.9785, 0.5975, 0.5934, 0.0187],
         [0.1932, 0.3177, 0.6441, 0.7828, 0.2892, 0.8618],
         [0.6747, 0.5028, 0.5533, 0.4182, 0.0521, 0.7730],
         [0.8694, 0.3767, 0.3975, 0.8326, 0.2360, 0.1234]],

        [[0.6229, 0.6864, 0.1960, 0.9962, 0.4411, 0.3955],
         [0.3602, 0.9446, 0.5387, 0.3753, 0.1326, 0.3168],
         [0.6213, 0.5534, 0.5535, 0.2558, 0.2011, 0.5605],
         [0.1420, 0.3451, 0.0519, 0.5927, 0.9380, 0.1966]],

        [[0.8068, 0.7358, 0.9139, 0.7906, 0.6136, 0.2223],
         [0.9072, 0.3482, 0.3605, 0.8288, 0.6576, 0.3309],
         [0.1154, 0.4608, 0.1009, 0.0053, 0.4832, 0.6288],
         [0.1196, 0.0198, 0.5463, 0.0258, 0.9369, 0.7845]]])
tensor([[[0, 0, 1, 0, 3, 3],
         [2, 2, 3, 

## Build model:

In [59]:
INPUT_SIZE = 224
C = 10

class MLP(torch.nn.Module):
    """Three-layer fully connected classifier over flattened square inputs.

    The network is Flatten -> Linear(input_size**2, 256) -> ReLU ->
    Linear(256, 512) -> ReLU -> Linear(512, num_classes) and returns raw,
    unnormalised class scores (no softmax is applied).

    Args:
        input_size: Side length of the square input; the first linear layer
            expects ``input_size * input_size`` features per sample after
            flattening. Defaults to the module-level ``INPUT_SIZE`` as it is
            at construction time.
        num_classes: Number of output scores. Defaults to the module-level
            ``C`` as it is at construction time.
    """

    def __init__(self, input_size=None, num_classes=None):
        super().__init__()
        # Resolve defaults when the model is built, so ``MLP()`` behaves
        # exactly as before, while callers can pin the sizes explicitly and
        # stop depending on whichever cell last rebound the globals.
        if input_size is None:
            input_size = INPUT_SIZE
        if num_classes is None:
            num_classes = C

        self.flatten = torch.nn.Flatten()
        self.fc1 = torch.nn.Linear(input_size * input_size, 256)
        self.relu1 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(256, 512)
        self.relu2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch ``x`` to a ``(batch, num_classes)`` tensor of scores.

        Every dimension after the first is flattened before the first linear
        layer, so each sample must contain ``input_size * input_size`` values.
        """
        x = self.flatten(x)
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)

In [63]:
B = 10  # batch size for this smoke test

# NOTE(review): `input` shadows the Python builtin of the same name. It is
# left unchanged because a later cell reads this variable.
input = torch.randn(B, 1, INPUT_SIZE, INPUT_SIZE)
model = MLP()

# Forward pass only: checks that a random batch flows through the network.
output = model(input)

In [64]:
# The same layer stack as the MLP class, written as an nn.Sequential
# container instead of a custom Module subclass.
seq_model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(INPUT_SIZE*INPUT_SIZE, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, C)
)

# Reuses the random batch created in the previous cell.
output = seq_model(input)

In [67]:
# Minimal autograd example: a one-parameter model y_hat = w * x with squared error.
x = torch.tensor([2.])
# Only `w` is tracked by autograd.
w = torch.tensor([1.], 
    requires_grad=True)
y = torch.tensor([3.])

y_hat = w*x
L = (y_hat - y)**2
L.backward()

# dL/dw = 2 * (w*x - y) * x = 2 * (2 - 3) * 2 = -4
print(w.grad)
# `x` was created without requires_grad, so no gradient is stored for it.
print(x.grad)

tensor([-4.])
None


In [76]:
# Gradients accumulate in .grad across backward() calls, so they must be
# reset between passes.
x = torch.tensor([2.])
w = torch.tensor([1.], 
    requires_grad=True)
y = torch.tensor([3.])

y_hat = w*x
L = (y_hat - y)**2
L.backward()

# Clear the gradient left by the first backward pass (in place).
w.grad.zero_()

# Same loss, rebuilt from scratch: a fresh graph is needed for a second backward().
L = (x * w - y)**2
L.backward()

# Shows -4 again; without the zero_() above the two passes would add up to -8.
print(w.grad)

tensor([-4.])


In [78]:
x = torch.tensor([2.], requires_grad=True)

# Results computed from a tracked tensor are tracked as well.
print(x.requires_grad)
print((x**2).requires_grad)

# Inside torch.no_grad() operations are not recorded, so the result is untracked.
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


In [None]:
learning_rate = 0.01

# Plain stochastic gradient descent over all parameters of `model`.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 

# API illustration only: zero_grad() clears stored gradients and step()
# applies an update from them.
# NOTE(review): no backward() call appears between the two, so this step
# presumably has no gradients to apply; confirm that is intended.
optimizer.zero_grad()
optimizer.step()

In [89]:
# Hyperparameters for the Fashion-MNIST training run.
INPUT_SIZE = 28          # side length used to size the first linear layer
EPOCHS = 100
LR = 1e-3
B_SQRT = 8
B = B_SQRT * B_SQRT      # batch size handed to the DataLoaders

# Prefer the GPU when one is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [84]:
import torch
from torchvision import datasets, transforms  # Correct imports for datasets and transforms

# Every image is converted to a tensor as it is read from the dataset.
transform = transforms.ToTensor()

# Build the training split first, then the test split. Both live under
# ./data and are downloaded on first use.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26421880/26421880 [00:00<00:00, 93458491.50it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29515/29515 [00:00<00:00, 204281984.42it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz





Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4422102/4422102 [00:00<00:00, 101424166.12it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5148/5148 [00:00<00:00, 41523609.60it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [85]:
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(training_data, batch_size=B, shuffle=True)

# The validation loader keeps the dataset order (no shuffling requested).
val_dataloader = DataLoader(test_data, batch_size=B)

In [87]:
# Rebuild the classifier for the new INPUT_SIZE: the first linear layer takes
# INPUT_SIZE*INPUT_SIZE flattened features and the last one emits C scores.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(INPUT_SIZE*INPUT_SIZE, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, C)
)

# Move the parameters to the selected device; as the last expression this
# also displays the module summary.
model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=512, bias=True)
  (4): ReLU()
  (5): Linear(in_features=512, out_features=10, bias=True)
)

In [90]:
# Cross-entropy loss applied to the raw scores produced by the model.
criteria = torch.nn.CrossEntropyLoss()

# Plain SGD over the parameters of the model built above, with step size LR.
optimizer = torch.optim.SGD(model.parameters(), lr=LR)

In [92]:
import torch

# Define the training loop
def train_loop(dataloader, model, criteria, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and report the mean loss.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches. It does not need to
            support ``len()``; batches are counted while iterating.
        model: Module being trained; it is switched to training mode.
        criteria: Loss callable invoked as ``criteria(pred, y)``.
        optimizer: Optimizer stepped once per batch.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The mean of the per-batch loss values as a float, or ``nan`` when the
        dataloader yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        # Forward pass.
        pred = model(X)
        loss = criteria(pred, y)

        # Clear stale gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Counting batches during iteration avoids calling len() on loaders that
    # have no length, and the guard avoids dividing by zero on an empty one.
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    print(f"Train Loss: {avg_loss:.4f}")
    return avg_loss


# Define the validation loop
@torch.no_grad()  # No gradients are needed while evaluating
def val_loop(dataloader, model, criteria, device):
    """Evaluate ``model`` on ``dataloader`` and report mean loss and accuracy.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches. It does not need to
            support ``len()``; batches are counted while iterating.
        model: Module being evaluated; it is switched to evaluation mode.
        criteria: Loss callable invoked as ``criteria(pred, y)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch loss values and
        the fraction of samples whose arg-max prediction along dim 1 equals
        the label. Each value is ``nan`` when there is nothing to average.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        pred = model(X)
        total_loss += criteria(pred, y).item()
        num_batches += 1

        # The predicted class is the index of the largest score in each row.
        predicted_labels = pred.argmax(dim=1)
        correct += (predicted_labels == y).sum().item()
        total += y.size(0)

    # Guard both averages so an empty loader reports nan instead of raising
    # ZeroDivisionError.
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    accuracy = correct / total if total else float("nan")
    print(f"Validation Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy


# Training loop across epochs
# One training pass followed by one validation pass per epoch.
# NOTE(review): train_res / val_res are overwritten on every iteration, so
# only the last epoch's values remain after the loop; the per-epoch history
# exists only in the printed output.
for t in range(EPOCHS):
    print(f"Epoch {t+1}/{EPOCHS}")
    train_res = train_loop(train_dataloader, model, criteria, optimizer, device)
    val_res = val_loop(val_dataloader, model, criteria, device)


Epoch 1/100
Train Loss: 2.0981
Validation Loss: 1.9689, Accuracy: 0.4780
Epoch 2/100
Train Loss: 1.7880
Validation Loss: 1.6043, Accuracy: 0.5674
Epoch 3/100
Train Loss: 1.4417
Validation Loss: 1.3081, Accuracy: 0.6094
Epoch 4/100
Train Loss: 1.2034
Validation Loss: 1.1260, Accuracy: 0.6298
Epoch 5/100
Train Loss: 1.0561
Validation Loss: 1.0111, Accuracy: 0.6429
Epoch 6/100
Train Loss: 0.9602
Validation Loss: 0.9337, Accuracy: 0.6645
Epoch 7/100
Train Loss: 0.8927
Validation Loss: 0.8782, Accuracy: 0.6759
Epoch 8/100
Train Loss: 0.8430
Validation Loss: 0.8357, Accuracy: 0.6922
Epoch 9/100
Train Loss: 0.8045
Validation Loss: 0.8022, Accuracy: 0.7030
Epoch 10/100
Train Loss: 0.7732
Validation Loss: 0.7754, Accuracy: 0.7143
Epoch 11/100
Train Loss: 0.7473
Validation Loss: 0.7519, Accuracy: 0.7265
Epoch 12/100
Train Loss: 0.7249
Validation Loss: 0.7313, Accuracy: 0.7376
Epoch 13/100
Train Loss: 0.7050
Validation Loss: 0.7131, Accuracy: 0.7464
Epoch 14/100
Train Loss: 0.6874
Validation Loss