In [9]:
import torchvision.transforms as transforms
import torch.optim as optim
import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from tqdm.notebook import tqdm
from torch.nn import functional as F

device = 'cuda'

In [10]:
batch_size = 32

# Load the FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = FashionMNIST(root='./data', train=False, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [15]:
class BNLeNet(nn.Module):
    def __init__(self, lr=0.1, num_classes=10):
        super().__init__()
        self.net = nn.Sequential(
            nn.LazyConv2d(6, kernel_size=5), nn.LazyBatchNorm2d(),
            nn.Sigmoid(), nn.AvgPool2d(kernel_size=2, stride=2),
            nn.LazyConv2d(16, kernel_size=5), nn.LazyBatchNorm2d(),
            nn.Sigmoid(), nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(), nn.LazyLinear(120), nn.LazyBatchNorm1d(),
            nn.Sigmoid(), nn.LazyLinear(84), nn.LazyBatchNorm1d(),
            nn.Sigmoid(), nn.LazyLinear(num_classes))

    def forward(self, x): return self.net(x)

    def apply_init(self, inputs, init=None):
        self.forward(*inputs)
        if init is not None:
            self.net.apply(init)

def init_cnn(module):
    if type(module) == nn.Linear or type(module) == nn.Conv2d:
        nn.init.xavier_uniform_(module.weight)


In [16]:
model = BNLeNet().to(device)
input_data = next(iter(train_loader))[0].to(device)
model.apply_init([input_data], init_cnn)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), 0.001)

total_step = len(train_loader)
for epoch in range(10):
    epoch_loss = 0.0
    for i, (images, labels) in tqdm(enumerate(train_loader), total=total_step):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Print average epoch loss
    average_loss = epoch_loss / total_step
    print(f"Epoch [{epoch+1}/10], Average Loss: {average_loss:.4f}")



  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [1/10], Average Loss: 0.5988


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [2/10], Average Loss: 0.4009


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [3/10], Average Loss: 0.3535


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [4/10], Average Loss: 0.3270


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [5/10], Average Loss: 0.3043


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [6/10], Average Loss: 0.2922


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [7/10], Average Loss: 0.2765


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [8/10], Average Loss: 0.2674


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [9/10], Average Loss: 0.2572


  0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [10/10], Average Loss: 0.2472


# here has my first impl so too many useless staff

In [None]:
import torch
from torch import nn

def init_cnn(module):  #@save
    """Initialize weights for CNNs."""
    if type(module) == nn.Linear or type(module) == nn.Conv2d:
        nn.init.xavier_uniform_(module.weight)

class LeNet(nn.Module):  #@save
    """The LeNet-5 model."""
    def __init__(self, lr=0.1, num_classes=10):
        super().__init__()
        # self.save_hyperparameters()
        self.net = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(16, 120), nn.Sigmoid(),
            nn.Linear(120, 84), nn.Sigmoid(),
            nn.Linear(84, num_classes))


In [None]:
# import torch
# import torchvision
# import torchvision.transforms as transforms
#
# transform = transforms.Compose(
#     [transforms.ToTensor(),
#      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
#
# batch_size = 128
#
# trainset = torchvision.datasets.FashionMNIST(root='./data', train=True,
#                                         download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
#                                           shuffle=True, num_workers=2)
#
# testset = torchvision.datasets.FashionMNIST(root='./data', train=False,
#                                        download=True, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
#                                          shuffle=False, num_workers=2)
#
# classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal','Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [None]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 128

trainset = torchvision.datasets.FashionMNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.FashionMNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal','Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [None]:
print(next(iter(trainloader)))

In [None]:
imgs, labels = next(iter(trainloader))

fig=plt.figure(figsize=(20,5),facecolor='w')
for i in range(10):
    ax = plt.subplot(1,10, i+1)
    plt.imshow(imgs[i, 0, :, :], vmin=0, vmax=1.0, cmap=cm.gray)
    ax.set_title("{}".format(classes_names[labels[i]]), fontsize=15)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.savefig('fashionMNIST_samples.png', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF

dt  = next(iter(trainloader))
# Convert the grayscale image to RGB
rgb_image = TF.to_pil_image(dt[0].squeeze(), mode='L').convert('RGB')

# Plot the RGB image
plt.imshow(rgb_image)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np


# functions to show an image


def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(next(dataiter).shape)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))

In [None]:
model = LeNet()
# model.apply_init(next(iter(trainloader(True)))[0], init_cnn)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        X, y = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        y_pred = model(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST



import torch.nn.functional as F

class LazyLinear(nn.Module):
    def __init__(self, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_features))
        self.lazy = True

    def forward(self, x):
        if self.lazy:
            self.weight.requires_grad = True
            self.lazy = False
        return x @ self.weight.t()


import torch.nn as nn

class LazyConv2d(nn.Module):
    def __init__(self, out_channels, kernel_size):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_channels, 1, kernel_size, kernel_size))
        self.lazy = True

    def forward(self, x):
        if self.lazy:
            self.weight.requires_grad = True
            self.lazy = False
        return F.conv2d(x, self.weight)



class LazyLeNet(nn.Module):
    def __init(self):
        super().__init__()
        self.conv1 = LazyConv2d(6, 5)
        self.conv2 = LazyConv2d(16, 5)
        self.fc1 = LazyLinear(120)
        self.fc2 = LazyLinear(84)
        self.fc3 = LazyLinear(10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [7]:
# Define the training parameters
batch_size = 64
learning_rate = 0.001
num_epochs = 10

# Load the FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = FashionMNIST(root='./data', train=False, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

device = torch.device("cuda")
# Initialize the LeNet model
model = LazyLeNet().to(device)


# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

import time
print(time.localtime())

# Training loop
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training loss for every 100 batches
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}")

ValueError: optimizer got an empty parameter list

In [1]:

# Define the LeNet model
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Define the training parameters
batch_size = 64
learning_rate = 0.001
num_epochs = 10

# Load the FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = FashionMNIST(root='./data', train=False, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

device = torch.device("cuda")
# Initialize the LeNet model
model = LeNet().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

import time
print(time.localtime())

# Training loop
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training loss for every 100 batches
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}")

print(time.localtime())
# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {(100 * correct / total):.2f}%")


time.struct_time(tm_year=2023, tm_mon=7, tm_mday=18, tm_hour=5, tm_min=54, tm_sec=45, tm_wday=1, tm_yday=199, tm_isdst=1)
Epoch [1/10], Step [100/938], Loss: 0.6910
Epoch [1/10], Step [200/938], Loss: 0.8383
Epoch [1/10], Step [300/938], Loss: 0.7747
Epoch [1/10], Step [400/938], Loss: 0.4881
Epoch [1/10], Step [500/938], Loss: 0.5645
Epoch [1/10], Step [600/938], Loss: 0.6745
Epoch [1/10], Step [700/938], Loss: 0.4498
Epoch [1/10], Step [800/938], Loss: 0.5023
Epoch [1/10], Step [900/938], Loss: 0.5302
Epoch [2/10], Step [100/938], Loss: 0.6039
Epoch [2/10], Step [200/938], Loss: 0.3609
Epoch [2/10], Step [300/938], Loss: 0.4716
Epoch [2/10], Step [400/938], Loss: 0.3101
Epoch [2/10], Step [500/938], Loss: 0.4419
Epoch [2/10], Step [600/938], Loss: 0.4262
Epoch [2/10], Step [700/938], Loss: 0.4097
Epoch [2/10], Step [800/938], Loss: 0.3769
Epoch [2/10], Step [900/938], Loss: 0.4123
Epoch [3/10], Step [100/938], Loss: 0.2825
Epoch [3/10], Step [200/938], Loss: 0.5024
Epoch [3/10], Step

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

In [None]:
# Training loop
total_step = len(train_loader)
start_time = time.time()  # Start time
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        # Move the tensors to the configured device (GPU)
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the epoch loss
        epoch_loss += loss.item()

    # Print average epoch loss
    average_loss = epoch_loss / total_step
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# Calculate the total training time
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time:.2f} seconds")

In [None]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LeNet().to(device)

start_time = time.time()
model.train()
for epoch in range(10):
    total_loss = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        y = model(images)
        loss = criterion(y, labels)
        total_loss += loss.item()

        model.zero_grad()
        loss.backward()
        optimizer.step()
        # Print training loss for every 100 batches

    avg_loss = total_loss / total_step
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# Calculate the total training time
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time:.2f} seconds")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
import time

# Define the LeNet model
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, device='cuda'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5, device='cuda'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        self.classifier = nn.Sequential(
            nn.Linear(16 * 4 * 4, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the training parameters
batch_size = 64
learning_rate = 0.001
num_epochs = 10

# Load the FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = FashionMNIST(root='./data', train=False, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the LeNet model and move it to the GPU
model = LeNet().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_step = len(train_loader)
start_time = time.time()  # Start time
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        # Move the tensors to the configured device (GPU)
        images = images.to(device)
        labels = labels.to(device)



        model.zero_grad()
        y = model(images)
        loss = criterion(y, labels)
        total_loss += loss.item()

        loss.backward()
        optimizer.step()




        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the epoch loss
        epoch_loss += loss.item()

    # Print average epoch loss
    average_loss = epoch_loss / total_step
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")

# Calculate the total training time
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time:.2f} seconds")

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        # Move the tensors to the configured device (GPU)
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {(100 * correct / total):.2f}%")


In [None]:
X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
                  [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

[(x, k) for x, k in zip(X,K)]


In [None]:
K = torch.stack((K, K + 1, K + 2), 0)
K

In [4]:
import time
from tqdm import tqdm
for i in range(10):
    time.sleep(0.3)
    for j in tqdm(range(20), desc='2', leave=True):
        time.sleep(0.1)
        pass

2: 100%|██████████| 20/20 [00:02<00:00,  9.03it/s]
2: 100%|██████████| 20/20 [00:02<00:00,  9.02it/s]
2: 100%|██████████| 20/20 [00:02<00:00,  9.07it/s]
2:   0%|          | 0/20 [00:00<?, ?it/s]


KeyboardInterrupt: 