In [None]:
import os
import sys

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import PIL
import torch
import torchvision
from PIL import Image
# torchvision's preprocessing module is `transforms`; `transformers` is not a
# torchvision submodule and importing it raises ImportError.
from torchvision import transforms




In [None]:
# Root of the multiclass data and its training split.
data_dir = os.path.join("data_p1", "data_multiclass")
train_dir = os.path.join(data_dir, "train")

# Every entry directly under the training folder.
class_directories = os.listdir(train_dir)

print(f"class_directories type: {type(class_directories)}")
print(f"class_directories length: {len(class_directories)}")
print(class_directories)

In [None]:
# Number of files in each class sub-directory of the training split.
# Built in one expression so the name is never bound to a plain dict first and
# the builtin `dir` is not shadowed. Non-directory entries (e.g. stray files in
# train_dir) are skipped because os.listdir would fail on them.
class_distribution_list = pd.Series(
    {
        subdirectory: len(os.listdir(os.path.join(train_dir, subdirectory)))
        for subdirectory in class_directories
        if os.path.isdir(os.path.join(train_dir, subdirectory))
    }
)


<b>PRACTICE RECAP NEURAL NETWORK BINARY CLASSIFICATION</b>

In [None]:
import os
import sys
from collections import Counter

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from torch.utils.data import DataLoader, random_split
from torchinfo import summary
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

# Turn on cuDNN's deterministic flag (reproducibility aid when running on a GPU).
torch.backends.cudnn.deterministic = True

In [None]:
# Record the runtime environment so results can be reproduced later.
print(f"Platform: {sys.platform}")
print(f"Python version: {sys.version}")
print("---")
print(f"matplotlib version: {matplotlib.__version__}")
print(f"pandas version: {pd.__version__}")
print(f"PIL version: {PIL.__version__}")
print(f"torch version: {torch.__version__}")
print(f"torchvision version: {torchvision.__version__}")

In [None]:
# Prefer CUDA, then Apple's MPS backend, and fall back to the CPU.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

print(f"Using {device} device.")

In [None]:
class ConvertToRGB:
    """Callable transform that returns its input image in RGB mode."""

    def __call__(self, img):
        """Return ``img`` unchanged if its mode is "RGB", else ``img.convert("RGB")``."""
        return img if img.mode == "RGB" else img.convert("RGB")


In [None]:
# Preprocessing pipeline: force RGB, resize to 224x224, convert to a tensor.
transform = transforms.Compose([
    ConvertToRGB(),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

In [None]:
# Build the path with os.path.join (as the first section of this notebook does)
# instead of hard-coding a "/" separator.
data_dir = os.path.join("data_p1", "data_multiclass")
train_dir = os.path.join(data_dir, "train")

print("Will read data from:", train_dir)

In [None]:
# ImageFolder dataset rooted at the training directory, using the un-normalized transform.
dataset = datasets.ImageFolder(train_dir, transform=transform)

In [None]:
# Show the class names found by ImageFolder and how many there are.
print("Classes:")
print(dataset.classes)
# Fixed: the original f-string had no space between "That's" and the count.
print(f"That's {len(dataset.classes)} classes.")

print()
# dataset[0] is an (image, label) pair; element 0 is the transformed image.
print("Tensor shape for one image:")
print(dataset[0][0].shape)

In [None]:
batch_size = 32

# Unshuffled loader over the whole training folder; grab one batch to inspect it.
dataset_loader = DataLoader(dataset, batch_size=batch_size)
first_batch = next(iter(dataset_loader))

# first_batch is an (images, labels) pair.
# Fixed: a stray trailing "+" after the first print made this cell a SyntaxError.
print(f"Shape of one batch: {first_batch[0].shape}")
print(f"Shape of labels: {first_batch[1].shape}")


In [None]:
def get_mean_std_loader(loader):
    """Compute the per-channel mean and standard deviation over a loader.

    Each batch's statistics are weighted by the number of images it holds, so
    a smaller final batch does not bias the result.

    Args:
        loader: Iterable yielding ``(data, target)`` pairs. ``data`` is reduced
            over dims 0, 2 and 3, i.e. it is treated as a 4-D tensor; assumed
            to be (batch, channels, height, width) with the same image size in
            every batch -- TODO confirm against the caller's transform.

    Returns:
        Tuple ``(mean, std)`` of tensors with one entry per channel.

    Raises:
        ValueError: If the loader yields no images.
    """
    channel_sum, channels_squared_sum, num_images = 0, 0, 0
    for data, _ in tqdm(loader, desc="Calculating mean and std", leave=False):
        images_in_batch = data.shape[0]
        if images_in_batch == 0:
            # The mean of an empty batch is NaN and would poison the running sums.
            continue
        channel_sum += torch.mean(data, dim=[0, 2, 3]) * images_in_batch
        channels_squared_sum += torch.mean(data ** 2, dim=[0, 2, 3]) * images_in_batch
        num_images += images_in_batch

    if num_images == 0:
        raise ValueError("Cannot compute mean/std: the loader produced no images.")

    mean = channel_sum / num_images
    # Var[x] = E[x^2] - E[x]^2; clamp tiny negative values caused by rounding
    # so the square root cannot produce NaN.
    variance = channels_squared_sum / num_images - mean ** 2
    std = variance.clamp(min=0) ** 0.5
    return mean, std


In [None]:
# Channel statistics of the un-normalized training images.
mean, std = get_mean_std_loader(dataset_loader)

print(f"Mean: {mean}")
print(f"Std: {std}")

In [None]:
# Same preprocessing as `transform`, followed by per-channel normalization.
# Fixes over the original cell:
#   * Compose takes a single list of transforms, not separate arguments;
#   * a missing comma after Resize made the cell a SyntaxError;
#   * ToTensor must run before Normalize, which operates on tensors.
transform_norm = transforms.Compose(
    [
        ConvertToRGB(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [None]:
# Training images again, this time passed through the normalizing transform.
norm_dataset = datasets.ImageFolder(train_dir, transform=transform_norm)

norm_loader = DataLoader(dataset=norm_dataset, batch_size=32)

In [None]:
# Statistics of the normalized data, recomputed as a check on the normalization.
norm_mean, norm_std = get_mean_std_loader(norm_loader)

print("Norm Mean:", norm_mean)
print("Norm Std:", norm_std)

Data Splitting 

In [None]:
# Seeded generator so the split is the same on every run.
# Fixed: the class is `torch.Generator`; `torch.generator` does not exist.
g = torch.Generator()
g.manual_seed(42)

# 80% of the normalized dataset for training, 20% for validation.
train_dataset, val_dataset = random_split(norm_dataset, [0.8, 0.2], generator=g)

length_train = len(train_dataset)
length_val = len(val_dataset)
length_dataset = len(norm_dataset)
percent_train = np.round(length_train / length_dataset * 100, 2)
percent_val = np.round(length_val / length_dataset * 100, 2)

print(f"Train data is {percent_train}% of the full data")
print(f"Validation data is {percent_val}% of the full data")

Visualizing the class distribution in each data split


In [None]:
def class_counts(dataset):
    """Count how many samples of each class a dataset subset contains.

    Args:
        dataset: Object with a ``.dataset`` attribute exposing ``class_to_idx``
            (class name -> integer label). If ``dataset.indices`` and
            ``dataset.dataset.targets`` are both present, labels are read from
            them directly; otherwise the dataset is iterated and element 1 of
            each item is taken as the label.

    Returns:
        pandas Series indexed by class name with the sample count per class
        (0 for classes that do not occur).
    """
    base = dataset.dataset
    targets = getattr(base, "targets", None)
    indices = getattr(dataset, "indices", None)

    if targets is not None and indices is not None:
        # Fast path: look the labels up without loading or transforming any image.
        labels = (int(targets[i]) for i in indices)
    else:
        # Fallback: iterate the samples themselves, as the original code did.
        labels = (x[1] for x in tqdm(dataset))

    c = Counter(labels)
    class_to_index = base.class_to_idx
    return pd.Series({cat: c[idx] for cat, idx in class_to_index.items()})

In [None]:
def plot_class_distribution(distribution, title):
    """Draw a bar chart of per-class counts, sorted ascending.

    Args:
        distribution: pandas Series of counts indexed by class name.
        title: Title placed above the chart.
    """
    ax = distribution.sort_values().plot(kind='bar')
    ax.set_xlabel("Class Label")
    ax.set_ylabel("Frequency[count]")
    ax.set_title(title)
    plt.show()


# Bar plot of the training class distribution.
train_class_distributions = class_counts(train_dataset)
plot_class_distribution(train_class_distributions, "Class Distribution in Training Set")

# Bar plot of the validation class distribution.
val_class_distributions = class_counts(val_dataset)
plot_class_distribution(val_class_distributions, "Class Distribution in Validation Set")


In [None]:
# Seeded generator that drives the shuffling order of the training loader.
g = torch.Generator()
g.manual_seed(42)

batch_size = 32
# Fixed: `g` was created and seeded but never handed to the DataLoader, so the
# shuffle order was not controlled by it.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=g)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Element 0 of a batch is the image tensor.
single_batch = next(iter(train_loader))[0]
print(f"Shape of one batch: {single_batch.shape}")


Multiclass Predictions!

In [None]:
# Made-up confidence vector with one entry per class, used to demonstrate argmax.
sample_confidence = torch.tensor(
    [0.13, 0.01, 0.02, 0.12, 0.10, 0.34, 0.16, 0.12],
)

In [None]:
classes = norm_dataset.classes

# Index of the largest confidence, used to look up the class name.
class_number = torch.argmax(sample_confidence)
prediction = classes[class_number]

# Fixed: `f "..."` (space between the prefix and the quote) is a SyntaxError.
print(f"This image is a {prediction}")

Network Architecture

In [None]:
# Start from an empty container and add layers one at a time.
model_seq = nn.Sequential()

# 3 input channels -> 16 output channels, 3x3 kernel, padding of 1.
# Fixed: the keyword is `kernel_size`; `kernel_Size` raises a TypeError.
conv1 = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), padding=1)
model_seq.append(conv1)

Let's also get a single batch of 32 images, so we can see what the model is doing.

In [None]:
# Take element 0 (the image tensor) of the first batch produced by train_loader.
test_batch = next(iter(train_loader))[0]

Let's make sure the batch is the shape we expect. It should be 32 images, with 3 color channels, of size 224 x 224.

In [None]:
# Shape of the sample batch.
batch_shape = test_batch.size()

print(f"Batch shape: {batch_shape}")

Get the shape of the output and store it to first_step_shape.

In [None]:
first_step_out = model_seq(test_batch)
# Store the shape under its own name, as the other shape-check cells do.
first_step_shape = first_step_out.shape

print(f"Shape after first conv layer: {first_step_shape}")

Shape after first convolution layer: torch.Size([32, 16, 224, 224])

In [None]:
# Follow the convolution with a ReLU so the network is not purely linear.
model_seq.append(torch.nn.ReLU())

Our convolution layers transform images into more images. Ultimately, we're going to need to get down to just our 8 output classes. But our convolution has increased the number of pixels! Max pooling will let us shrink our image.

In PyTorch, this is a `MaxPool2d` layer. The 2d is because we're leaving the channels alone, so it'll max pool on each of our 16 channels separately. We'll need to say how big of a patch to reduce, called the kernel again. We'll set it to 2x2, a standard choice. We'll set our stride to 2 as well.

In [None]:
# 2x2 max pooling with a stride of 2.
max_pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=2)

model_seq.append(max_pool1)

In [None]:
# Push the sample batch through the layers added so far.
max_pool_out = model_seq(test_batch)
max_pool_shape = max_pool_out.size()

print(f"Shape after max pooling: {max_pool_shape}")


: 

In [None]:
# Second convolution / ReLU / max-pool set: 16 -> 32 channels.
# Fixed: the layer classes are `Conv2d` and `MaxPool2d` (lower-case "d");
# `Conv2D` / `MaxPool2D` raise AttributeError.
conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1)
max_pool2 = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=2)

model_seq.append(conv2)
model_seq.append(torch.nn.ReLU())
model_seq.append(max_pool2)

Run the current model on the test_batch, and save the output's shape to second_set_shape.

In [None]:
# Output of the model after the second conv / pool set.
second_set_out = model_seq(test_batch)
second_set_shape = second_set_out.size()

print(f"Shape after second conv and max pooling: {second_set_shape}")

Shape after second max pool: torch.Size([32, 32, 56, 56])

You can actually simplify these descriptions. First, you don't have to provide the argument names for most things, IF you provide them in the right order (padding is an exception). Second, for the kernels, if you say 2 it knows you mean 2 x 2 (same for 3, etc). Finally, for the max pool you can leave off the stride, it defaults to the size of the kernel. We can use this to make the description of our third layer set more compact. We'll use 64 kernels this time.

In [None]:
# Third set in compact, positional form: 32 -> 64 channels, kernel 3; pooling kernel 2.
conv3 = nn.Conv2d(32, 64, 3, padding=1)
max_pool3 = nn.MaxPool2d(2)

# append() returns the container, so the calls can be chained.
model_seq.append(conv3).append(nn.ReLU()).append(max_pool3)

Sequential(<br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) <br>
  (1): ReLU()<br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (4): ReLU()<br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (7): ReLU()<br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)<br>
)<br>

In [None]:
# Output of the model after the third conv / pool set.
third_set_out = model_seq(test_batch)
third_set_shape = third_set_out.size()

print(f"Shape after third conv and max pooling: {third_set_shape}")

Shape after third max pool: torch.Size([32, 64, 28, 28])

<b>Finishing the model</b>
We're going to need an output layer with just 8 neurons. That's a flat output, without the 3D structure of our images. Conveniently, PyTorch provides a Flatten layer for flattening. Let's add that to our model.

In [None]:
# Flatten each sample's feature maps into a vector for the dense layers.
model_seq.append(nn.Flatten())

Sequential( <br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) <br>
  (1): ReLU() <br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False) <br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) <br>
  (4): ReLU() <br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False) <br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) <br>
  (7): ReLU() <br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) <br>
  (9): Flatten(start_dim=1, end_dim=-1) <br>
) <br>

# Output of the model once the Flatten layer has been added.
flat_out = model_seq(test_batch)
flat_shape = flat_out.size()

print(f"Shape after flattening: {flat_shape}")

Shape after flattening: torch.Size([32, 50176])

At this point we have a flat input, and can build a normal set of dense layers. You can think of the convolution/max pool layers as having done the image processing. Now we need to do the actual classification. It turns out that dense layers are good at that task.

We could add a single layer and just go straight to our output 8 classes. But we'll get better performance by adding a few dense layers, Linear in PyTorch's terminology, first. For these layers, we need to tell it the size of the input, and how many neurons we want in the layer. Since the input is our previous layer, we tell it that size. We'll add a layer of 500 neurons.

In [None]:
# Dense layer: 50176 flattened inputs -> 500 neurons.
linear1 = nn.Linear(50176, 500)

model_seq.append(linear1)

Sequential(<br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (1): ReLU()<br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (4): ReLU()<br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (7): ReLU()<br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (9): Flatten(start_dim=1, end_dim=-1)<br>
  (10): Linear(in_features=50176, out_features=500, bias=True)<br>
)

# Activation after the first dense layer.
model_seq.append(nn.ReLU())

Sequential(<br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (1): ReLU()<br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (4): ReLU()<br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (7): ReLU()<br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (9): Flatten(start_dim=1, end_dim=-1)<br>
  (10): Linear(in_features=50176, out_features=500, bias=True)<br>
  (11): ReLU()<br>
)<br>

In [None]:
# Output of the model after the first dense layer and its activation.
linear_out = model_seq(test_batch)
linear_shape = linear_out.size()

print(f"Shape after linear layer: {linear_shape}")

Shape after linear layer: torch.Size([32, 500])

 Add the output dense layer.

In [None]:
# Output layer: 500 features -> 8 values, one per class (bias is on by default).
output_layer = nn.Linear(500, 8)

model_seq.append(output_layer)

Sequential( <br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (1): ReLU()<br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (4): ReLU()<br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<br>
  (7): ReLU()<br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)<br>
  (9): Flatten(start_dim=1, end_dim=-1)<br>
  (10): Linear(in_features=50176, out_features=500, bias=True)<br>
  (11): ReLU()<br>
  (12): Linear(in_features=500, out_features=8, bias=True)<br>
)

In [None]:
# Shape of the complete exploratory model's output for the sample batch.
model_seq(test_batch).size()

In [None]:
# NOTE(review): presumably the expected output of the cell above, pasted into a
# code cell; evaluating it only constructs and displays a torch.Size value.
torch.Size([32, 8])

<b>Training the Model</b>
Before we start training, let's put all the model code in one place. This is how you'd do it in practice, to prevent errors.

We have also added Dropout layers after the flattened and linear layers. This helps to prevent overfitting.

In [None]:
# Important! Don't change this
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Create the layers in network order (so the seeded weight initialization is
# drawn in the same sequence), then assemble them in a single Sequential.

# Block 1: 3 -> 16 channels.
conv1 = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), padding=1)
max_pool1 = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=2)

# Block 2: 16 -> 32 channels.
conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1)
max_pool2 = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=2)

# Block 3: 32 -> 64 channels, compact positional form.
conv3 = torch.nn.Conv2d(32, 64, 3, padding=1)
max_pool3 = torch.nn.MaxPool2d(2)

# Classifier head: 50176 flattened features -> 500 -> 8 outputs.
linear1 = torch.nn.Linear(in_features=50176, out_features=500)
output_layer = torch.nn.Linear(500, 8)

model = torch.nn.Sequential(
    conv1,
    torch.nn.ReLU(),
    max_pool1,
    conv2,
    torch.nn.ReLU(),
    max_pool2,
    conv3,
    torch.nn.ReLU(),
    max_pool3,
    torch.nn.Flatten(),
    torch.nn.Dropout(),
    linear1,
    torch.nn.ReLU(),
    torch.nn.Dropout(),
    output_layer,
)

# Display the assembled model as the cell output.
model

Sequential( <Br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<Br>
  (1): ReLU()<Br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<Br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<Br>
  (4): ReLU()<Br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<Br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<Br>
  (7): ReLU()<Br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)<Br>
  (9): Flatten(start_dim=1, end_dim=-1)<Br>
  (10): Dropout(p=0.5, inplace=False)<Br>
  (11): Linear(in_features=50176, out_features=500, bias=True)<Br>
  (12): ReLU()<Br>
  (13): Dropout(p=0.5, inplace=False)<Br>
  (14): Linear(in_features=500, out_features=8, bias=True)<Br>
)<Br>

And to make sure PyTorch has the model correct, let's look at the summary.

In [None]:
# Summarize the model for one batch of 3-channel 224x224 inputs.
height = width = 224
summary(model, input_size=(batch_size, 3, height, width))

This model has over 25 million parameters. That's fewer than our previous model, but because of the more complicated architecture it'll take more time and resources to train.

We can use the same training code we used last time. It can handle binary or multiclass classification. We made a separate file with this code in the previous notebook. Now we can reuse that code by importing it.

from training import predict, train

As in the previous notebook, we'll use the cross entropy as our loss function. This will take into account how confident the model is in its answer, as well as whether it was right or wrong. We will also print the accuracy as a human-readable measure.

We'll need to set up our Cross Entropy loss, and an optimizer. We'll also make sure our model is on the GPU.

In [None]:
# Cross-entropy loss and an Adam optimizer over all model parameters.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Move the model to the selected device.
model.to(device)

Sequential( <Br>
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<Br>
  (1): ReLU()<Br>
  (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<Br>
  (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<Br>
  (4): ReLU()<Br>
  (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)<Br>
  (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))<Br>
  (7): ReLU()<Br>
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)<Br>
  (9): Flatten(start_dim=1, end_dim=-1)<Br>
  (10): Dropout(p=0.5, inplace=False)<Br>
  (11): Linear(in_features=50176, out_features=500, bias=True)<Br>
  (12): ReLU()<Br>
  (13): Dropout(p=0.5, inplace=False)<Br>
  (14): Linear(in_features=500, out_features=8, bias=True)<Br>
)<Br>

In [None]:
# Run the `train` helper imported from the `training` module (not shown in this
# notebook) for 8 epochs with the training and validation loaders.
train(model, optimizer, loss_fn, train_loader, val_loader, epochs=8, device=device)

Pretrained Model: Load the pre-trained model with the following line:

In [None]:
# SECURITY NOTE: weights_only=False unpickles arbitrary Python objects, which
# can execute code embedded in the file. Only load checkpoints you trust.
# map_location places the stored tensors on the device selected earlier, so a
# checkpoint saved on a GPU still loads on a CPU-only or MPS machine.
model = torch.load("model/trained_model.pth", map_location=device, weights_only=False)

In [None]:
# Outputs of the imported `predict` helper for the validation loader; the
# predicted class is the index of the largest value along dim 1.
probabilities = predict(model, val_loader, device)
predictions = probabilities.argmax(dim=1)

With those and the correct answers, we can generate the confusion matrix. Let's pull the targets into a list.

In [None]:
# Flatten the label tensors of every validation batch into one Python list.
targets = [
    label
    for _, labels in tqdm(val_loader)
    for label in labels.tolist()
]

 Make the same confusion matrix we made last time. You'll need to either move the predictions to cpu or convert them to a list. The labels will be our classes.

In [None]:
# Pass the full list of label indices so the matrix always has one row/column
# per class, even when a class is absent from both targets and predictions.
# Without it the matrix can be smaller than `classes` and the labels misalign.
cm = confusion_matrix(targets, predictions.cpu(), labels=list(range(len(classes))))

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)

disp.plot(cmap=plt.cm.Blues, xticks_rotation="vertical")
plt.show()

For testing

# Folder holding the test split.
test_dir = os.path.join(data_dir, "test")

# CSV file listing the test images.
id_file_location = os.path.join(test_dir, "test_features.csv")
df_ids = pd.read_csv(filepath_or_buffer=id_file_location)

df_ids.head(5)

In [None]:
# Value in the first row, second column of the id table.
test_image_location = df_ids.iat[0, 1]
test_image_location

In [None]:
# Open the first test image from its location under the test folder.
test_image_path = os.path.join(test_dir, test_image_location)
test_image = PIL.Image.open(fp=test_image_path)
test_image

In [None]:
# Apply the normalizing pipeline to the opened image and inspect the result's shape.
test_image_trans = transform_norm(test_image)
test_image_trans.size()

torch.Size([3, 224, 224])



Almost there. Our model is actually expecting a batch of these. Since we only have one, we'll need to change the tensor to a 1 x 3 x 224 x 224 tensor. We could do this with reshape, but it's easier to use unsqueeze. unsqueeze is meant for this exact problem. It adds an extra dimension with one element. We just specify which extra dimension we want. In our case, we want the first dimension (i.e. 0) to be the extra one.

In [None]:
# Insert a new leading dimension of size 1.
test_unsqueeze = torch.unsqueeze(test_image_trans, dim=0)
test_unsqueeze.size()

torch.Size([1, 3, 224, 224])

In [None]:
# Move the input to the selected device and run it through the model.
test_image_cuda = test_unsqueeze.to(device=device)
test_out = model(test_image_cuda)
test_out

tensor([[ 2.7270, -6.9468,  2.9463, -0.1695, -7.5702, -3.2693, -2.5679,  1.5404]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

That's the model's predictions, but it's not in the form we expected. We want a set of numbers between 0 and 1, that sum to 1. What we're seeing is the raw output of the last layer. To convert this to the confidences, we need to run it through a SoftMax. This is very much like the logistic or sigmoid you've seen before, except that it works with many inputs. The dim=1 tells it each row is one prediction.

In [None]:
# Softmax along dim 1 turns each row of raw outputs into values that sum to 1.
test_softmax = torch.softmax(test_out, dim=1)
test_softmax

Now that we have our numbers, we need to convert to a DataFrame. The predictions are made in the same order as the classes in our dataset. We can't convert a tensor directly to a DataFrame, so we'll convert it to a list first. Then we can set the columns to our classes.

In [None]:
# One-row table of confidences with the dataset's class names as columns.
test_df = pd.DataFrame(test_softmax.tolist(), columns=dataset.classes)

test_df

In [None]:
# Use the value in the first row, first column of the id table as the row label.
image_id = df_ids.iat[0, 0]
test_df.index = [image_id]

test_df

Let's put this into a function to make it easier. We'll also add a few things to make PyTorch run faster.

In [None]:
# Make sure the model is on the selected device before running predictions.
model.to(device)


def file_to_confidence(file_path, image_id):
    """Return the model's per-class confidences for one image file.

    Args:
        file_path: Path of the image file to open.
        image_id: Label used as the index of the returned one-row DataFrame.

    Returns:
        pandas DataFrame with a single row indexed by ``image_id`` and one
        column per entry of ``dataset.classes``, holding the softmax of the
        model's output.
    """
    # The context manager closes the file handle once the transform has read
    # the pixels, so looping over many files does not leak descriptors.
    with PIL.Image.open(file_path) as image:
        transformed = transform_norm(image)

    # Add a leading batch dimension and move the input to the model's device.
    unsqueezed = transformed.unsqueeze(0)
    image_cuda = unsqueezed.to(device)

    # Evaluation mode, with gradient tracking switched off for the forward pass.
    model.eval()
    with torch.no_grad():
        model_raw = model(image_cuda)
        confidence = torch.nn.functional.softmax(model_raw, dim=1)

    conf_df = pd.DataFrame(confidence.tolist())
    conf_df.columns = dataset.classes
    conf_df.index = [image_id]

    return conf_df

In [None]:
# Try the helper on the single test image used above.
file_to_confidence(file_path=test_image_path, image_id=image_id)

 Do it! You can use df_ids.itertuples() to get one row at a time, and pd.concat to assemble many DataFrames into one big one.

In [None]:
# One single-row confidence table per row of the id table...
small_dfs = [
    file_to_confidence(os.path.join(test_dir, row.filepath), row.id)
    for row in df_ids.itertuples()
]

# ...concatenated into one table.
confidence_df = pd.concat(small_dfs)

In [None]:
# Write the confidence table to a CSV file.
confidence_df.to_csv(path_or_buf="submission.csv")

<b>Saving the Model</b>
We've put a lot of effort into this model, and it took a while to train. The training has determined the best parameters (also called weights) for our network. The only information we need to reproduce it is the network architecture and the values of those parameters. PyTorch lets us save all of this. Then we can just load the model in the future instead of having to retrain it.


In [None]:
# Serialize the whole model object to disk.
torch.save(obj=model, f="model/deepnet")