In [2]:
import torch
import torchvision
import os
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim

### Two kind of graph computation
Dynamic graph computation and static graph computation are two ways to represent and execute computation in deep learning frameworks.

Dynamic Graph Computation (also called "eager execution" or "define-by-run"):
- In dynamic graph computation, the computation graph is built and executed on-the-fly as you write the code. It is constructed and executed step-by-step for each operation.
- This approach is more intuitive and easier to debug because the code execution closely follows the written code. You can use standard Python debugging tools, and the code behaves like a typical Python script.
- Dynamic graph computation is used in frameworks like PyTorch, TensorFlow 2.x (by default), and PaddlePaddle's dynamic mode.

Static Graph Computation (also called "define-and-run"):
- In static graph computation, the computation graph is defined and optimized before execution. The graph is constructed during the "define" phase, and the actual computation happens later during the "run" phase.
- This approach allows the framework to perform optimizations, like memory and computation reuse, which can lead to better performance. However, it can be more challenging to debug and less intuitive.
- Static graph computation is used in frameworks like TensorFlow 1.x and PaddlePaddle's static mode.

When to use each approach:
- Use dynamic graph computation when you need an easy-to-debug and more intuitive code. This approach is beneficial during research and development when you need to iterate and change the model structure frequently.
- Use static graph computation when performance is a priority and you have a stable model architecture. This approach is more suitable for production environments where the model structure does not change often.

The main difference between these two approaches is how the computation graph is built and executed. Dynamic graph computation builds and executes the graph as you write the code, which makes it more intuitive and easier to debug. In contrast, static graph computation requires the graph to be defined first and optimized before execution, which can result in better performance but is less intuitive and more challenging to debug.

In PyTorch, dynamic graph computation (also called eager execution) is the default mode. You don't need to do anything special to use it. When you write PyTorch code and define your model, you're automatically using dynamic graph computation.

Here's an example of defining and training a simple model using dynamic graph computation in PyTorch:

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Define a simple model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(784, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = self.fc(x)
        return x

# Create the model, optimizer, and loss function
model = SimpleModel()
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

# Load the MNIST dataset
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=64, shuffle=True)

# Train the model using dynamic graph computation
for epoch in range(10):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print("Epoch: {} Batch: {} Loss: {:.4f}".format(epoch, batch_idx, loss.item()))
```

In this example, the computation graph is built and executed on-the-fly during training, as you define the forward pass, loss calculation, and backpropagation.

PyTorch does not have a built-in static graph computation mode like TensorFlow 1.x or PaddlePaddle's static mode. However, PyTorch provides the TorchScript module, which allows you to compile your PyTorch models into an intermediate representation suitable for deployment and optimization.

To use TorchScript, you need to convert your model using `torch.jit.script` or `torch.jit.trace`. Here's an example of how to trace a model using TorchScript:

```python
# Convert the model to TorchScript
traced_model = torch.jit.trace(model, torch.randn(1, 1, 28, 28))

# Save the traced model to a file
torch.jit.save(traced_model, 'traced_model.pt')

# Load the traced model from the file
loaded_traced_model = torch.jit.load('traced_model.pt')

# Use the traced model for inference
output = loaded_traced_model(torch.randn(1, 1, 28, 28))
```

Keep in mind that TorchScript is not the same as a static graph computation mode, but it allows you to optimize your model for deployment and can provide some performance improvements.

In [3]:
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
mnist_train=torchvision.datasets.MNIST(root='/Users/isabelleliu/Desktop/code practice',train=True, download=False, transform=transform)
mnist_test=torchvision.datasets.MNIST(root='/Users/isabelleliu/Desktop/code practice',train=False, download=False, transform=transform)

#split train into train and validation
train_set, val_set=random_split(mnist_train,[len(mnist_train)-10000,10000])
batch_size=64

#create dataloader use default function
train_loader=DataLoader(train_set,batch_size,shuffle=True)
val_loader=DataLoader(val_set,batch_size,shuffle=True)
test_loader=DataLoader(mnist_test,batch_size,shuffle=True)

In [4]:
# Define the MNIST model
class MNIST(nn.Module):
    def __init__(self):
        super(MNIST, self).__init__()
        self.fc = nn.Linear(in_features=784, out_features=10)

    def forward(self, inputs):
        outputs = self.fc(inputs)
        return outputs

# Image normalization function
def norm_img(img):
    batch_size = img.shape[0]
    img = img / 127.5 - 1
    img = torch.reshape(img, (batch_size, 784))
    return img

def train(model):
    model.train()
    opt = optim.SGD(model.parameters(), lr=0.001)
    EPOCH_NUM = 10
    for epoch in range(EPOCH_NUM):
        for batch_id, (data, target) in enumerate(train_loader):
            images = norm_img(data).type(torch.float32)
            labels = target.type(torch.int64)

            # Forward pass
            predicts = model(images)

            # Calculate loss
            loss = F.cross_entropy(predicts, labels)
            avg_loss = torch.mean(loss)

            # Print loss every 1000 batches
            if batch_id % 1000 == 0:
                print("epoch_id: {}, batch_id: {}, loss is: {}".format(epoch, batch_id, avg_loss.item()))

            # Backward pass and optimization
            avg_loss.backward()
            opt.step()
            opt.zero_grad()

model = MNIST()

train(model)

torch.save(model.state_dict(), 'temp.pth')
print("==> Trained model saved in temp.pth")

epoch_id: 0, batch_id: 0, loss is: 2.3578267097473145
epoch_id: 1, batch_id: 0, loss is: 2.2945075035095215
epoch_id: 2, batch_id: 0, loss is: 2.3089680671691895
epoch_id: 3, batch_id: 0, loss is: 2.298851251602173
epoch_id: 4, batch_id: 0, loss is: 2.304023504257202
epoch_id: 5, batch_id: 0, loss is: 2.320495367050171
epoch_id: 6, batch_id: 0, loss is: 2.308727502822876
epoch_id: 7, batch_id: 0, loss is: 2.296772003173828
epoch_id: 8, batch_id: 0, loss is: 2.3020801544189453
epoch_id: 9, batch_id: 0, loss is: 2.310842514038086
==> Trained model saved in temp.pth


In [5]:
test_image, label = mnist_test[0]
print("The label of readed image is: ", label)

# Convert the test image to a tensor and reshape it to [1, 784]
test_image = test_image.view(1, -1)

# Load the saved model
loaded_model = MNIST()
loaded_model.load_state_dict(torch.load("temp.pth"))
loaded_model.eval()

# Use the loaded model for prediction
preds = loaded_model(test_image)
pred_label = torch.argmax(preds)

# Print the prediction result
print("The predicted label is: ", pred_label.item())

The label of readed image is:  7
The predicted label is:  0
