# 第6課 PyTorch與TensorFlow/Keras深度學習框架全方位比較
近年來，深度學習領域迅速發展，PyTorch與TensorFlow/Keras作為兩大主流框架，各自佔據了重要的市場份額。本課將全面比較這兩個框架在歷史發展、設計理念、應用場景以及演算法實現等多個層面的差異，並提供可在Google Colab上運行的比較程式碼。

# 歷史與發展歷程
**PyTorch的誕生與演進**  
PyTorch是由Facebook（現Meta）的AI研究團隊於2016年9月發布的開源深度學習框架，基於Torch庫。PyTorch的創建理念很鮮明——它認為當時的兩個主要對手分別是「把簡單的事情複雜化的TensorFlow 1.x」和「把複雜的事情太過簡化的Keras」。這一理念促使PyTorch尋求一個平衡點，提供既靈活又易用的解決方案。

隨著時間推移，PyTorch逐漸在學術界和研究社區中獲得廣泛使用。2022年9月，Meta宣布PyTorch將交由獨立的PyTorch Foundation管理，該基金會是隸屬於Linux Foundation旗下的新成立組織。2023年3月，PyTorch 2.0版本發布，引入了TorchDynamo（一個Python級別的編譯器），進一步增強了框架的性能。

**TensorFlow與Keras的發展路徑**  
TensorFlow由Google於2015年推出，最初作為一個以靜態計算圖著稱的開源框架。與此同時，Keras由François Chollet於2015年開發，最初是ONEIROS項目的一部分。Keras設計為一個高階API，可以在TensorFlow、Theano和CNTK等多個後端運行。

隨著用戶對更簡便開發體驗的需求增加，TensorFlow 2.0版本明顯轉向了更友好的開發體驗，並將Keras作為其主要的高階API集成進來。如今，Keras已成為TensorFlow生態系統中不可分割的一部分，成為初學者入門和快速原型設計的首選工具。

In [None]:
# colab 已有安裝好的 torch 與 tensorflow
# 如果執行以下的安裝，會導致版本衝突，請不要執行

# 安裝必要的套件
#!pip install torch torchvision
#重新安裝tensorflow, restart session after this
#!pip install tensorflow==2.12.1


In [None]:

#檢查cuda版本 (Windows也適用這兩個指令)
!nvcc --version
!nvidia-smi

# 環境設置
import time
import numpy as np
import matplotlib.pyplot as plt

# ==== PyTorch 實現 ====
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Report the installed PyTorch version.
print("PyTorch 版本:", torch.__version__)

# Select the PyTorch device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("PyTorch 使用設備:", device)


In [None]:

# Load the MNIST dataset.
# The transform converts each image to a tensor and then normalises it with
# mean 0.5 and std 0.5 (the same values the Keras arrays are scaled with below).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Training split: downloaded to ./data on first use, shuffled, batches of 64.
trainset_torch = torchvision.datasets.MNIST(root='./data', train=True,
                                            download=True, transform=transform)
trainloader_torch = torch.utils.data.DataLoader(trainset_torch, batch_size=64,
                                               shuffle=True, num_workers=2)

# Test split: same transform, not shuffled.
testset_torch = torchvision.datasets.MNIST(root='./data', train=False,
                                           download=True, transform=transform)
testloader_torch = torch.utils.data.DataLoader(testset_torch, batch_size=64,
                                              shuffle=False, num_workers=2)

# 定義 PyTorch 模型
class Net(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10 with ReLU in between.

    ``forward`` flattens the input and returns the output of the last linear
    layer as-is (no softmax is applied inside the module).
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the same order as before so that
        # parameter names and the random initialisation sequence are unchanged.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        flat = self.flatten(x)
        hidden_a = self.relu(self.fc1(flat))
        hidden_b = self.relu(self.fc2(hidden_a))
        return self.fc3(hidden_b)

# Create the model instance and move it to the selected device.
net_torch = Net().to(device)

# Define the loss function and the optimizer (Adam over net_torch's parameters).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net_torch.parameters(), lr=0.001)

# 訓練 PyTorch 模型
def train_pytorch(epochs=5):
    """Train the module-level ``net_torch`` and evaluate it after every epoch.

    Reads the module-level ``net_torch``, ``criterion``, ``optimizer``,
    ``device``, ``trainloader_torch`` and ``testloader_torch``.

    Args:
        epochs: Number of passes over ``trainloader_torch``.

    Returns:
        Tuple ``(losses, accuracies, training_time)``. ``losses`` holds the mean
        training loss of every completed window of 100 batches, ``accuracies``
        the test accuracy in percent after each epoch (``nan`` if the test
        loader yielded no samples), and ``training_time`` the elapsed
        wall-clock seconds, evaluation included.
    """
    start_time = time.time()
    losses = []
    accuracies = []

    for epoch in range(epochs):
        # Switch modes explicitly, as custom_train_pytorch does, so the loop
        # stays correct if mode-dependent layers are ever added to the model.
        net_torch.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader_torch, 0):
            # Each item is one mini-batch: inputs at index 0, labels at index 1.
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = net_torch(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Report and record the mean loss once per 100 batches.
            if i % 100 == 99:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
                losses.append(running_loss / 100)
                running_loss = 0.0

        # Compute the test accuracy for this epoch.
        net_torch.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader_torch:
                images, labels = data[0].to(device), data[1].to(device)
                outputs = net_torch(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty test loader instead of dividing by zero.
        accuracy = 100 * correct / total if total else float('nan')
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1}, Accuracy: {accuracy:.2f}%')

    end_time = time.time()
    training_time = end_time - start_time
    print(f'PyTorch 訓練完成，耗時: {training_time:.2f} 秒')

    return losses, accuracies, training_time


# ==== TensorFlow/Keras 實現 ====
import tensorflow as tf
from tensorflow.keras import layers, models

print("TensorFlow 版本:", tf.__version__)

# Load the MNIST dataset.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# Preprocessing: add a trailing channel axis, cast to float32 and divide by 255.
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255

# Apply (x - 0.5) / 0.5, i.e. the same mean/std of 0.5 passed to Normalize on the PyTorch side.
train_images = (train_images - 0.5) / 0.5
test_images = (test_images - 0.5) / 0.5

# Convert the labels with to_categorical for the 'categorical_crossentropy' loss used below.
train_labels = tf.keras.utils.to_categorical(train_labels)
test_labels = tf.keras.utils.to_categorical(test_labels)

# Define the Keras model: the same 128 -> 64 -> 10 layer sizes as Net, with a softmax output.
model_keras = models.Sequential([
    layers.Flatten(input_shape=(28, 28, 1)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Compile the model.
model_keras.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

# Print the model summary.
model_keras.summary()

# 訓練 Keras 模型
def train_keras(epochs=5):
    """Fit the module-level ``model_keras`` and time the call.

    Trains on the module-level ``train_images`` / ``train_labels`` in batches
    of 64 and validates on ``test_images`` / ``test_labels``.

    Args:
        epochs: Number of epochs passed to ``fit``.

    Returns:
        Tuple ``(history, training_time)``: the object returned by ``fit`` and
        the elapsed wall-clock seconds.
    """
    started_at = time.time()
    fit_history = model_keras.fit(
        train_images,
        train_labels,
        epochs=epochs,
        batch_size=64,
        validation_data=(test_images, test_labels),
    )
    training_time = time.time() - started_at

    print(f'Keras 訓練完成，耗時: {training_time:.2f} 秒')
    return fit_history, training_time

# 比較兩個框架
def compare_frameworks(epochs=5):
    """Train both frameworks, print their training times and plot the results.

    Calls ``train_pytorch`` and ``train_keras`` with the same number of epochs,
    then draws test accuracy and training loss side by side.

    Args:
        epochs: Number of epochs for both trainers (default 5, the previously
            hard-coded value).
    """
    print("開始 PyTorch 訓練...")
    losses_torch, accuracies_torch, time_torch = train_pytorch(epochs=epochs)

    print("\n開始 Keras 訓練...")
    history_keras, time_keras = train_keras(epochs=epochs)

    # Compare training time.
    print("\n訓練時間比較:")
    print(f"PyTorch: {time_torch:.2f} 秒")
    print(f"Keras: {time_keras:.2f} 秒")

    # Keras reports accuracy as a fraction; convert to percent like PyTorch.
    accuracies_keras = [acc * 100 for acc in history_keras.history['val_accuracy']]
    losses_keras = history_keras.history['loss']

    # Accuracy comparison: x-axes are derived from the data, not from a fixed 5.
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(accuracies_torch) + 1), accuracies_torch, 'bo-', label='PyTorch')
    plt.plot(range(1, len(accuracies_keras) + 1), accuracies_keras, 'ro-', label='Keras')
    plt.title('Accuracy Comparison')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    # Loss comparison. PyTorch records one value per 100 batches while Keras
    # records one per epoch, so the PyTorch points are spread evenly over the
    # epoch axis (an approximation) instead of sharing a raw index axis.
    plt.subplot(1, 2, 2)
    torch_loss_epochs = [
        (k + 1) * epochs / len(losses_torch) for k in range(len(losses_torch))
    ]
    plt.plot(torch_loss_epochs, losses_torch, 'b-', label='PyTorch')
    plt.plot(range(1, len(losses_keras) + 1), losses_keras, 'r-', label='Keras')
    plt.title('Loss Comparison')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

# 執行比較
compare_frameworks()


**進階定制化比較：自定義層和訓練循環**

In [None]:
# ===== PyTorch 自定義層 =====
import torch
import torch.nn as nn

class CustomLayer(nn.Module):
    """Hand-written dense layer computing ``x @ weight + bias``.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Size of the last dimension of the output.
        init_std: Standard deviation of the zero-mean normal initialisation.
            The default 0.05 matches the Keras ``'random_normal'`` initializer
            used by ``CustomLayerKeras``, so both frameworks start from
            comparable weights. Pass 1.0 for the previous unscaled behaviour.
    """

    def __init__(self, input_dim, output_dim, init_std=0.05):
        super().__init__()
        # Unscaled N(0, 1) weights on a 784-wide input give very large
        # pre-activations, so the samples are scaled down.
        self.weight = nn.Parameter(torch.randn(input_dim, output_dim) * init_std)
        self.bias = nn.Parameter(torch.randn(output_dim) * init_std)

    def forward(self, x):
        # Custom forward pass: affine map over the last input dimension.
        return torch.matmul(x, self.weight) + self.bias

# 使用自定義層創建模型
class CustomModel(nn.Module):
    """Classifier built around ``CustomLayer``: 784 -> 128 (custom) -> ReLU -> 10.

    ``forward`` returns the output of the final linear layer without a softmax.
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept so parameter names and the random
        # initialisation sequence match the original definition.
        self.flatten = nn.Flatten()
        self.custom = CustomLayer(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        flat = self.flatten(x)
        hidden = self.relu(self.custom(flat))
        return self.fc(hidden)

# ===== TensorFlow/Keras 自定義層 =====
import tensorflow as tf
from tensorflow.keras import layers

class CustomLayerKeras(layers.Layer):
    """Hand-written dense layer computing ``inputs @ weight + bias``.

    Args:
        output_dim: Size of the last dimension of the output.
        **kwargs: Standard layer arguments (for example ``name``), forwarded
            to the base ``Layer`` constructor.
    """

    def __init__(self, output_dim, **kwargs):
        # Forward base-layer keyword arguments instead of rejecting them.
        super().__init__(**kwargs)
        self.output_dim = output_dim

    def build(self, input_shape):
        # Weights are created here because the input width is only known
        # from input_shape, not at construction time.
        self.weight = self.add_weight(
            shape=(input_shape[-1], self.output_dim),
            initializer='random_normal',
            trainable=True
        )
        self.bias = self.add_weight(
            shape=(self.output_dim,),
            initializer='random_normal',
            trainable=True
        )

    def call(self, inputs):
        # Custom forward pass: affine map over the last input dimension.
        return tf.matmul(inputs, self.weight) + self.bias

    def get_config(self):
        # Include the constructor argument so the layer can be re-created
        # from its config.
        config = super().get_config()
        config.update({'output_dim': self.output_dim})
        return config

# 使用自定義層創建模型
def create_custom_keras_model():
    """Build and compile a Sequential model that uses ``CustomLayerKeras``.

    Layers: Flatten(28, 28, 1) -> CustomLayerKeras(128) -> ReLU -> Dense(10, softmax).
    Compiled with the 'adam' optimizer, 'categorical_crossentropy' loss and
    the 'accuracy' metric.

    Returns:
        The compiled model.
    """
    stack = [
        layers.Flatten(input_shape=(28, 28, 1)),
        CustomLayerKeras(128),
        layers.Activation('relu'),
        layers.Dense(10, activation='softmax'),
    ]
    custom_model = tf.keras.Sequential(stack)

    compile_options = {
        'optimizer': 'adam',
        'loss': 'categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    custom_model.compile(**compile_options)
    return custom_model

# ===== 自定義訓練循環比較 =====
# PyTorch 自定義訓練循環
def custom_train_pytorch(model, trainloader, testloader, criterion, optimizer, epochs=3, log_interval=100):
    """Train ``model`` with an explicit loop and evaluate it after every epoch.

    Args:
        model: Module to train; moved to CUDA device 0 when available, else CPU.
        trainloader: Iterable of batches with inputs at index 0 and labels at index 1.
        testloader: Iterable of evaluation batches with the same layout.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``trainloader``.
        log_interval: Print the mean running loss every this many batches;
            a falsy value disables the periodic print.

    Returns:
        List with the evaluation accuracy in percent for each epoch; an entry
        is ``nan`` when ``testloader`` yielded no samples in that epoch.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    accuracies = []

    for epoch in range(epochs):
        model.train()  # training mode
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if log_interval and i % log_interval == log_interval - 1:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_interval:.3f}')
                running_loss = 0.0

        # Validation pass.
        model.eval()  # evaluation mode
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data[0].to(device), data[1].to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty test loader instead of dividing by zero.
        accuracy = 100 * correct / total if total else float('nan')
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1}, Accuracy: {accuracy:.2f}%')

    print('Finished Training')
    return accuracies

# Keras 自定義訓練循環
def custom_train_keras_with_loop(model, train_images, train_labels, test_images, test_labels, epochs=3, batch_size=64):
    """Train a compiled Keras model with a hand-written ``GradientTape`` loop.

    Args:
        model: Compiled Keras model; its ``optimizer`` applies the gradients.
        train_images: Training inputs, indexable by an integer array; the
            first axis is the sample axis.
        train_labels: Training targets aligned with ``train_images``
            (one-hot, as expected by ``CategoricalCrossentropy``).
        test_images: Evaluation inputs passed to ``model.predict``.
        test_labels: One-hot evaluation targets.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient step.
    """
    train_size = train_images.shape[0]
    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    # Ceiling division so the final, smaller batch is trained on as well
    # instead of being dropped every epoch.
    num_batches = (train_size + batch_size - 1) // batch_size

    for epoch in range(epochs):
        print(f'Epoch {epoch + 1}/{epochs}')

        # Training phase: visit the samples in a fresh random order.
        indices = np.random.permutation(train_size)

        loss_sum = 0.0
        for i in range(num_batches):
            batch_indices = indices[i * batch_size:(i + 1) * batch_size]
            batch_x = train_images[batch_indices]
            batch_y = train_labels[batch_indices]

            with tf.GradientTape() as tape:
                # The loss is built with its defaults, so the model output is
                # passed as-is; the models in this file end in a softmax layer.
                predictions = model(batch_x, training=True)
                loss_value = loss_fn(batch_y, predictions)

            grads = tape.gradient(loss_value, model.trainable_weights)
            model.optimizer.apply_gradients(zip(grads, model.trainable_weights))

            # Accumulate a plain float rather than a chain of tensors.
            loss_sum += float(loss_value)

            if i % 100 == 99:
                print(f'Batch {i + 1}/{num_batches}, Loss: {loss_sum / 100:.4f}')
                loss_sum = 0.0

        # Evaluation phase: compare arg-max predictions with arg-max labels.
        test_predictions = model.predict(test_images)
        test_acc = np.mean(np.argmax(test_predictions, axis=1) ==
                           np.argmax(test_labels, axis=1))
        print(f'Test accuracy: {test_acc * 100:.2f}%')


In [None]:
# prompt: 請參考前面def compare_frameworks()的流程，比較pytorch以及keras自定義訓練循環

# ===== Custom training loop comparison (execution cell) =====
# Build the PyTorch custom model instance.
custom_model_torch = CustomModel()

# Define the loss function and optimizer for the custom model.
# Dedicated names are used on purpose: train_pytorch() reads the module-level
# `criterion` and `optimizer`, and rebinding `optimizer` to this model's
# parameters would make later train_pytorch() runs stop updating net_torch.
custom_criterion = nn.CrossEntropyLoss()
custom_optimizer = optim.Adam(custom_model_torch.parameters(), lr=0.001)

# Run the PyTorch custom training loop.
print("開始 PyTorch 自定義訓練循環...")
custom_train_pytorch(custom_model_torch, trainloader_torch, testloader_torch,
                     custom_criterion, custom_optimizer, epochs=3)


# Build the Keras custom model instance.
custom_model_keras = create_custom_keras_model()

# Run the Keras custom training loop.
print("\n開始 Keras 自定義訓練循環...")
custom_train_keras_with_loop(custom_model_keras, train_images, train_labels, test_images, test_labels, epochs=3)


**數據處理與擴充比較**

In [None]:
# ===== 數據處理和數據增強比較 =====

# PyTorch 數據處理與增強
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# Augmentation pipeline for training images.
# Normalize runs last: rotation and translation fill the uncovered border with
# 0 by default, which equals the black background only before normalisation.
# Applied after Normalize, that fill would appear as mid-gray.
# NOTE(review): horizontal flips mirror digits; kept only for parity with
# horizontal_flip=True in the Keras ImageDataGenerator below — confirm intent.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.Normalize((0.5,), (0.5,)),
])

# 自定義數據集
class CustomDataset(Dataset):
    """Dataset pairing ``data[idx]`` with ``labels[idx]``.

    Args:
        data: Indexable collection of samples; its length is the dataset length.
        labels: Indexable collection of labels aligned with ``data``.
        transform: Optional callable applied to the sample (never to the label).
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample, target = self.data[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# TensorFlow/Keras 數據處理與增強
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the Keras augmenter: rotation range 10, width/height shift range 0.1
# and random horizontal flips.
# NOTE(review): validation_split=0.2 is configured, but no flow(...) call in
# this file passes subset=..., so the split looks unused — confirm.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)

# 準備增強後的數據
def prepare_augmented_data(train_images, train_labels):
    """Return a batch iterator of augmented training data.

    Calls ``fit`` on the module-level ``datagen`` with ``train_images`` and
    then creates the iterator with ``flow`` using batches of 64.

    Args:
        train_images: Training images handed to ``datagen``.
        train_labels: Labels aligned with ``train_images``.

    Returns:
        The iterator returned by ``datagen.flow``.
    """
    datagen.fit(train_images)
    augmented_batches = datagen.flow(train_images, train_labels, batch_size=64)
    return augmented_batches



# 顯示增強後的圖像（PyTorch）
def show_augmented_pytorch(dataset, num_images=5):
    """Plot one random batch of ``num_images`` samples from ``dataset`` in a row.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs; channel 0 of each
            image is drawn in grayscale.
        num_images: Batch size requested from the loader and number of panels.
    """
    loader = DataLoader(dataset, batch_size=num_images, shuffle=True)
    batch_images, _ = next(iter(loader))

    plt.figure(figsize=(15, 3))
    for panel in range(1, num_images + 1):
        first_channel = batch_images[panel - 1][0]
        plt.subplot(1, num_images, panel)
        plt.imshow(first_channel.numpy(), cmap='gray')
        plt.axis('off')
    plt.suptitle('PyTorch增強後的圖像')
    plt.show()

# 顯示增強後的圖像（Keras）
def show_augmented_keras(datagen, images, num_images=5):
    """Plot up to ``num_images`` augmented images produced by Keras.

    Args:
        datagen: Either a batch iterator (for example the object returned by
            ``ImageDataGenerator.flow``) or an object with a ``flow`` method,
            in which case a fresh iterator over ``images`` is created from it.
        images: Source images; only used when ``datagen`` has a ``flow`` method.
        num_images: Maximum number of images to draw.
    """
    # Accept the generator itself as well as an already-created iterator.
    if hasattr(datagen, 'flow'):
        batches = datagen.flow(images, batch_size=num_images)
    else:
        batches = datagen

    batch = next(batches)
    # An iterator created with labels yields (images, labels); without labels
    # it yields the image array directly.
    batch_images = batch[0] if isinstance(batch, (tuple, list)) else batch
    augmented_images = batch_images[:num_images]
    # The batch may hold fewer images than requested; draw only what exists.
    shown = len(augmented_images)

    plt.figure(figsize=(15, 3))
    for i in range(shown):
        plt.subplot(1, shown, i + 1)
        plt.imshow(augmented_images[i].reshape(28, 28), cmap='gray')
        plt.axis('off')
    plt.suptitle('Keras增強後的圖像')
    plt.show()


In [None]:
# PyTorch
# transform_train already converts and normalises raw images itself. The Keras
# arrays `train_images` / `train_labels` are already scaled to [-1, 1] and
# one-hot encoded, so feeding them here would normalise the pixels a second
# time. Reload the raw uint8 images and integer class labels instead.
(raw_train_images, raw_train_labels), _ = tf.keras.datasets.mnist.load_data()

# Create the dataset instance (labels cast to int64 for CrossEntropyLoss).
trainset_torch = CustomDataset(raw_train_images,
                               raw_train_labels.astype('int64'),
                               transform=transform_train)
# Create the data loader; this rebinds the loader that train_pytorch() reads.
trainloader_torch = DataLoader(trainset_torch, batch_size=64, shuffle=True)

# Make sure the optimizer read by train_pytorch() is bound to net_torch's
# parameters; otherwise optimizer.step() would not update the model being trained.
optimizer = optim.Adam(net_torch.parameters(), lr=0.001)


# TensorFlow/Keras
# Prepare data for augmentation.
datagen.fit(train_images)
train_generator = datagen.flow(train_images, train_labels, batch_size=64)

# NOTE(review): train_keras() fits on `train_images` directly, so
# `train_generator` is not consumed by the comparison below, and both
# net_torch and model_keras continue from their earlier training — confirm
# whether the Keras side should train on the generator with fresh models.

# Run the comparison.
compare_frameworks()

# 結論
PyTorch與TensorFlow/Keras作為兩大主流的深度學習框架，各有其優勢和特點。PyTorch以其靈活性、動態計算圖和研究友好性贏得了學術界的青睞，特別適合需要創新和高度定制的研究項目。而TensorFlow/Keras則以其成熟的生態系統、優異的生產部署能力和簡潔的高階API在產業應用中佔據重要地位。

選擇使用哪個框架應根據具體項目需求、團隊熟悉度以及長期維護考量來決定。對於初學者，Keras提供了最平緩的學習曲線；對於研究人員，PyTorch提供了更大的靈活性；而對於企業應用，TensorFlow的生態系統則提供了從研發到部署的全方位支持。

無論您選擇哪個框架，本課提供的比較和程式碼示例希望能幫助您更好地理解這兩個框架的異同，從而做出更明智的選擇。在深度學習技術不斷演進的今天，掌握多種框架的能力將使您在面對不同挑戰時更具靈活性和競爭力。