# 確認模型精度

In [None]:
import torch
import torchvision.models as models

# 指定 .pth 檔案路徑
model_path = "../Result/resnet18_1600_v4.pth"

# 建立 ResNet-18 模型結構並載入權重
model = models.resnet18()
model.load_state_dict(torch.load(model_path, map_location='cpu'))  # 只載入權重

# 定義函數來檢查模型的資料型別
def check_dtype(model):
    for name, param in model.named_parameters():
        dtype = param.dtype
        print(f"Layer: {name}, Data Type: {dtype}")

# 呼叫函數檢查模型的資料型別
check_dtype(model)


# 加載fintuning資料集


In [11]:
import glob
import os
import PIL.Image
import numpy as np
import torch
import torchvision.transforms as transforms
import torch
import torchvision.models as models
from torch.utils.data import DataLoader, random_split
def get_x(path, width):
    """Gets the x value from the image filename"""
    return (float(int(path.split("_")[1])) - width / 2) / (width / 2)

def get_y(path, height):
    """Gets the y value from the image filename"""
    return (float(int(path.split("_")[2])) - height / 2) / (height / 2)

class XYDataset(torch.utils.data.Dataset):
    
    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        self.image_paths = glob.glob(os.path.join(self.directory, '*.jpg'))
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        
        image = PIL.Image.open(image_path)
        width, height = image.size
        x = float(get_x(os.path.basename(image_path), width))
        y = float(get_y(os.path.basename(image_path), height))
        
        # 裁切掉上半部 40%，只保留下半部的 60%
        cropped_image = image.crop((0, int(height * 0.4), width, height))  # 保留從 40% 開始到底部的部分
        
        # 水平翻轉圖片（若隨機觸發）
        if float(np.random.rand(1)) > 0.5 and self.random_hflips:
            cropped_image = transforms.functional.hflip(cropped_image)
            x = -x
        
        # 應用顏色抖動
        cropped_image = self.color_jitter(cropped_image)
        
        # 調整圖片大小至 224x134
        cropped_image = transforms.functional.resize(cropped_image, (134, 224))
        
        # 轉換為 tensor 並進行標準化
        image_tensor = transforms.functional.to_tensor(cropped_image)
        image_tensor = transforms.functional.normalize(image_tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        
        return image_tensor, torch.tensor([x, y]).float()
    
# 建立資料集實例
dataset = XYDataset('1600-v4', random_hflips=False)
print(dataset[0][1])
train_loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)

tensor([-0.9464,  0.7143])


  if float(np.random.rand(1)) > 0.5 and self.random_hflips:


# 重新調整精度(fp32->fp16) 剪枝10%

In [26]:
import torch
import torchvision.models as models
from torch.optim import Adam
from torch.nn import MSELoss
from torch.nn.utils import prune
from torch.cuda.amp import autocast, GradScaler

# 指定 .pth 檔案路徑
model_path = "../Result/resnet18_1600_v4.pth"

# 建立 ResNet-18 模型結構並載入權重
model = models.resnet18()
model.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)  # 忽略 fc 層的形狀不匹配
model.fc = torch.nn.Linear(model.fc.in_features, 2)  # 修改最後一層輸出為 2，以匹配 [x, y]


# 對模型的卷積層進行剪枝
def apply_pruning(module, amount=0.1):
    if isinstance(module, torch.nn.Conv2d):
        prune.l1_unstructured(module, name="weight", amount=amount)


model.apply(lambda module: apply_pruning(module, amount=0.1))

# 檢查是否有 GPU，並將模型移動到 GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# 使用自動混合精度和 GradScaler
scaler = GradScaler()

# 將模型設置為訓練模式
model.train()

# 定義優化器和 MSE 損失函數
optimizer = Adam(model.parameters(), lr=1e-4)  # 調整學習率
criterion = MSELoss()  # 使用 MSELoss

# 微調模型
for epoch in range(10):  # 訓練 10 個 epoch，可以根據需要調整
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # 優化器梯度歸零
        optimizer.zero_grad()

        # 使用 autocast 進行自動混合精度訓練
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # 進行反向傳播
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/10], Loss: {running_loss / len(train_loader):.4f}")

# 移除剪枝掩碼，將剪枝後的權重變為永久權重
for module in model.modules():
    if isinstance(module, torch.nn.Conv2d):
        prune.remove(module, 'weight')

# 儲存微調後的模型
torch.save(model.state_dict(), "resnet18_finetuned_fp16_pruned.pth")


  model.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)  # 忽略 fc 層的形狀不匹配
  scaler = GradScaler()
  if float(np.random.rand(1)) > 0.5 and self.random_hflips:
  with autocast():


Epoch [1/10], Loss: 0.0548
Epoch [2/10], Loss: 0.0269
Epoch [3/10], Loss: 0.0216
Epoch [4/10], Loss: 0.0157
Epoch [5/10], Loss: 0.0125
Epoch [6/10], Loss: 0.0098
Epoch [7/10], Loss: 0.0079
Epoch [8/10], Loss: 0.0069
Epoch [9/10], Loss: 0.0065
Epoch [10/10], Loss: 0.0061


# 測試結果

In [27]:
import time
import torch
import torchvision.transforms as transforms
import cv2
import os
import numpy as np
import ipywidgets as widgets
from IPython.display import display
import torchvision.models as models

# 設定資料夾路徑
image_folder = "1600-v4"
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]

# 設定 widget 以顯示圖片
widget_width = 224
widget_height = 224
image_widget = widgets.Image(format='jpeg', width=widget_width, height=widget_height)
display(image_widget)

# 設定裝置 (若有 GPU 可用)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# 加載模型
model = models.resnet18()
model.fc = torch.nn.Linear(model.fc.in_features, 2)
model.load_state_dict(torch.load("resnet18_finetuned_fp16_pruned.pth", map_location=device))
model = model.to(device)
model.eval()  # 將模型設定為推論模式

# 圖片預處理和顯示函數
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4691, 0.4032, 0.4579], [0.1740, 0.1485, 0.1688])
])

def bgr8_to_jpeg(image):
    _, jpeg = cv2.imencode('.jpg', image)
    return jpeg.tobytes()

def process_image(image_path):
    # 讀取圖片並進行裁切
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, _ = image.shape
    
    # 裁切圖片的下部 60%
    cropped_image = image[int(height * 0.4):, :, :]  # 保留從 40% 開始到高度底部的部分

    # 調整裁切後的圖片大小為 224x134
    cropped_image = cv2.resize(cropped_image, (224, 134))
    
    # 預處理圖片
    input_tensor = transform(cropped_image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        output = model(input_tensor)
        x, y = output[0].cpu().numpy()
    print(x,y/2)
    # 映射預測的 (x, y) 到圖片的像素坐標，假設 x 和 y 是 [0, 1] 範圍內的預測
    x_pixel = int(x * 224 / 2 + 224 / 2)
    y_pixel = int(y * 134 / 2 + 134 / 2)
    
    # 在圖片上繪製預測結果
    display_image = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2BGR)  # 確保格式正確
    cv2.circle(display_image, (x_pixel, y_pixel), 5, (0, 255, 0), -1)  # 綠色點表示預測位置
    
    # 更新 widget 顯示處理後的圖片
    image_widget.value = bgr8_to_jpeg(display_image)

# 對資料夾中的每張圖片進行處理
for image_file in image_files:
    process_image(image_file)
    time.sleep(2)  # 暫停以觀察每張圖片的結果


Image(value=b'', format='jpeg', height='224', width='224')

cuda


  model.load_state_dict(torch.load("resnet18_finetuned_fp16_pruned.pth", map_location=device))


-0.9946802 0.36236900091171265
-0.74837273 0.4469776749610901
-0.76901275 0.2802664637565613
-0.9104858 0.3827850818634033
-0.85545534 0.3716302514076233
-0.76147217 0.3913075923919678
-0.95470256 0.3939242362976074
-0.9038053 0.3488048315048218
-0.93339026 0.3653039038181305
-0.7635399 0.4303571283817291
-0.9383551 0.332721084356308
-0.90690684 0.3390689492225647
-0.73828983 0.3206668794155121
-0.89149547 0.35504066944122314
-0.9136929 0.36700886487960815
-0.67804617 0.37622275948524475
-1.0230854 0.42201387882232666
-0.75872666 0.33321619033813477
-0.87140626 0.3860875368118286
-0.7770744 0.41226324439048767
-0.91841805 0.38596639037132263
-0.7491264 0.3910526633262634
-0.7458859 0.43500620126724243
-0.90625244 0.34816521406173706
-0.8016814 0.40036216378211975
-0.7434091 0.42813828587532043
-0.6336264 0.3365474045276642
-0.84745353 0.36397385597229004
-0.64746064 0.3600819706916809
-0.8224631 0.3465706408023834
-0.7646155 0.32444465160369873
-0.7848154 0.39144590497016907
-0.7211063

KeyboardInterrupt: 