# 確認模型精度

In [None]:
import torch
import torchvision.models as models

# 指定 .pth 檔案路徑
model_path = "../Result/resnet18_1600_v4.pth"

# 建立 ResNet-18 模型結構並載入權重
model = models.resnet18()
model.load_state_dict(torch.load(model_path, map_location='cpu'))  # 只載入權重

# 定義函數來檢查模型的資料型別
def check_dtype(model):
    for name, param in model.named_parameters():
        dtype = param.dtype
        print(f"Layer: {name}, Data Type: {dtype}")

# 呼叫函數檢查模型的資料型別
check_dtype(model)


# 加載fintuning資料集


In [11]:
import glob
import os
import PIL.Image
import numpy as np
import torch
import torchvision.transforms as transforms
import torch
import torchvision.models as models
from torch.utils.data import DataLoader, random_split
def get_x(path, width):
    """Gets the x value from the image filename"""
    return (float(int(path.split("_")[1])) - width / 2) / (width / 2)

def get_y(path, height):
    """Gets the y value from the image filename"""
    return (float(int(path.split("_")[2])) - height / 2) / (height / 2)

class XYDataset(torch.utils.data.Dataset):
    
    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        self.image_paths = glob.glob(os.path.join(self.directory, '*.jpg'))
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        
        image = PIL.Image.open(image_path)
        width, height = image.size
        x = float(get_x(os.path.basename(image_path), width))
        y = float(get_y(os.path.basename(image_path), height))
        
        # 裁切掉上半部 40%，只保留下半部的 60%
        cropped_image = image.crop((0, int(height * 0.4), width, height))  # 保留從 40% 開始到底部的部分
        
        # 水平翻轉圖片（若隨機觸發）
        if float(np.random.rand(1)) > 0.5 and self.random_hflips:
            cropped_image = transforms.functional.hflip(cropped_image)
            x = -x
        
        # 應用顏色抖動
        cropped_image = self.color_jitter(cropped_image)
        
        # 調整圖片大小至 224x134
        cropped_image = transforms.functional.resize(cropped_image, (134, 224))
        
        # 轉換為 tensor 並進行標準化
        image_tensor = transforms.functional.to_tensor(cropped_image)
        image_tensor = transforms.functional.normalize(image_tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        
        return image_tensor, torch.tensor([x, y]).float()
    
# 建立資料集實例
dataset = XYDataset('1600-v4', random_hflips=False)
print(dataset[0][1])
train_loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)

tensor([-0.9464,  0.7143])


  if float(np.random.rand(1)) > 0.5 and self.random_hflips:


# 重新fintuning

In [20]:
from torch.optim import Adam
from torch.nn import MSELoss
from torch import GradScaler, autocast

# 指定 .pth 檔案路徑
model_path = "../Result/resnet18_1600_v4.pth"

# 建立 ResNet-18 模型結構並載入權重
model = models.resnet18()
model.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)  # 忽略 fc 層的形狀不匹配
model.fc = torch.nn.Linear(model.fc.in_features, 2)
# 檢查是否有 GPU，並將模型移動到 GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# 使用自動混合精度和 GradScaler
scaler = GradScaler()

# 將模型設置為訓練模式
model.train()

# 定義優化器和 MSE 損失函數
optimizer = Adam(model.parameters(), lr=1e-4)  # 調整學習率
criterion = MSELoss()  # 使用 MSELoss

# 微調模型
for epoch in range(5):  # 訓練 5 個 epoch，可以根據需要調整
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # 優化器梯度歸零
        optimizer.zero_grad()

        # 使用 autocast 進行自動混合精度訓練
        with autocast(device):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # 進行反向傳播
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/5], Loss: {running_loss / len(train_loader):.4f}")

# 儲存微調後的模型
torch.save(model.state_dict(), "resnet18_finetuned_fp16.pth")

  model.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)  # 忽略 fc 層的形狀不匹配
  if float(np.random.rand(1)) > 0.5 and self.random_hflips:


Epoch [1/5], Loss: 0.0563
Epoch [2/5], Loss: 0.0274
Epoch [3/5], Loss: 0.0187
Epoch [4/5], Loss: 0.0145
Epoch [5/5], Loss: 0.0124


# 測試結果

In [22]:
import time
import torch
import torchvision.transforms as transforms
import cv2
import os
import numpy as np
import ipywidgets as widgets
from IPython.display import display
import torchvision.models as models

# 設定資料夾路徑
image_folder = "1600-v4"
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]

# 設定 widget 以顯示圖片
widget_width = 224
widget_height = 224
image_widget = widgets.Image(format='jpeg', width=widget_width, height=widget_height)
display(image_widget)

# 設定裝置 (若有 GPU 可用)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# 加載模型
model = models.resnet18()
model.fc = torch.nn.Linear(model.fc.in_features, 2)
model.load_state_dict(torch.load("resnet18_finetuned_fp16.pth", map_location=device))
model = model.to(device)
model.eval()  # 將模型設定為推論模式

# 圖片預處理和顯示函數
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4691, 0.4032, 0.4579], [0.1740, 0.1485, 0.1688])
])

def bgr8_to_jpeg(image):
    _, jpeg = cv2.imencode('.jpg', image)
    return jpeg.tobytes()

def process_image(image_path):
    # 讀取圖片並進行裁切
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, _ = image.shape
    
    # 裁切圖片的下部 60%
    cropped_image = image[int(height * 0.4):, :, :]  # 保留從 40% 開始到高度底部的部分

    # 調整裁切後的圖片大小為 224x134
    cropped_image = cv2.resize(cropped_image, (224, 134))
    
    # 預處理圖片
    input_tensor = transform(cropped_image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        output = model(input_tensor)
        x, y = output[0].cpu().numpy()
    print(x,y/2)
    # 映射預測的 (x, y) 到圖片的像素坐標，假設 x 和 y 是 [0, 1] 範圍內的預測
    x_pixel = int(x * 224 / 2 + 224 / 2)
    y_pixel = int(y * 134 / 2 + 134 / 2)
    
    # 在圖片上繪製預測結果
    display_image = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2BGR)  # 確保格式正確
    cv2.circle(display_image, (x_pixel, y_pixel), 5, (0, 255, 0), -1)  # 綠色點表示預測位置
    
    # 更新 widget 顯示處理後的圖片
    image_widget.value = bgr8_to_jpeg(display_image)

# 對資料夾中的每張圖片進行處理
for image_file in image_files:
    process_image(image_file)
    time.sleep(2)  # 暫停以觀察每張圖片的結果


Image(value=b'', format='jpeg', height='224', width='224')

cuda


  model.load_state_dict(torch.load("resnet18_finetuned_fp16.pth", map_location=device))


-0.60001105 0.31324252486228943
-0.40425605 0.39529359340667725
-0.40735704 0.30219483375549316
-0.4461247 0.3385683298110962
-0.5541498 0.3120160400867462
-0.45412403 0.3303561806678772
-0.15889488 0.3410866856575012
-0.3298407 0.3084648847579956
-0.34694484 0.3343881368637085
-0.5227075 0.39831629395484924
-0.4012406 0.33496031165122986
-0.3925773 0.284706175327301
-0.31100428 0.29326120018959045
-0.45765835 0.3318188488483429
-0.46541947 0.3590194284915924
-0.31953382 0.3064567744731903
-0.3598785 0.35759344696998596
-0.41651493 0.35502201318740845
-0.32724297 0.3445821702480316
-0.34480003 0.3156152665615082
-0.5355159 0.3351578116416931
-0.29403314 0.320288747549057
-0.32230496 0.37137141823768616
-0.30138975 0.33989453315734863
-0.5510939 0.35387536883354187
-0.46533853 0.3651455342769623
-0.30088997 0.3195010721683502
-0.21012627 0.2789176404476166
-0.31392008 0.31438782811164856
-0.23083268 0.3342224657535553
-0.41592067 0.3077930510044098
-0.47429073 0.3031228482723236
-0.3550

KeyboardInterrupt: 