In [None]:
!pip install tensorflow


In [3]:
import cv2

# 讀取影片
video_path = 'texture_video.avi'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("無法開啟影片檔案。")
else:
    # 獲取影片資訊
    fps = cap.get(cv2.CAP_PROP_FPS)  # 幀速率
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # 總幀數
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # 影像寬度
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # 影像高度
    video_duration = frame_count / fps  # 影片秒數

    # 輸出資訊
    print(f"影片檔案: {video_path}")
    print(f"FPS: {fps}")
    print(f"畫素 (寬 x 高): {width} x {height}")
    print(f"總幀數: {frame_count}")
    print(f"影片秒數: {video_duration:.2f} 秒")

cap.release()



影片檔案: texture_video.avi
FPS: 30.0
畫素 (寬 x 高): 800 x 600
總幀數: 5437
影片秒數: 181.23 秒


In [27]:
import cv2

def get_image_info(image_path):
    # 讀取影像
    image = cv2.imread(image_path)
    if image is None:
        return f"Error: Unable to load image from {image_path}"

    # 取得解析度
    resolution = image.shape
    height, width = resolution[:2]

    return {
        "path": image_path,
        "width": width,
        "height": height,
        "resolution": f"{width} x {height}"
    }

# 定義影像路徑
image_paths = ["image1.png", "image2.png", "image1_groundtruth.png", "image2_groundtruth.png"]

# 逐一處理影像並回報資訊
for path in image_paths:
    info = get_image_info(path)
    print(info)


{'path': 'image1.png', 'width': 512, 'height': 512, 'resolution': '512 x 512'}
{'path': 'image2.png', 'width': 512, 'height': 512, 'resolution': '512 x 512'}
{'path': 'image1_groundtruth.png', 'width': 512, 'height': 512, 'resolution': '512 x 512'}
{'path': 'image2_groundtruth.png', 'width': 512, 'height': 512, 'resolution': '512 x 512'}


In [39]:
import cv2
import numpy as np

def analyze_image_colors(image_path):
    """
    分析影像中的顏色分佈情況。
    
    參數:
        image_path (str): 影像的檔案路徑。
    
    返回:
        dict: 包含各顏色（灰階或 RGB 值）及其對應像素數量的字典。
    """
    # 讀取影像
    image = cv2.imread(image_path)
    if image is None:
        return f"Error: Unable to load image from {image_path}"
    
    # 檢查影像是否為灰階
    if len(image.shape) == 2:  # 單通道灰階影像
        unique, counts = np.unique(image, return_counts=True)
        color_distribution = dict(zip(unique, counts))
    else:  # RGB 或其他多通道影像
        # 將每個通道分開處理
        color_distribution = {}
        for channel, color_name in enumerate(["Blue", "Green", "Red"]):
            unique, counts = np.unique(image[:, :, channel], return_counts=True)
            color_distribution[color_name] = dict(zip(unique, counts))
    
    return color_distribution

# 測試影像路徑
groundtruth_images = ["image1_groundtruth.png", "image2_groundtruth.png"]

# 分析並輸出每張影像的顏色分佈
for image_path in groundtruth_images:
    result = analyze_image_colors(image_path)
    print(f"Color distribution for {image_path}:")
    print(result)
    print()


Color distribution for image1_groundtruth.png:
{'Blue': {0: 9538, 255: 252606}, 'Green': {0: 9538, 255: 252606}, 'Red': {0: 9538, 255: 252606}}

Color distribution for image2_groundtruth.png:
{'Blue': {0: 6122, 255: 256022}, 'Green': {0: 6122, 255: 256022}, 'Red': {0: 6122, 255: 256022}}



In [5]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# 載入影片檔案
cap = cv2.VideoCapture('texture_video.avi')
frames = []

# 讀取前720幀影像
for i in range(720):
    ret, frame = cap.read()
    if ret:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # 轉為灰階
        frame = cv2.resize(frame, (800, 600))  # 調整大小
        frames.append(frame)

cap.release()

# 轉為 NumPy 陣列，並正規化
frames = np.array(frames) / 255.0
frames = frames.reshape(-1, 800, 600, 1)  # 調整為 (幀數, 高, 寬, 測量通道數)

# 定義 Autoencoder 模型
def create_autoencoder():
    input_img = layers.Input(shape=(800, 600, 1))
    # Encoder
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    encoded = layers.MaxPooling2D((2, 2), padding='same')(x)
    # Decoder
    x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(encoded)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    decoded = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    return models.Model(input_img, decoded)

# 建立模型
autoencoder = create_autoencoder()
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

# 訓練模型
autoencoder.fit(frames, frames, epochs=20, batch_size=8, shuffle=True)


Epoch 1/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 461ms/step - loss: 0.6818
Epoch 2/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 465ms/step - loss: 0.6781
Epoch 3/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 502ms/step - loss: 0.6780
Epoch 4/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 512ms/step - loss: 0.6776
Epoch 5/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 507ms/step - loss: 0.6773
Epoch 6/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 486ms/step - loss: 0.6773
Epoch 7/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 491ms/step - loss: 0.6772
Epoch 8/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 479ms/step - loss: 0.6770
Epoch 9/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 484ms/step - loss: 0.6770
Epoch 10/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 478ms

<keras.src.callbacks.history.History at 0x281668f8e00>

In [41]:
import cv2
import numpy as np

# 動態讀取影像並獲取資訊
def load_and_preprocess_image(image_path):
    # 獲取影像資訊
    image_info = get_image_info(image_path)
    if isinstance(image_info, str):  # 錯誤訊息處理
        raise ValueError(image_info)

    # 根據解析度讀取與處理影像
    width = image_info["width"]
    height = image_info["height"]
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (width, height))  # 確保讀取後調整大小一致
    image = image / 255.0  # 正規化
    return image.reshape(1, height, width, 1)  # 返回形狀為 (1, 高, 寬, 通道)

# 加載影像和 Groundtruth
image1 = load_and_preprocess_image("image1.png")
image2 = load_and_preprocess_image("image2.png")
gt_image1 = load_and_preprocess_image("image1_groundtruth.png").astype(int)
gt_image2 = load_and_preprocess_image("image2_groundtruth.png").astype(int)

# 使用 Autoencoder 預測影像
reconstructed_image1 = autoencoder.predict(image1)
reconstructed_image2 = autoencoder.predict(image2)

# 計算像素差異
threshold =10  # 閾值
diff_image1 = np.abs(image1 - reconstructed_image1) > threshold
diff_image2 = np.abs(image2 - reconstructed_image2) > threshold

# 計算混淆矩陣
def calculate_confusion_matrix(groundtruth, prediction):
    TP = np.sum((groundtruth == 1) & (prediction == 1))
    FP = np.sum((groundtruth == 0) & (prediction == 1))
    TN = np.sum((groundtruth == 0) & (prediction == 0))
    FN = np.sum((groundtruth == 1) & (prediction == 0))
    return {"TP": TP, "FP": FP, "TN": TN, "FN": FN}

# 計算 Image 1 和 Image 2 的混淆矩陣
conf_matrix1 = calculate_confusion_matrix(gt_image1[0, :, :, 0], diff_image1[0, :, :, 0])
conf_matrix2 = calculate_confusion_matrix(gt_image2[0, :, :, 0], diff_image2[0, :, :, 0])

# 輸出混淆矩陣結果
print("Image 1 Confusion Matrix:", conf_matrix1)
print("Image 2 Confusion Matrix:", conf_matrix2)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Image 1 Confusion Matrix: {'TP': 0, 'FP': 0, 'TN': 9538, 'FN': 252606}
Image 2 Confusion Matrix: {'TP': 0, 'FP': 0, 'TN': 6122, 'FN': 256022}


In [17]:
# 載入測試影像
def load_and_preprocess_image(filename):
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (800, 600))
    img = img / 255.0
    return img.reshape(1, 800, 600, 1)

image1 = load_and_preprocess_image('image1.png')
image2 = load_and_preprocess_image('image2.png')

# 使用 Autoencoder 進行重建
reconstructed_image1 = autoencoder.predict(image1)
reconstructed_image2 = autoencoder.predict(image2)

# 計算像素差異
threshold = 0.2  # 設定像素差異閾值
diff_image1 = np.abs(image1 - reconstructed_image1) > threshold
diff_image2 = np.abs(image2 - reconstructed_image2) > threshold


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step


In [19]:
def load_groundtruth(filename):
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (600, 800))  # 確保與 prediction 的形狀一致
    return (img == 0).astype(int)  # 黑色為瑕疵，轉為二值化


In [21]:
# 載入 Groundtruth
gt_image1 = load_groundtruth('image1_groundtruth.png')
gt_image2 = load_groundtruth('image2_groundtruth.png')

# 計算混淆矩陣
tp1, fp1, tn1, fn1 = calculate_confusion_matrix(gt_image1, diff_image1[0, :, :, 0])
tp2, fp2, tn2, fn2 = calculate_confusion_matrix(gt_image2, diff_image2[0, :, :, 0])

# 輸出混淆矩陣結果
print("Image 1 Confusion Matrix:")
print(f"TP: {tp1}, FP: {fp1}, TN: {tn1}, FN: {fn1}")

print("Image 2 Confusion Matrix:")
print(f"TP: {tp2}, FP: {fp2}, TN: {tn2}, FN: {fn2}")


Image 1 Confusion Matrix:
TP: 0, FP: 0, TN: 463666, FN: 16334
Image 2 Confusion Matrix:
TP: 0, FP: 0, TN: 469190, FN: 10810


In [15]:
print(f"Groundtruth 1 shape: {gt_image1.shape}")
print(f"Groundtruth 2 shape: {gt_image2.shape}")
print(f"Prediction 1 shape: {diff_image1[0, :, :, 0].shape}")
print(f"Prediction 2 shape: {diff_image2[0, :, :, 0].shape}")


Groundtruth 1 shape: (800, 600)
Groundtruth 2 shape: (800, 600)
Prediction 1 shape: (800, 600)
Prediction 2 shape: (800, 600)
