In [6]:
#汽车处理图像转化为模型输入
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
import torch
import torch.nn as nn


def slice_image(image):
    """
    提取图像的下半部分。
    
    参数：
    - image: 输入的图像。
    
    返回：
    - 图像的下半部分。
    """
    width, height = image.size
    # 计算下半部分的裁剪区域
    box = (0, height // 2, width, height)
    lower_half = image.crop(box)
    
    return lower_half

def process_image(image):
    """
    处理单张图片：提取下半部分并应用Canny边缘检测。
    
    参数：
    - image: 输入的图像。
    
    返回：
    - 处理后的图像（Canny边缘检测结果），大小为 (32, 32)。
    """
    # 第一步：提取图片的下半部分
    lower_half = slice_image(image)
    
    # 第二步：将下半部分转换为OpenCV格式
    lower_half_cv = cv2.cvtColor(np.array(lower_half), cv2.COLOR_RGB2BGR)
    
    # 第三步：转换为灰度图
    gray = cv2.cvtColor(lower_half_cv, cv2.COLOR_BGR2GRAY)
    
    # 第四步：应用高斯模糊，减少噪声
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # 第五步：应用Canny边缘检测
    edges = cv2.Canny(blurred, 50, 150)
    
    # 确保输出图像大小为 (32, 32)
    edges_resized = cv2.resize(edges, (64, 64))  # 将图像调整为32x32大小
    
    return edges_resized

def process_image_for_model(image):
    """
    处理图像以便输入到模型：裁剪下半部分，调整大小，转换为张量并进行归一化。
    
    参数：
    - image: 输入的图像，假设是 NumPy 数组格式。
    
    返回：
    - 预处理后的图像张量
    """
    # 将 NumPy 数组转换为 PIL 图像
    result_image_pil = Image.fromarray(image.astype(np.uint8))

    # 确保图像是 RGB 格式
    if result_image_pil.mode != 'RGB':
        result_image_pil = result_image_pil.convert('RGB')
    
    # 定义 transform（已经给定了）
    transform = transforms.Compose([
        transforms.Resize((64, 64)),  # 确保图像大小为32x32
        transforms.ToTensor(),        # 将图像转换为 Tensor 格式
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 归一化
    ])
    
    # 对图像进行预处理
    image_tensor = transform(result_image_pil).unsqueeze(0)  # 增加 batch 维度
    return image_tensor

def process_and_prepare_for_model(image):
    """
    整合图像处理与预处理流程：
    1. 提取图像下半部分并进行Canny边缘检测
    2. 将处理后的图像转换为模型可用的格式
    
    参数：
    - image: 输入的图像，假设为 PIL 图像
    
    返回：
    - 处理并准备好的图像张量
    """
    # 步骤1：处理图像（提取下半部分并进行边缘检测）
    processed_image = process_image(slice_image(image))
    
    # 步骤2：将处理后的图像转换为模型输入格式
    # 由于 process_image 返回的是 NumPy 数组，先转换为模型需要的格式
    return process_image_for_model(processed_image)

# 定义模型结构（在model set中）
def convnet1(image_size=64):
    return nn.Sequential(
        # 第一层卷积层，减少通道数
        nn.Conv2d(3, 4, kernel_size=3, stride=1, padding=1),  # 输出：64x64x4
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),  # 输出：32x32x4

        # 第二层卷积层
        nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=1),  # 输出：32x32x8
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),  # 输出：16x16x8

        # 第三层卷积层
        nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),  # 输出：16x16x16
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),  # 输出：8x8x16
        
        nn.Flatten(),  # Flatten the output

        # 全连接层
        nn.Linear(16 * 8 * 8, 128),  # 修改全连接层输入大小：16 * 8 * 8 = 1024
        nn.ReLU(),

        # Dropout 层，减少过拟合
        nn.Dropout(0.3),

        # 输出层
        nn.Linear(128, 3)  # 输出 3 个类别
    )

In [8]:
image_path = r'C:\Users\yil\Desktop\ipy project\bot-project\transfer_picture\Driving-picture\2-angle+.jpg'

# 打开并加载图片
image = Image.open(image_path)

In [12]:
print(type(image))

<class 'PIL.JpegImagePlugin.JpegImageFile'>


In [10]:
# 处理并准备图像以便输入到模型
model_input = process_and_prepare_for_model(image)

In [29]:
# 重新构建相同的模型结构
model = convnet1(image_size=64)

# 加载保存的权重
model_weights = torch.load('model_weights_3.pth')
model.load_state_dict(model_weights)

  model_weights = torch.load('model_weights_3.pth')


<All keys matched successfully>

In [31]:
model.eval()

Sequential(
  (0): Conv2d(3, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (9): Flatten(start_dim=1, end_dim=-1)
  (10): Linear(in_features=1024, out_features=128, bias=True)
  (11): ReLU()
  (12): Dropout(p=0.3, inplace=False)
  (13): Linear(in_features=128, out_features=3, bias=True)
)

In [33]:
with torch.no_grad():
    output = model(model_input)

# 获取预测类别
_, predicted_class = torch.max(output, 1)
print(f"Predicted class: {predicted_class.item()}")

Predicted class: 1


In [13]:
print(type(predicted_class.item()))

<class 'int'>


In [None]:
# Image processing for car images to model input
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
import torch
import torch.nn as nn

def slice_image(image):
    """
    Extract the lower half of the image.
    
    Parameters:
    - image: The input image.
    
    Returns:
    - The lower half of the image.
    """
    width, height = image.size
    # Calculate the cropping area for the lower half
    box = (0, height // 2, width, height)
    lower_half = image.crop(box)
    
    return lower_half

def process_image(image):
    """
    Process a single image: extract the lower half and apply Canny edge detection.
    
    Parameters:
    - image: The input image.
    
    Returns:
    - The processed image (Canny edge detection result), resized to (32, 32).
    """
    # Step 1: Extract the lower half of the image
    lower_half = slice_image(image)
    
    # Step 2: Convert the lower half to OpenCV format
    lower_half_cv = cv2.cvtColor(np.array(lower_half), cv2.COLOR_RGB2BGR)
    
    # Step 3: Convert to grayscale
    gray = cv2.cvtColor(lower_half_cv, cv2.COLOR_BGR2GRAY)
    
    # Step 4: Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Step 5: Apply Canny edge detection
    edges = cv2.Canny(blurred, 50, 150)
    
    # Ensure the output image is resized to (32, 32)
    edges_resized = cv2.resize(edges, (64, 64))  # Resize to 64x64
    
    return edges_resized

def process_image_for_model(image):
    """
    Process the image for model input: extract the lower half, resize, convert to tensor and normalize.
    
    Parameters:
    - image: The input image, assumed to be in NumPy array format.
    
    Returns:
    - The preprocessed image tensor.
    """
    # Convert the NumPy array to a PIL image
    result_image_pil = Image.fromarray(image.astype(np.uint8))

    # Ensure the image is in RGB format
    if result_image_pil.mode != 'RGB':
        result_image_pil = result_image_pil.convert('RGB')
    
    # Define the transformations (already provided)
    transform = transforms.Compose([
        transforms.Resize((64, 64)),  # Ensure the image size is 64x64
        transforms.ToTensor(),        # Convert the image to Tensor format
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
    ])
    
    # Apply the transformations
    image_tensor = transform(result_image_pil).unsqueeze(0)  # Add the batch dimension
    return image_tensor

def process_and_prepare_for_model(image):
    """
    Integrate the image processing and preprocessing steps:
    1. Extract the lower half of the image and apply Canny edge detection.
    2. Convert the processed image into a format suitable for the model.
    
    Parameters:
    - image: The input image, assumed to be a PIL image.
    
    Returns:
    - The processed and prepared image tensor for the model.
    """
    # Step 1: Process the image (extract the lower half and apply edge detection)
    processed_image = process_image(slice_image(image))
    
    # Step 2: Convert the processed image into the model's input format
    # Since process_image returns a NumPy array, it is first converted to the required format
    return process_image_for_model(processed_image)

# Define the model architecture (in the model set)
def convnet1(image_size=64):
    return nn.Sequential(
        # First convolutional layer, reducing the number of channels
        nn.Conv2d(3, 4, kernel_size=3, stride=1, padding=1),  # Output: 64x64x4
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 32x32x4

        # Second convolutional layer
        nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=1),  # Output: 32x32x8
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 16x16x8

        # Third convolutional layer
        nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),  # Output: 16x16x16
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 8x8x16
        
        nn.Flatten(),  # Flatten the output

        # Fully connected layer
        nn.Linear(16 * 8 * 8, 128),  # Adjusted fully connected layer input size: 16 * 8 * 8 = 1024
        nn.ReLU(),

        # Dropout layer to reduce overfitting
        nn.Dropout(0.3),

        # Output layer
        nn.Linear(128, 3)  # Output 3 categories
    )
