In [7]:
import numpy as np
import tensorflow as tf
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import ResNet50, Xception, InceptionV3
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from tensorflow.keras.applications.xception import preprocess_input as xcep_preprocess
from tensorflow.keras.applications.inception_v3 import preprocess_input as incep_preprocess
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
from PIL import Image, ImageTk,ImageDraw,ImageFont
import threading



### 从摄像头读取视频

In [3]:


# Input resolution shared by all three backbones (Xception is normally used at 299,
# but 224 is used here so every model sees the same input size).
image_size = 224

# Reset Keras' global state before (re)building the backbones. Without this, re-running
# the cell in the same kernel keeps the previous model copies alive and shifts the
# auto-generated layer names (conv2d_N, batch_normalization_N, ...). Because the weights
# below are loaded with by_name=True, layers whose names no longer match the file would
# be skipped without any error and stay randomly initialised.
tf.keras.backend.clear_session()

# Headless feature extractors with global average pooling; weights come from the
# fine-tuned checkpoints loaded below, not from the bundled ImageNet weights.
resnet = ResNet50(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))
xception = Xception(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))
inception = InceptionV3(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))

# Load the fine-tuned weights, matching layers by name.
resnet.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\resnet50-imagenet-finetune152.h5", by_name=True)
xception.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\xception-imagenet-finetune116.h5", by_name=True)
inception.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\inceptionV3-imagenet-finetune172.h5", by_name=True)

# Classifier trained on the concatenated features of the three backbones.
model_mix = load_model(r"F:\JupyterWorkSpace\BBBBBBS\models\mixed-model.h5")

def predict_single_image(image_path):
    """Classify one image file with the three-backbone ensemble.

    The image is loaded at ``image_size`` x ``image_size``, run through ResNet50,
    Xception and InceptionV3 (each with its own preprocessing), and the concatenated
    pooled features are scored by ``model_mix``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        ``(label, probability)`` of the highest-scoring class.
    """
    labels = ['c0安全驾驶', 'c1右手打字', 'c2右手接电话', 'c3左手打字', 'c4左手接电话',
              'c5调收音机', 'c6喝饮料', 'c7拿后面的东西', 'c8整理头发和化妆', 'c9和其他乘客说话']

    picture = load_img(image_path, target_size=(image_size, image_size))
    batch = np.expand_dims(img_to_array(picture), axis=0)

    # Every preprocessor receives its own copy of the batch.
    extractors = (
        (resnet, resnet_preprocess),
        (xception, xcep_preprocess),
        (inception, incep_preprocess),
    )
    features = [net.predict(prepare(np.copy(batch))) for net, prepare in extractors]

    # Score the concatenated bottleneck features with the fusion classifier.
    scores = model_mix.predict(np.concatenate(features, axis=1))[0]
    best = np.argmax(scores)
    return labels[best], scores[best]

def process_video_stream():
    """Classify webcam frames in a loop and show them in an OpenCV window.

    Frames are read from camera 0, classified with the three backbones plus
    ``model_mix``, annotated with the predicted class and probability, and displayed
    in a window titled "Video Stream". The loop ends when the camera stops delivering
    frames or the user presses 'q'. The capture and the window are always released,
    even if inference raises.
    """
    classes = ['c0安全驾驶', 'c1右手打字', 'c2右手接电话', 'c3左手打字', 'c4左手接电话',
               'c5调收音机', 'c6喝饮料', 'c7拿后面的东西', 'c8整理头发和化妆', 'c9和其他乘客说话']

    cap = cv2.VideoCapture(0)  # camera index 0; a video file path would also work here
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers frames in BGR order, while predict_single_image feeds the
            # models RGB data (load_img). Convert so both paths give the preprocessors
            # the same channel layout.
            img_resized = cv2.resize(frame, (image_size, image_size))
            img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
            img_array = np.expand_dims(img_to_array(img_rgb), axis=0)

            # Extract features with each backbone (each gets its own copy to preprocess).
            feat_resnet = resnet.predict(resnet_preprocess(np.copy(img_array)))
            feat_xcep = xception.predict(xcep_preprocess(np.copy(img_array)))
            feat_incep = inception.predict(incep_preprocess(np.copy(img_array)))

            # Concatenate the bottleneck features and score them with the fusion classifier.
            bottleneck_feature = np.concatenate([feat_resnet, feat_xcep, feat_incep], axis=1)
            prediction = model_mix.predict(bottleneck_feature)

            class_idx = np.argmax(prediction[0])
            predicted_class = classes[class_idx]
            predicted_prob = prediction[0][class_idx]

            # NOTE: cv2.putText uses Hershey fonts, which cannot render the Chinese part
            # of the label; those characters are drawn as '?'.
            cv2.putText(frame, f"Class: {predicted_class}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Prob: {predicted_prob:.4f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv2.imshow("Video Stream", frame)

            # Press 'q' to leave the stream.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit and on errors, so the camera is never left locked.
        cap.release()
        cv2.destroyAllWindows()

def start_video_thread():
    """Run ``process_video_stream`` on a background daemon thread.

    The thread is a daemon so it does not keep the interpreter alive at exit.
    """
    worker = threading.Thread(target=process_video_stream, daemon=True)
    worker.start()

def open_image():
    """Button handler: pick an image, classify it, and show thumbnail, result and path.

    Does nothing when the file dialog is cancelled.
    """
    chosen = filedialog.askopenfilename(title="选择图片", filetypes=[("Image Files", "*.png;*.jpg;*.jpeg")])
    if not chosen:
        return

    label, prob = predict_single_image(chosen)

    # Show a thumbnail of at most 200x200 pixels.
    preview = Image.open(chosen)
    preview.thumbnail((200, 200))
    photo = ImageTk.PhotoImage(preview)
    panel_img.config(image=photo)
    panel_img.image = photo  # keep a reference on the widget so the image is not garbage-collected

    # Prediction summary and the selected path.
    label_result.config(text=f"预测类别: {label}\n预测概率: {prob:.4f}")
    label_file_path.config(text=f"文件路径: {chosen}")

def open_video():
    """Button handler: launch the webcam stream on its worker thread."""
    start_video_thread()

# Main window
root = tk.Tk()
root.title("图像分类预测")
root.geometry("500x700")
root.configure(bg="#f0f0f0")

# Font shared by the buttons and the file-path label
font_style = ("Arial", 12)

# Action buttons
btn_open_image = tk.Button(
    root,
    text="选择图片",
    command=open_image,
    width=20,
    height=2,
    bg="#4CAF50",
    fg="white",
    font=font_style,
)
btn_open_image.pack(pady=20)

btn_open_video = tk.Button(
    root,
    text="启动视频流",
    command=open_video,
    width=20,
    height=2,
    bg="#008CBA",
    fg="white",
    font=font_style,
)
btn_open_video.pack(pady=20)

# Thumbnail of the selected image (filled in by open_image)
panel_img = tk.Label(root, bg="#f0f0f0")
panel_img.pack(pady=10)

# Prediction summary
label_result = tk.Label(
    root,
    text="预测结果",
    font=("Arial", 14, "bold"),
    justify="left",
    width=30,
    height=4,
    bg="#f0f0f0",
)
label_result.pack(pady=10)

# Path of the selected file
label_file_path = tk.Label(root, text="文件路径", font=font_style, width=50, height=2, bg="#f0f0f0")
label_file_path.pack(pady=10)

# Enter the Tk event loop (blocks until the window is closed)
root.mainloop()








### 添加从文件读取视频流的功能

In [20]:


# Input resolution shared by all three backbones (Xception is normally used at 299,
# but 224 is used here so every model sees the same input size).
image_size = 224

# Reset Keras' global state before (re)building the backbones. Without this, running
# this cell after another cell that already built the models keeps the earlier copies
# alive and shifts the auto-generated layer names (conv2d_N, batch_normalization_N, ...).
# Because the weights below are loaded with by_name=True, layers whose names no longer
# match the file would be skipped without any error and stay randomly initialised.
tf.keras.backend.clear_session()

# Headless feature extractors with global average pooling; weights come from the
# fine-tuned checkpoints loaded below, not from the bundled ImageNet weights.
resnet = ResNet50(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))
xception = Xception(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))
inception = InceptionV3(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))

# Load the fine-tuned weights, matching layers by name.
resnet.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\resnet50-imagenet-finetune152.h5", by_name=True)
xception.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\xception-imagenet-finetune116.h5", by_name=True)
inception.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\inceptionV3-imagenet-finetune172.h5", by_name=True)

# Classifier trained on the concatenated features of the three backbones.
model_mix = load_model(r"F:\JupyterWorkSpace\BBBBBBS\models\mixed-model.h5")

def predict_single_image(image_path):
    """Classify one image file with the three-backbone ensemble.

    The image is loaded at ``image_size`` x ``image_size``, run through ResNet50,
    Xception and InceptionV3 (each with its own preprocessing), and the concatenated
    pooled features are scored by ``model_mix``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        ``(label, probability)`` of the highest-scoring class.
    """
    labels = ['c0安全驾驶', 'c1右手打字', 'c2右手接电话', 'c3左手打字', 'c4左手接电话',
              'c5调收音机', 'c6喝饮料', 'c7拿后面的东西', 'c8整理头发和化妆', 'c9和其他乘客说话']

    picture = load_img(image_path, target_size=(image_size, image_size))
    batch = np.expand_dims(img_to_array(picture), axis=0)

    # Every preprocessor receives its own copy of the batch.
    extractors = (
        (resnet, resnet_preprocess),
        (xception, xcep_preprocess),
        (inception, incep_preprocess),
    )
    features = [net.predict(prepare(np.copy(batch))) for net, prepare in extractors]

    # Score the concatenated bottleneck features with the fusion classifier.
    scores = model_mix.predict(np.concatenate(features, axis=1))[0]
    best = np.argmax(scores)
    return labels[best], scores[best]

def process_video_stream(file_path=None):
    """Classify frames from a video file or the webcam and show them in a window.

    Args:
        file_path: Path of a video file to read. When empty or ``None``, camera 0
            is used instead.

    The loop runs while the capture is open and the module-level ``video_running``
    flag is true, and also ends when no more frames arrive or the user presses 'q'.
    The capture is published as the global ``cap`` so ``stop_video_stream`` can reach
    it. The capture and the preview window are always released, even on errors.
    """
    global cap, video_running
    classes = ['c0安全驾驶', 'c1右手打字', 'c2右手接电话', 'c3左手打字', 'c4左手接电话',
               'c5调收音机', 'c6喝饮料', 'c7拿后面的东西', 'c8整理头发和化妆', 'c9和其他乘客说话']

    # Work on a local handle: the global `cap` may be rebound by another worker, and
    # this loop must keep reading from (and finally release) its own capture.
    capture = cv2.VideoCapture(file_path if file_path else 0)
    cap = capture
    try:
        while capture.isOpened() and video_running:
            ret, frame = capture.read()
            if not ret:
                break

            # OpenCV delivers frames in BGR order, while predict_single_image feeds the
            # models RGB data (load_img). Convert so both paths give the preprocessors
            # the same channel layout.
            img_resized = cv2.resize(frame, (image_size, image_size))
            img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
            img_array = np.expand_dims(img_to_array(img_rgb), axis=0)

            # Extract features with each backbone (each gets its own copy to preprocess).
            feat_resnet = resnet.predict(resnet_preprocess(np.copy(img_array)))
            feat_xcep = xception.predict(xcep_preprocess(np.copy(img_array)))
            feat_incep = inception.predict(incep_preprocess(np.copy(img_array)))

            # Concatenate the bottleneck features and score them with the fusion classifier.
            bottleneck_feature = np.concatenate([feat_resnet, feat_xcep, feat_incep], axis=1)
            prediction = model_mix.predict(bottleneck_feature)

            class_idx = np.argmax(prediction[0])
            predicted_class = classes[class_idx]
            predicted_prob = prediction[0][class_idx]

            # NOTE: cv2.putText uses Hershey fonts, which cannot render the Chinese part
            # of the label; those characters are drawn as '?'. Drawing with Pillow's
            # default font was tried and does not cover Chinese either.
            cv2.putText(frame, f"Class: {predicted_class}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Prob: {predicted_prob:.4f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv2.imshow("Video Stream", frame)

            # Press 'q' to leave the stream.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit and on errors, so the camera/file is never left open.
        capture.release()
        cv2.destroyAllWindows()

def start_video_thread(file_path=None):
    """Start ``process_video_stream`` on a background daemon thread.

    Args:
        file_path: Optional video file to stream; ``None`` selects the webcam.

    If a previous worker is still running it is asked to stop and waited for first.
    Two workers would otherwise share the global ``cap`` and ``video_running`` and
    fight over the same capture and preview window. The wait happens on the calling
    (GUI) thread and is bounded, so a stuck worker cannot freeze the UI indefinitely.
    """
    global video_running, video_thread
    previous = globals().get("video_thread")
    if previous is not None and previous.is_alive():
        video_running = False      # the worker checks this flag once per frame
        previous.join(timeout=5)   # give it time to release its capture

    video_running = True  # mark the stream as active before the worker starts
    video_thread = threading.Thread(target=process_video_stream, args=(file_path,), daemon=True)
    video_thread.start()

def stop_video_stream():
    """Signal the video worker to stop and close the capture and preview window.

    Safe to call when no stream has been started yet.
    """
    global video_running
    video_running = False  # the worker loop checks this flag on every frame

    # `cap` only exists once process_video_stream has run; referencing it directly
    # raised NameError when the stop button was pressed before any stream was started.
    capture = globals().get("cap")
    if capture is not None and capture.isOpened():
        capture.release()
    cv2.destroyAllWindows()

def open_image():
    """Button handler: pick an image, classify it, and show thumbnail, result and path.

    Does nothing when the file dialog is cancelled.
    """
    chosen = filedialog.askopenfilename(title="选择图片", filetypes=[("Image Files", "*.png;*.jpg;*.jpeg")])
    if not chosen:
        return

    label, prob = predict_single_image(chosen)

    # Show a thumbnail of at most 200x200 pixels.
    preview = Image.open(chosen)
    preview.thumbnail((200, 200))
    photo = ImageTk.PhotoImage(preview)
    panel_img.config(image=photo)
    panel_img.image = photo  # keep a reference on the widget so the image is not garbage-collected

    # Prediction summary and the selected path.
    label_result.config(text=f"预测类别: {label}\n预测概率: {prob:.4f}")
    label_file_path.config(text=f"文件路径: {chosen}")

def open_video_from_load():
    """Button handler: ask for a video file and stream it; do nothing if cancelled."""
    chosen = filedialog.askopenfilename(title="选择视频文件", filetypes=[("Video Files", "*.mp4;*.avi;*.mov")])
    if not chosen:
        return
    # Hand the selected file to the video worker thread.
    start_video_thread(chosen)

def open_video():
    """Button handler: launch the webcam stream on its worker thread."""
    start_video_thread()

def close_video():
    """Button handler: stop the running video stream."""
    stop_video_stream()

# Main window
root = tk.Tk()
root.title("图像分类预测")
root.geometry("500x700")
root.configure(bg="#f0f0f0")

# Font shared by the buttons and the file-path label
font_style = ("Arial", 12)

# Action buttons
btn_open_image = tk.Button(root, text="选择图片", command=open_image, width=20, height=2, bg="#4CAF50", fg="white", font=font_style)
btn_open_image.pack(pady=20)

btn_open_video = tk.Button(root, text="启动视频流", command=open_video, width=20, height=2, bg="#008CBA", fg="white", font=font_style)
btn_open_video.pack(pady=20)

btn_open_video_from_load = tk.Button(root, text="选择视频文件", command=open_video_from_load, width=20, height=2, bg="#008CBA", fg="white", font=font_style)
btn_open_video_from_load.pack(pady=20)

btn_close_video = tk.Button(root, text="关闭视频流", command=close_video, width=20, height=2, bg="#f44336", fg="white", font=font_style)
btn_close_video.pack(pady=20)

# Thumbnail of the selected image (filled in by open_image)
panel_img = tk.Label(root, bg="#f0f0f0")
panel_img.pack(pady=10)

# Prediction summary
label_result = tk.Label(root, text="预测结果", font=("Arial", 14, "bold"), justify="left", width=30, height=4, bg="#f0f0f0")
label_result.pack(pady=10)

# Path of the selected file
label_file_path = tk.Label(root, text="文件路径", font=font_style, width=50, height=2, bg="#f0f0f0")
label_file_path.pack(pady=10)

# Enter the Tk event loop (blocks until the window is closed)
root.mainloop()

# The window is gone, but in a notebook the kernel keeps running, so the daemon worker
# thread is not terminated. Clear the flag so the video loop exits on its next iteration;
# it then releases its capture and closes the preview window itself.
video_running = False




### 解决中文乱码问题

In [9]:


# Input resolution shared by all three backbones (Xception is normally used at 299,
# but 224 is used here so every model sees the same input size).
image_size = 224

# Reset Keras' global state before (re)building the backbones. Without this, running
# this cell after another cell that already built the models keeps the earlier copies
# alive and shifts the auto-generated layer names (conv2d_N, batch_normalization_N, ...).
# Because the weights below are loaded with by_name=True, layers whose names no longer
# match the file would be skipped without any error and stay randomly initialised.
tf.keras.backend.clear_session()

# Headless feature extractors with global average pooling; weights come from the
# fine-tuned checkpoints loaded below, not from the bundled ImageNet weights.
resnet = ResNet50(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))
xception = Xception(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))
inception = InceptionV3(weights=None, include_top=False, pooling='avg', input_shape=(image_size, image_size, 3))

# Load the fine-tuned weights, matching layers by name.
resnet.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\resnet50-imagenet-finetune152.h5", by_name=True)
xception.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\xception-imagenet-finetune116.h5", by_name=True)
inception.load_weights(r"F:\JupyterWorkSpace\BBBBBBS\models\inceptionV3-imagenet-finetune172.h5", by_name=True)

# Classifier trained on the concatenated features of the three backbones.
model_mix = load_model(r"F:\JupyterWorkSpace\BBBBBBS\models\mixed-model.h5")

def predict_single_image(image_path):
    """Classify one image file with the three-backbone ensemble.

    The image is loaded at ``image_size`` x ``image_size``, run through ResNet50,
    Xception and InceptionV3 (each with its own preprocessing), and the concatenated
    pooled features are scored by ``model_mix``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        ``(label, probability)`` of the highest-scoring class.
    """
    labels = ['c0安全驾驶', 'c1右手打字', 'c2右手接电话', 'c3左手打字', 'c4左手接电话',
              'c5调收音机', 'c6喝饮料', 'c7拿后面的东西', 'c8整理头发和化妆', 'c9和其他乘客说话']

    picture = load_img(image_path, target_size=(image_size, image_size))
    batch = np.expand_dims(img_to_array(picture), axis=0)

    # Every preprocessor receives its own copy of the batch.
    extractors = (
        (resnet, resnet_preprocess),
        (xception, xcep_preprocess),
        (inception, incep_preprocess),
    )
    features = [net.predict(prepare(np.copy(batch))) for net, prepare in extractors]

    # Score the concatenated bottleneck features with the fusion classifier.
    scores = model_mix.predict(np.concatenate(features, axis=1))[0]
    best = np.argmax(scores)
    return labels[best], scores[best]

def process_video_stream(file_path=None):
    """Classify frames from a video file or the webcam and show them with a Chinese overlay.

    Args:
        file_path: Path of a video file to read. When empty or ``None``, camera 0
            is used instead.

    The loop runs while the capture is open and the module-level ``video_running``
    flag is true, and also ends when no more frames arrive or the user presses 'q'.
    The label is drawn with Pillow and a TrueType font because OpenCV's built-in
    fonts cannot render Chinese. The capture is published as the global ``cap`` so
    ``stop_video_stream`` can reach it. The capture and the preview window are always
    released, even on errors (including a missing font file).
    """
    global cap, video_running
    classes = ['c0安全驾驶', 'c1右手打字', 'c2右手接电话', 'c3左手打字', 'c4左手接电话',
               'c5调收音机', 'c6喝饮料', 'c7拿后面的东西', 'c8整理头发和化妆', 'c9和其他乘客说话']

    # Work on a local handle: the global `cap` may be rebound by another worker, and
    # this loop must keep reading from (and finally release) its own capture.
    capture = cv2.VideoCapture(file_path if file_path else 0)
    cap = capture
    try:
        # SimHei font path on Windows. Loaded once here instead of on every frame.
        font_path = r"C:\Windows\Fonts\simhei.ttf"
        font = ImageFont.truetype(font_path, 30)

        while capture.isOpened() and video_running:
            ret, frame = capture.read()
            if not ret:
                break

            # OpenCV delivers frames in BGR order, while predict_single_image feeds the
            # models RGB data (load_img). Convert so both paths give the preprocessors
            # the same channel layout.
            img_resized = cv2.resize(frame, (image_size, image_size))
            img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
            img_array = np.expand_dims(img_to_array(img_rgb), axis=0)

            # Extract features with each backbone (each gets its own copy to preprocess).
            feat_resnet = resnet.predict(resnet_preprocess(np.copy(img_array)))
            feat_xcep = xception.predict(xcep_preprocess(np.copy(img_array)))
            feat_incep = inception.predict(incep_preprocess(np.copy(img_array)))

            # Concatenate the bottleneck features and score them with the fusion classifier.
            bottleneck_feature = np.concatenate([feat_resnet, feat_xcep, feat_incep], axis=1)
            prediction = model_mix.predict(bottleneck_feature)

            class_idx = np.argmax(prediction[0])
            predicted_class = classes[class_idx]
            predicted_prob = prediction[0][class_idx]

            # Draw the overlay with Pillow. The frame stays in BGR order throughout;
            # the fill colour (0, 255, 0) is green in both BGR and RGB, so no channel
            # swap is needed for display.
            pil_img = Image.fromarray(frame)
            draw = ImageDraw.Draw(pil_img)
            draw.text((10, 30), f"类别: {predicted_class}", font=font, fill=(0, 255, 0))
            draw.text((10, 60), f"概率: {predicted_prob:.4f}", font=font, fill=(0, 255, 0))
            frame = np.array(pil_img)  # back to an array for cv2.imshow

            cv2.imshow("Video Stream", frame)

            # Press 'q' to leave the stream.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit and on errors, so the camera/file is never left open.
        capture.release()
        cv2.destroyAllWindows()

def start_video_thread(file_path=None):
    """Start ``process_video_stream`` on a background daemon thread.

    Args:
        file_path: Optional video file to stream; ``None`` selects the webcam.

    If a previous worker is still running it is asked to stop and waited for first.
    Two workers would otherwise share the global ``cap`` and ``video_running`` and
    fight over the same capture and preview window. The wait happens on the calling
    (GUI) thread and is bounded, so a stuck worker cannot freeze the UI indefinitely.
    """
    global video_running, video_thread
    previous = globals().get("video_thread")
    if previous is not None and previous.is_alive():
        video_running = False      # the worker checks this flag once per frame
        previous.join(timeout=5)   # give it time to release its capture

    video_running = True  # mark the stream as active before the worker starts
    video_thread = threading.Thread(target=process_video_stream, args=(file_path,), daemon=True)
    video_thread.start()

def stop_video_stream():
    """Signal the video worker to stop and close the capture and preview window.

    Safe to call when no stream has been started yet.
    """
    global video_running
    video_running = False  # the worker loop checks this flag on every frame

    # `cap` only exists once process_video_stream has run; referencing it directly
    # raised NameError when the stop button was pressed before any stream was started.
    capture = globals().get("cap")
    if capture is not None and capture.isOpened():
        capture.release()
    cv2.destroyAllWindows()

def open_image():
    """Button handler: pick an image, classify it, and show thumbnail, result and path.

    Does nothing when the file dialog is cancelled.
    """
    chosen = filedialog.askopenfilename(title="选择图片", filetypes=[("Image Files", "*.png;*.jpg;*.jpeg")])
    if not chosen:
        return

    label, prob = predict_single_image(chosen)

    # Show a thumbnail of at most 200x200 pixels.
    preview = Image.open(chosen)
    preview.thumbnail((200, 200))
    photo = ImageTk.PhotoImage(preview)
    panel_img.config(image=photo)
    panel_img.image = photo  # keep a reference on the widget so the image is not garbage-collected

    # Prediction summary and the selected path.
    label_result.config(text=f"预测类别: {label}\n预测概率: {prob:.4f}")
    label_file_path.config(text=f"文件路径: {chosen}")

def open_video_from_load():
    """Button handler: ask for a video file and stream it; do nothing if cancelled."""
    chosen = filedialog.askopenfilename(title="选择视频文件", filetypes=[("Video Files", "*.mp4;*.avi;*.mov")])
    if not chosen:
        return
    # Hand the selected file to the video worker thread.
    start_video_thread(chosen)

def open_video():
    """Button handler: launch the webcam stream on its worker thread."""
    start_video_thread()

def close_video():
    """Button handler: stop the running video stream."""
    stop_video_stream()

# Main window
root = tk.Tk()
root.config(bg="#f4e1f1")  # window background; tk.Tk() itself takes no bg argument
root.title("图像分类预测")
root.geometry("800x700")

# Font shared by the buttons and the file-path label
font_style = ("Arial", 12)

# Action buttons
btn_open_image = tk.Button(root, text="选择图片", command=open_image, width=20, height=2, bg="#e0f7fa", fg="black", font=font_style)
btn_open_image.pack(pady=20)

btn_open_video = tk.Button(root, text="启动视频流", command=open_video, width=20, height=2, bg="#e0f7fa", fg="black", font=font_style)
btn_open_video.pack(pady=20)

btn_open_video_from_load = tk.Button(root, text="选择视频文件", command=open_video_from_load, width=20, height=2, bg="#e0f7fa", fg="black", font=font_style)
btn_open_video_from_load.pack(pady=20)

btn_close_video = tk.Button(root, text="关闭视频流", command=close_video, width=20, height=2, bg="#e0f7fa", fg="black", font=font_style)
btn_close_video.pack(pady=20)

# Thumbnail of the selected image (filled in by open_image)
panel_img = tk.Label(root, bg="#e0f7fa")
panel_img.pack(pady=10)

# Prediction summary
label_result = tk.Label(root, text="预测结果", font=("Arial", 14, "bold"), justify="left", width=30, height=4, bg="#e0f7fa")
label_result.pack(pady=10)

# Path of the selected file
label_file_path = tk.Label(root, text="文件路径", font=font_style, width=80, height=2, bg="#e0f7fa")
label_file_path.pack(pady=10)

# Enter the Tk event loop (blocks until the window is closed)
root.mainloop()

# The window is gone, but in a notebook the kernel keeps running, so the daemon worker
# thread is not terminated. Clear the flag so the video loop exits on its next iteration;
# it then releases its capture and closes the preview window itself.
video_running = False






