In [1]:
import torch
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils import data as Data
from torchvision import transforms
from torch import nn

In [2]:
# 定义自己的train Dataset
class MyDataset(Data.Dataset):
    def __init__(self, path, label):
        super(MyDataset, self).__init__()
        self.path = path
        self.label = torch.LongTensor(label)  # 标签值为int类型

    def __getitem__(self, index):
        # # 打开图片
        # img_as_img = Image.open(self.path[index])
        # # 设置好需要转换的变量，还可以包括一系列的nomarlize等等操作
        # transform = transforms.Compose([
        #     transforms.Resize((224, 224)),
        #     transforms.ToTensor()
        # ])
        # img = transform(img_as_img)
        
        # 由于图片是灰度图，读出来形状是（1，224，224）而后面检测使用的opencv读出来是（3，244，244）
        # 故采用opencv的方式读取图片        

        img_as_img = cv2.imread(self.path[index], cv2.IMREAD_GRAYSCALE)
        backtorgb = cv2.cvtColor(img_as_img, cv2.COLOR_GRAY2RGB)
        img_size = 224
        new_array = cv2.resize(backtorgb, (img_size, img_size))  # [224,224,3]
        new_array = np.transpose(new_array, [2,0,1])  # 维度变换 [3,224,224]
        img = torch.Tensor(new_array)  # 转换为张量

        label = self.label[index]
        return (img, label)

    def __len__(self):
        return len(self.label)


def read_data(batch_size, train_split, valid_split):

    base_path = "E:\Code\Python\DeepLearningCompetition\eye\mrlEyes_2018_01\s000"
    imgs_pathes = []
    imgs_labels = []
    

    for i in range(9):
        path = base_path + str(i+1)
        imgs = os.listdir(path)  # 获取文件名
        for img in imgs:
            # s0001_00280_0_0_0_0_0_01.png 第三个0表示闭眼，为1则表示睁眼
            label = int(str(img)[14:15])
            imgs_pathes.append(path + "\\" + img)
            imgs_labels.append(label)
  
    dataset = MyDataset(imgs_pathes, imgs_labels)
    # 划分训练集，验证集
    train_size = int(len(dataset) * train_split)
    valid_size = len(dataset) - train_size
    train_dataset, valid_dataset = Data.random_split(dataset, [train_size, valid_size])  # 随机划分训练集和验证集

    train_loader = Data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)  # 加载DataLoader
    valid_loader = Data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    return train_loader, valid_loader

In [3]:
# 定义训练函数
def train_model(train_loader, model, criterion, optimizer, device):
    model.train()
    train_loss = []
    train_acc = []

    for i, data in enumerate(train_loader, 0):

        inputs, labels = data[0].cuda(), data[1].cuda()  # 获取数据

        outputs = model(inputs)  # 预测结果
        
        _, pred = outputs.max(1)  # 求概率最大值对应的标签

        num_correct = (pred == labels).sum().item()
        acc = num_correct / len(labels)  # 计算准确率

        loss = criterion(outputs, labels)  # 计算loss
        optimizer.zero_grad()  # 梯度清0
        loss.backward()  # 反向传播
        optimizer.step()  # 更新系数

        train_loss.append(loss.item())
        train_acc.append(acc)

    return np.mean(train_loss), np.mean(train_acc)


# 定义测试函数，具体结构与训练函数相似
def test_model(test_loader, criterion, model, device):
    model.eval()
    test_loss = []
    test_acc = []

    for i, data in enumerate(test_loader, 0):
 
        inputs, labels = data[0].cuda(), data[1].cuda()
        # print(inputs.shape)
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        _, pred = outputs.max(1)

        num_correct = (pred == labels).sum().item()
        acc = num_correct / len(labels)
        # 测试不需要反向传播
        test_loss.append(loss.item())
        test_acc.append(acc)

    return np.mean(test_loss), np.mean(test_acc)

In [4]:
net = nn.Sequential(
    # 这里网络采用了AlexNet的卷积网络    
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    nn.Linear(6400, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 2))

训练二分类模型

In [9]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(device)
model = net.cuda()  # 初始化模型，
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # 使用Adam优化算法

batch_size = 256
train_split = 0.9  # 训练集比例
valid_split = 0.1  # 测试机比例
epoch = 10

print('******开始读取数据******')
train_loader, valid_loader = read_data(batch_size, train_split, valid_split)
print('******开始训练模型******')
for epoch in range(epoch):
    time_all = 0
    train_loss, train_acc = train_model(train_loader, model, criterion, optimizer, device)  # 训练模型
    valid_loss, valid_acc = test_model(valid_loader, criterion, model, device)  # 测试模型
    print('- Epoch: %d - Train_loss: %.5f - Train_acc: %.5f - Val_loss: %.5f - Val_acc: %5f'
        % (epoch, train_loss, train_acc, valid_loss, valid_acc))
torch.cuda.empty_cache()

cuda:1
******开始读取数据******
******开始训练模型******
- Epoch: 0 - Train_loss: 0.02191 - Train_acc: 0.99330 - Val_loss: 0.00981 - Val_acc: 0.997786
- Epoch: 1 - Train_loss: 0.00586 - Train_acc: 0.99855 - Val_loss: 0.00782 - Val_acc: 0.996809
- Epoch: 2 - Train_loss: 0.00348 - Train_acc: 0.99922 - Val_loss: 0.00643 - Val_acc: 0.998047
- Epoch: 3 - Train_loss: 0.00491 - Train_acc: 0.99877 - Val_loss: 0.01310 - Val_acc: 0.997786
- Epoch: 4 - Train_loss: 0.00345 - Train_acc: 0.99922 - Val_loss: 0.01543 - Val_acc: 0.998047
- Epoch: 5 - Train_loss: 0.00593 - Train_acc: 0.99900 - Val_loss: 0.00650 - Val_acc: 0.999023
- Epoch: 6 - Train_loss: 0.00796 - Train_acc: 0.99855 - Val_loss: 0.02243 - Val_acc: 0.990950
- Epoch: 7 - Train_loss: 0.01356 - Train_acc: 0.99587 - Val_loss: 0.04169 - Val_acc: 0.987498
- Epoch: 8 - Train_loss: 0.00492 - Train_acc: 0.99877 - Val_loss: 0.00831 - Val_acc: 0.996094
- Epoch: 9 - Train_loss: 0.00412 - Train_acc: 0.99955 - Val_loss: 0.00317 - Val_acc: 0.999023


调用相机进行识别

In [1]:
import winsound
frequency = 2500  # Set frequency to 2500
duration = 1500  # Set duration to 1500 ms == 1.5 sec
import numpy as np
import cv2
path = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
cap = cv2.VideoCapture(1)
#check if webcam is opened correctly
if not cap.isOpened():
    cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FPS, 5)
counter = 0
model.cpu()  # 将刚刚的模型换成cpu模式
while True:
    ret,frame = cap.read()
    eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    eyes = eye_cascade.detectMultiScale(gray, 1.1, 4)
    for x,y,w,h in eyes:
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]
        cv2.rectangle(frame, (x,y), (x+w,y+h), (0, 255, 0), 2)
        eyess = eye_cascade.detectMultiScale(roi_gray)
        if len(eyess) == 0:
            print("Eyes are not detected")
        else:
            for (ex, ey, ew, eh) in eyess:
                eyes_roi = roi_color[ey: ey+eh, ex: ex+ew]
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if(faceCascade.empty()==False):
        print("detected")
    faces = faceCascade.detectMultiScale(gray, 1.1, 4)
    # Draw a rectangle around eyes
    for (x,y,w,h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    final_image = cv2.resize(eyes_roi, (224,224))
    final_image = np.expand_dims(final_image, axis=0)
    final_image = final_image/255.0  #（1，224，224，3）
    
    
    img_list = list(final_image)  # 元组转变为列表
    final_image = np.transpose(img_list, [0,3,1,2])  # 变换维度[1,3,224,224]
    input = torch.Tensor(final_image)  # 转变为张量
    _, Predictions = model(input).max(1)  

    
#     Predictions = new_model.predict(final_image)
    if (Predictions>=0.3):
        status = "Open Eyes"
        cv2.putText(frame,
                status,
                (150,150),
                font, 3,
                (0, 255, 0),
                2,
                cv2.LINE_4)
        x1,y1,w1,h1 = 0,0,175,75
        cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), (0,0,0), -1)
        #Add text
        cv2.putText(frame, 'Active', (x1 + int(w1/10),y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
    elif Predictions<0.3:
        counter = counter + 1
        status = "Closed Eyes"
        cv2.putText(frame,
                status,
                (150,150),
                font, 3,
                (0, 0, 255),
                2,
                cv2.LINE_4)
        x1,y1,w1,h1 = 0,0,175,75
        cv2.rectangle(frame, (x1,y1), (x1 + w1, y1 + h1), (0,0,255), 2)
        if counter > 10:
            x1,y1,w1,h1 = 0,0,175,75
            #Draw black background rectangle
            cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), (0,0,0), -1)
            #Add text
            cv2.putText(frame, "Sleep Alert !!!", (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
            winsound.Beep(frequency, duration)
            counter = 0
    cv2.imshow("Drowsiness Detection", frame)
    if cv2.waitKey(2) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

NameError: name 'model' is not defined