In [83]:
from tensorflow.keras.preprocessing import image as image_utils
from tensorflow.keras.applications.vgg19 import preprocess_input

def load_and_process_image(image_path):
    # Print image's original shape, for reference
    print('Original image shape: ', mpimg.imread(image_path).shape)
    
    # Load in the image with a target size of 200,200
    image = image_utils.load_img(image_path, target_size=(200, 200))
    # Convert the image from a PIL format to a numpy array
    image = image_utils.img_to_array(image)
    # Add a dimension for number of images, in our case 1
    image = image.reshape(1,200,200,3)
    # Preprocess image to align with original ImageNet dataset
    image = preprocess_input(image)
    # Print image's shape after processing
    print('Processed image shape: ', image.shape)
    return image

In [84]:
#进行预测
from tensorflow.keras.applications.vgg19 import decode_predictions

def readable_prediction(image_path):
    # Show image
    show_image(image_path)
    # Load and pre-process image
    image = load_and_process_image(image_path)
    # Make predictions
    predictions = model.predict(image)
    # Print predictions in readable form
    print('Predicted:', decode_predictions(predictions, top=3))

In [85]:
from tensorflow import keras

base_model = keras.applications.VGG19(
    weights='imagenet',  # Load weights pre-trained on ImageNet.
    input_shape=(200, 200, 3),
    include_top=False)

In [86]:
base_model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 200, 200, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 200, 200, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 200, 200, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 100, 100, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 100, 100, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 100, 100, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 50, 50, 128)       0     

In [87]:
base_model.trainable = False

In [88]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

inputs = keras.Input(shape=(200,200,3))
# Separately from setting trainable on the model, we set training to ase 
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
# A Dense classifier with a single unit (binary classification)
outputs = keras.layers.Dense(4,activation='softmax')(x) 
################注意!此处只有一个分类

model = keras.Model(inputs, outputs)

In [89]:
model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        [(None, 200, 200, 3)]     0         
_________________________________________________________________
vgg19 (Functional)           (None, 6, 6, 512)         20024384  
_________________________________________________________________
global_average_pooling2d_7 ( (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 4)                 2052      
Total params: 20,026,436
Trainable params: 2,052
Non-trainable params: 20,024,384
_________________________________________________________________


In [90]:
# Important to use binary crossentropy and binary accuracy as we now have a binary classification problem
model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

In [91]:
#可以先简化问题
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=True,  # set each sample mean to 0
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False) # we don't expect Bo to be upside-down so we will not flip vertically

In [92]:
'''
    # load and iterate training dataset
    train_it = datagen.flow_from_directory("C:/Users/Admin/Desktop/ASL数据集2-kaggle/asl_alphabet_train/asl_alphabet_train", 
                                        target_size=(200, 200), 
                                        color_mode='rgb', 
                                        batch_size=8)
    # load and iterate validation dataset
    valid_it = datagen.flow_from_directory('C:/Users/Admin/Desktop/ASL数据集2-kaggle/asl_alphabet_train/asl_alphabet_valid', 
                                        target_size=(200, 200), 
                                        color_mode='rgb',  
                                        batch_size=8)
'''

'\n    # load and iterate training dataset\n    train_it = datagen.flow_from_directory("C:/Users/Admin/Desktop/ASL数据集2-kaggle/asl_alphabet_train/asl_alphabet_train", \n                                        target_size=(200, 200), \n                                        color_mode=\'rgb\', \n                                        batch_size=8)\n    # load and iterate validation dataset\n    valid_it = datagen.flow_from_directory(\'C:/Users/Admin/Desktop/ASL数据集2-kaggle/asl_alphabet_train/asl_alphabet_valid\', \n                                        target_size=(200, 200), \n                                        color_mode=\'rgb\',  \n                                        batch_size=8)\n'

In [93]:
'''
    # load and iterate training dataset
    train_it = datagen.flow_from_directory( "C:/Users/Admin/Desktop/ASL数据集2-kaggle/asl_alphabet_train/asl_alphabet_train", 
                                        target_size=(200, 200), 
                                        color_mode='rgb', 
                                        batch_size=8,
                                        validation_split=0.)
'''                                   


'\n    # load and iterate training dataset\n    train_it = datagen.flow_from_directory( "C:/Users/Admin/Desktop/ASL数据集2-kaggle/asl_alphabet_train/asl_alphabet_train", \n                                        target_size=(200, 200), \n                                        color_mode=\'rgb\', \n                                        batch_size=8,\n                                        validation_split=0.)\n'

In [94]:
#pip install --upgrade tensorflow

In [2]:
import tensorflow as tf
train_it = tf.keras.preprocessing.image_dataset_from_directory('训练集',
image_size=(200,200),subset='validation',validation_split=0.2,seed = 123,label_mode='categorical',shuffle=True )
valid_it = tf.keras.preprocessing.image_dataset_from_directory('训练集',
image_size=(200,200),subset='training',validation_split=0.2,seed = 123,label_mode='categorical',shuffle=True)

Found 11897 files belonging to 4 classes.
Using 2379 files for validation.
Found 11897 files belonging to 4 classes.
Using 9518 files for training.


TypeError: 'BatchDataset' object is not subscriptable

In [96]:
len_data = 9000#输入数据的长度
batch_size1=75
model.fit(train_it,validation_data=valid_it, validation_steps=4, epochs=10,batch_size=batch_size1,steps_per_epoch=len_data/batch_size1)

Epoch 1/10


<keras.callbacks.History at 0x7fc93603c8d0>

In [97]:
model.save('training_modle')

INFO:tensorflow:Assets written to: training_modle/assets


In [106]:
# 模型微调
base_model.trainable = True

# It's important to recompile your model after you make any changes
# to the `trainable` attribute of any inner layer, so that your changes
# are taken into account
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate = .00001),  # Very low learning rate
              loss='categorical_crossentropy', metrics=['accuracy'],)

In [None]:
model.fit(train_it, steps_per_epoch=12, validation_data=valid_it, validation_steps=4, epochs=10)

Epoch 1/10
 2/12 [====>.........................] - ETA: 3:01 - loss: 5.3644e-07 - accuracy: 1.0000

In [100]:
#检查预测结果
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.preprocessing import image as image_utils
from tensorflow.keras.applications.imagenet_utils import preprocess_input

def show_image(image_path):
    image = mpimg.imread(image_path)
    plt.imshow(image)

def make_predictions(image_path):
    show_image(image_path)
    image = image_utils.load_img(image_path, target_size=(200, 200))
    image = image_utils.img_to_array(image)
    image = image.reshape(1,200,200,3)
    image = preprocess_input(image)
    preds = model.predict(image)
    return preds

In [101]:
model.save('training_model_1')

INFO:tensorflow:Assets written to: training_model_1/assets


In [2]:
import cv2  # python中的opencv-python库，著名的计算机视觉库
import numpy as np
#采集视频、显示并保存        
dictionary = {0:' 未知 ',1:'a',2:'b',3:'c',4:'d'}


In [1]:
from tensorflow import keras
model = keras.models.load_model('training_modle')

In [3]:

def capturevideoSave(imgName,camera = 0):
    # camera 表示摄像头，内置摄像头:0, 外接摄像为按照顺序依次为1,2,3...
    cap=cv2.VideoCapture(camera)
    cap.set(cv2.CAP_PROP_FPS, 30)
    global_num = 0 
    while(1):
        ret,frame = cap.read()
        
        
        #图片压缩到合适大小
        frame = cv2.resize(frame,(200,200))
        #######更改图片亮度
        
        #镜像处理显示
        frame_show =np.fliplr(frame)
        cv2.imshow("capture", frame_show)
        frame = frame.reshape(1,200,200,3) 
        frame = frame/255
        prediction = model(frame)
        a = np.max(prediction)
        if a >= 0.4:
            predicted_letter = dictionary[np.argmax(prediction)]
            print(predicted_letter)
            print(a)
        else:
            print('未识别到目标，请重新识别')
            print(a)
        #更改窗口大小
        #cv2.resizeWindow("capture", 640, 480)
        k=cv2.waitKey(1) #等待1ms，获取用户的键盘输入
        
        
        if k==ord(' '): #如果用户输入空格，将当前帧用imgName作为文件名保存
            cv2.imwrite(imgName+'.jpg',frame)
            break
    cap.release()
    cv2.destroyAllWindows()
    return imgName  #返回保存的文件名


In [4]:
capturevideoSave('1')

b
0.49025992
b
0.49329993
b
0.4865405
b
0.47250766
b
0.45286736
b
0.4456797
b
0.46837822
b
0.46866685
b
0.4691648
b
0.46608934
b
0.45912832
b
0.4774534
b
0.4744197
b
0.46743566
b
0.47051296
b
0.48141268
b
0.4761827
b
0.47586322
b
0.4780271
b
0.49656272
b
0.4826325
b
0.4923485
b
0.4765888
b
0.48576018
b
0.46408862
b
0.47790885
b
0.4964571
b
0.49084035
b
0.46782857
b
0.46557084
b
0.46221688
b
0.49372047
b
0.49961215
b
0.51253796
b
0.5157582
b
0.48908234
b
0.489393
b
0.4866784
b
0.49573842
b
0.48982617
b
0.48418552
b
0.4867565
b
0.49644843
b
0.5056465
b
0.5028487
b
0.49958283
b
0.49859366
b
0.4985057
b
0.4806466
b
0.47961038
b
0.49119452
b
0.47807613
b
0.4902393
b
0.4935956
b
0.49577117
b
0.50434417
b
0.49952915
b
0.50363487
b
0.49845424
b
0.4885156
b
0.48814023
b
0.48744366
b
0.4944274
b
0.49261957
b
0.48288006
b
0.47001874
b
0.4712406
b
0.4645219
b
0.44457772
b
0.44143966
b
0.4487027
b
0.4480397
b
0.45042533
b
0.45667896
b
0.47488102
b
0.50934297
b
0.4900237
b
0.5021405
b
0.48541006
b
0

'1'

In [None]:
#手势识别对比

def capturevideoSave(imgName,camera = 0):
    # camera 表示摄像头，内置摄像头:0, 外接摄像为按照顺序依次为1,2,3...
    cap=cv2.VideoCapture(camera)
    cap.set(cv2.CAP_PROP_FPS, 30)
    global_num = 0 
    while(1):
        ret,frame = cap.read()
        
        
        #图片压缩到合适大小
        frame = cv2.resize(frame,(200,200))
        #######更改图片亮度
        
        #镜像处理显示
        frame_show =np.fliplr(frame)
        cv2.imshow("capture", frame_show)
        frame = frame.reshape(1,200,200,3) 
        frame = frame/255
        prediction = model(frame)
        a = np.max(prediction)
        if a >= 0.6:
            predicted_letter = dictionary[np.argmax(prediction)]
            print(predicted_letter)
            print(a)
        else:
            print('未识别到目标，请重新识别')
            print(a)
        #更改窗口大小
        #cv2.resizeWindow("capture", 640, 480)
        k=cv2.waitKey(1) #等待1ms，获取用户的键盘输入
        
        
        if k==ord(' '): #如果用户输入空格，将当前帧用imgName作为文件名保存
            cv2.imwrite(imgName,frame)
            break
    cap.release()
    cv2.destroyAllWindows()
    return imgName  #返回保存的文件名
