In [None]:
import tensorflow as tf;
import os;
import cv2;
import numpy as np;
import tqdm;
from sklearn.preprocessing import LabelBinarizer;
import matplotlib.pyplot as plt

In [None]:
# Root directory of the video collection. The original location stays the
# default; set the SKATE_TRICKS_DIR environment variable to run the notebook
# on another machine without editing this cell.
BASE_PATH = os.environ.get(
    'SKATE_TRICKS_DIR',
    '/Users/nibabi/Desktop/skateboard_trick_classification/Tricks',
)
# Glob pattern (consumed by tf.io.gfile.glob) selecting the .mov files that
# sit in the sub-folders of BASE_PATH.
VIDEOS_PATH = os.path.join(BASE_PATH, '**', '*.mov')
# Upper bound on frames sampled per video; also the length every feature
# sequence is zero-padded to before it is fed to the classifier.
SEQUENCE_LENGTH = 100

In [None]:
def frame_generator():
    """Yield augmented, InceptionV3-preprocessed frames for every video.

    Every file matching VIDEOS_PATH is visited once, in random order. Within a
    video, one frame out of every ``max(1, num_frames // SEQUENCE_LENGTH)`` is
    kept, and at most SEQUENCE_LENGTH frames are emitted per video.

    Yields:
        tuple: ``(img, video_path)`` where ``img`` is the 299x299 frame after
        random flips, brightness/contrast jitter and
        ``inception_v3.preprocess_input``, and ``video_path`` is the path of
        the video the frame was read from.
    """
    video_paths = tf.io.gfile.glob(VIDEOS_PATH)
    np.random.shuffle(video_paths)
    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        # try/finally guarantees the capture handle is released even when the
        # consumer stops iterating early or a frame fails to process.
        try:
            num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            sample_every_frame = max(1, num_frames // SEQUENCE_LENGTH)
            current_frame = 0

            remaining = SEQUENCE_LENGTH
            while remaining > 0:
                success, frame = cap.read()
                if not success:
                    break

                if current_frame % sample_every_frame == 0:
                    # OpenCV reads in BGR, TensorFlow expects RGB, so the
                    # channel order is reversed.
                    frame = frame[:, :, ::-1]
                    img = tf.image.resize(frame, (299, 299))

                    # Data augmentation.
                    # NOTE(review): each random op is drawn independently for
                    # every frame, so frames of the same clip can be flipped
                    # differently - confirm this is intended.
                    img = tf.image.random_flip_left_right(img)  # random horizontal flip
                    img = tf.image.random_flip_up_down(img)     # random vertical flip

                    # Random brightness and contrast jitter.
                    # NOTE(review): presumably `img` is still on a 0-255 scale
                    # here, which would make max_delta=0.3 almost a no-op -
                    # TODO confirm the intended scale.
                    img = tf.image.random_brightness(img, max_delta=0.3)
                    img = tf.image.random_contrast(img, lower=0.8, upper=1.2)

                    img = tf.keras.applications.inception_v3.preprocess_input(img)
                    remaining -= 1

                    yield img, video_path

                current_frame += 1
        finally:
            cap.release()

# Stream (frame, video path) pairs out of frame_generator, group them into
# batches of 16 and let tf.data prefetch batches in the background.
dataset = (
    tf.data.Dataset.from_generator(
        frame_generator,
        output_types=(tf.float32, tf.string),
        output_shapes=((299, 299, 3), ()),
    )
    .batch(16)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
"""
Extract features with inception_v3
"""

# ImageNet-pretrained InceptionV3 without its classification head, followed by
# global average pooling so every frame becomes a single feature vector.
inception_v3 = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')

x = inception_v3.output

pooling_output = tf.keras.layers.GlobalAveragePooling2D()(x)

feature_extraction_model = tf.keras.Model(inputs=inception_v3.input, outputs=pooling_output)

# Frames of one video arrive consecutively, so features are accumulated until
# the path changes and are then written next to the video as a .npy file.
current_path = None
all_features = []

for img, batch_paths in tqdm.tqdm(dataset):
    pooled = feature_extraction_model(img)
    # Flatten everything after the batch axis into one vector per frame.
    batch_features = tf.reshape(pooled, (pooled.shape[0], -1))

    for features, path in zip(batch_features.numpy(), batch_paths.numpy()):
        video_changed = current_path is not None and path != current_path
        if video_changed:
            # `path` values are bytes (tf.string), hence the decode().
            output_path = current_path.decode().replace('.mov', '.npy')
            np.save(output_path, all_features)
            all_features = []

        all_features.append(features)
        current_path = path

# Flush the features of the last video, which never sees a path change.
if len(all_features) > 0:
    output_path = current_path.decode().replace('.mov', '.npy')
    np.save(output_path, all_features)
        

In [None]:
# LABELS = ['Ollie','Kickflip','Shuvit'] 
# encoder = LabelBinarizer()
# encoder.fit(LABELS)

# # LSTM + CNN
# def generate_lstmfcn(MAX_SEQUENCE_LENGTH, NB_CLASS, NUM_CELLS=8):

#     ip = tf.keras.Input(shape=(MAX_SEQUENCE_LENGTH,2048))

#     x = tf.keras.layers.LSTM(NUM_CELLS)(ip)
#     x = tf.keras.layers.Dropout(0.5)(x)

#     y = tf.keras.layers.Permute((2, 1))(ip)
#     y = tf.keras.layers.Conv1D(128, 8, padding='same', kernel_initializer='he_uniform')(y)
#     y = tf.keras.layers.BatchNormalization()(y)
#     y = tf.keras.layers.Activation('relu')(y)
#     y =  tf.keras.layers.Dropout(0.5)(y)

#     y = tf.keras.layers.Conv1D(256, 5, padding='same', kernel_initializer='he_uniform')(y)
#     y = tf.keras.layers.BatchNormalization()(y)
#     y = tf.keras.layers.Activation('relu')(y)
#     y =  tf.keras.layers.Dropout(0.5)(y)

#     y = tf.keras.layers.Conv1D(128, 3, padding='same', kernel_initializer='he_uniform')(y)
#     y = tf.keras.layers.BatchNormalization()(y)
#     y = tf.keras.layers.Activation('relu')(y)
#     y =  tf.keras.layers.Dropout(0.5)(y)

#     y = tf.keras.layers.GlobalAveragePooling1D()(y)

#     x = tf.keras.layers.concatenate([x, y])
    
#     x = tf.keras.layers.Dense(512, activation='relu')(x)  # add an extra fully-connected layer
#     x = tf.keras.layers.Dropout(0.5)(x)

#     out = tf.keras.layers.Dense(NB_CLASS, activation='softmax')(x)

#     model = tf.keras.Model(ip, out)

#     model.summary()

#     # add load model code here to fine-tune

#     return model

In [None]:
# Plain LSTM classifier

# LABELS = ['Ollie','Kickflip','Shuvit']
LABELS = ['Ollie','Kickflip']
# fit() returns the encoder itself, so construction and fitting are chained.
encoder = LabelBinarizer().fit(LABELS)

model = tf.keras.Sequential()
# Timesteps whose feature vector is all zeros (the padding) are masked out.
model.add(tf.keras.layers.Masking(mask_value=0.))
model.add(tf.keras.layers.LSTM(512, dropout=0.5, recurrent_dropout=0.5))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# Alternative head: tf.keras.layers.Dense(len(LABELS), activation='softmax')
# One sigmoid unit per label.
model.add(tf.keras.layers.Dense(len(LABELS), activation='sigmoid'))

In [None]:
# dev.txt: one relative video path per line; used as the validation split.
with open('/Users/nibabi/Desktop/skateboard_trick_classification/dev.txt') as f:
    test_list = [line.strip() for line in f]

# train.txt: only the first space-separated field of each line is kept.
with open('/Users/nibabi/Desktop/skateboard_trick_classification/train.txt') as f:
    train_list = [line.strip().split(' ')[0] for line in f]

In [None]:
def make_generator(file_list, base_path=None, sequence_length=None, label_encoder=None):
    """Build a generator function yielding (padded features, label) pairs.

    Args:
        file_list: Relative video paths such as ``'Ollie/Ollie11.mov'``. The
            name of the parent folder is used as the label. The list is not
            modified.
        base_path: Directory the relative paths are resolved against.
            Defaults to the notebook-level ``BASE_PATH``.
        sequence_length: Number of timesteps every sequence is padded (or
            truncated) to. Defaults to the notebook-level ``SEQUENCE_LENGTH``.
        label_encoder: Object with a ``transform([label])`` method returning a
            2-D array. Defaults to the notebook-level ``encoder``.

    Returns:
        A zero-argument generator function suitable for
        ``tf.data.Dataset.from_generator``. Each call walks the files in a
        fresh random order and yields ``(padded_sequence, label)`` where
        ``padded_sequence`` has shape ``(sequence_length, 2048)``.
    """
    def generator():
        # Defaults are resolved lazily so the notebook globals are only needed
        # when the caller did not pass explicit values.
        root = BASE_PATH if base_path is None else base_path
        max_len = SEQUENCE_LENGTH if sequence_length is None else sequence_length
        enc = encoder if label_encoder is None else label_encoder

        # Shuffle a copy: shuffling `file_list` itself would silently reorder
        # the caller's list on every epoch.
        paths = list(file_list)
        np.random.shuffle(paths)
        for path in paths:
            full_path = os.path.join(root, path).replace('.mov', '.npy')

            label = os.path.basename(os.path.dirname(path))
            # Truncate so an over-long feature file cannot overflow the buffer.
            features = np.load(full_path)[:max_len]

            padded_sequence = np.zeros((max_len, 2048))
            padded_sequence[:len(features)] = features

            transformed_label = enc.transform([label])[0]
            if transformed_label.shape[0] == 1:
                # A single-column (binary) encoding is expanded to two entries:
                # index 0 is the encoder output, index 1 its complement, so
                # the label has shape (2,).
                transformed_label = np.hstack([transformed_label, 1 - transformed_label])
            yield padded_sequence, transformed_label

    return generator

def _build_sequence_dataset(file_list):
    """Wrap make_generator(file_list) in a batched, prefetching tf.data pipeline.

    Each element is a pair of a float32 feature sequence of shape
    (SEQUENCE_LENGTH, 2048) and an int16 label vector of shape (len(LABELS),).
    """
    return (
        tf.data.Dataset.from_generator(
            make_generator(file_list),
            output_types=(tf.float32, tf.int16),
            # Trailing comma makes the label shape an explicit 1-tuple;
            # `(len(LABELS))` was just a parenthesised int.
            output_shapes=((SEQUENCE_LENGTH, 2048), (len(LABELS),)),
        )
        .batch(16)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )


train_dataset = _build_sequence_dataset(train_list)

# `test_list` holds the entries read from dev.txt and serves as validation data.
valid_dataset = _build_sequence_dataset(test_list)

In [None]:
# Build the model (disabled LSTM-FCN alternative)
# MAX_SEQUENCE_LENGTH = 100
# NB_CLASS = 2
# model = generate_lstmfcn(MAX_SEQUENCE_LENGTH, NB_CLASS)

# Compile the model.
# 'top_k_categorical_accuracy' was removed from the metrics: Keras uses k=5 by
# default, so with only len(LABELS) outputs the true class is always inside
# the top k and the metric is a constant 1.0.
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])


tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='log', update_freq=1000)
history = model.fit(train_dataset, epochs=100, callbacks=[tensorboard_callback], validation_data=valid_dataset)

# Pull the per-epoch training/validation loss and accuracy out of the history.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Two side-by-side panels drawn through the explicit Axes interface.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Loss curves.
loss_ax.plot(train_loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.set_title('Loss Curve')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Accuracy curves.
accuracy_ax.plot(train_accuracy, label='Training Accuracy')
accuracy_ax.plot(val_accuracy, label='Validation Accuracy')
accuracy_ax.set_title('Accuracy Curve')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

plt.show()

In [None]:
# Persist the trained model; the path is relative, so the file is written to
# the current working directory.
model.save('model_2023_11_17.h5')

In [90]:
# test.txt: one relative video path per line, without labels.
with open('/Users/nibabi/Desktop/skateboard_trick_classification/test.txt') as f:
    unknown_dataset = [line.strip() for line in f]
print(unknown_dataset)

def make_generator_test(file_list, base_path=None, sequence_length=None):
    """Build a generator function yielding padded feature sequences only.

    Args:
        file_list: Relative video paths such as ``'Ollie/Ollie11.mov'``; the
            matching ``.npy`` feature file is loaded for each. Files are
            visited in the given order.
        base_path: Directory the relative paths are resolved against.
            Defaults to the notebook-level ``BASE_PATH``.
        sequence_length: Number of timesteps every sequence is padded (or
            truncated) to. Defaults to the notebook-level ``SEQUENCE_LENGTH``.

    Returns:
        A zero-argument generator function yielding arrays of shape
        ``(sequence_length, 2048)``; no labels are produced.
    """
    def generator_test():
        # Defaults are resolved lazily so the notebook globals are only needed
        # when the caller did not pass explicit values.
        root = BASE_PATH if base_path is None else base_path
        max_len = SEQUENCE_LENGTH if sequence_length is None else sequence_length

        for path in file_list:
            full_path = os.path.join(root, path).replace('.mov', '.npy')
            # Truncate so an over-long feature file cannot overflow the buffer.
            features = np.load(full_path)[:max_len]

            padded_sequence = np.zeros((max_len, 2048))
            padded_sequence[:len(features)] = features

            yield padded_sequence  # no label is generated

    return generator_test

# Turn the list of test paths into a batched dataset of feature sequences.
# The name `unknown_dataset` is rebound from the path list to the dataset.
unknown_dataset = (
    tf.data.Dataset.from_generator(
        make_generator_test(unknown_dataset),
        output_types=tf.float32,  # features only, so no label dtype is declared
        output_shapes=(SEQUENCE_LENGTH, 2048),  # shape of one feature sequence
    )
    .batch(16)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# One row of per-label sigmoid scores for every test video.
predict = model.predict(unknown_dataset)
print(predict)

# Index of the highest-scoring output per video.
# NOTE(review): presumably 0 = 'Ollie' and 1 = 'Kickflip', following the label
# layout used during training - confirm against the encoder.
predicted_labels = predict.argmax(axis=1)
print(predicted_labels)






['Kickflip/Kickflip107.mov', 'Kickflip/Kickflip18.mov', 'Ollie/Ollie11.mov', 'Kickflip/Kickflip56.mov', 'Kickflip/Kickflip82.mov', 'Kickflip/Kickflip48.mov', 'Ollie/Ollie37.mov', 'Ollie/Ollie62.mov', 'Ollie/Ollie80.mov', 'Ollie/Ollie54.mov', 'Kickflip/Kickflip74.mov', 'Kickflip/Kickflip43.mov', 'Kickflip/Kickflip58.mov', 'Kickflip/Kickflip14.mov', 'Ollie/Ollie99.mov', 'Ollie/Ollie105.mov', 'Ollie/Ollie26.mov', 'Ollie/Ollie23.mov', 'Ollie/Ollie10.mov', 'Kickflip/Kickflip91.mov', 'Ollie/Ollie84.mov', 'Ollie/Ollie74.mov', 'Kickflip/Kickflip103.mov']


2023-11-17 17:13:56.380723: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


[[0.03905711 0.949884  ]
 [0.70882136 0.27686268]
 [0.42863438 0.5046203 ]
 [0.2883597  0.6155926 ]
 [0.03856493 0.9445794 ]
 [0.2274671  0.73544645]
 [0.812379   0.17566723]
 [0.55639756 0.48944595]
 [0.87919664 0.15617995]
 [0.9166672  0.10797974]
 [0.05663994 0.94277316]
 [0.49790537 0.43498817]
 [0.43083957 0.46019778]
 [0.4441958  0.51588845]
 [0.62815565 0.20274135]
 [0.52104396 0.3523688 ]
 [0.40169728 0.40218893]
 [0.9255199  0.08680146]
 [0.8440991  0.14067979]
 [0.04662992 0.94844216]
 [0.2708345  0.5504886 ]
 [0.87203085 0.11537561]
 [0.06429199 0.92152065]]
[1 0 1 1 1 1 0 0 0 0 1 0 1 1 0 0 1 0 0 1 1 0 1]
