In [1]:
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Embedding, Dropout
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.optimizers import SGD
from sklearn.utils.class_weight import compute_class_weight
from keras.callbacks import EarlyStopping ,ModelCheckpoint
from sklearn.preprocessing import StandardScaler
from keras.layers import LayerNormalization

print('lstm')



lstm


# Data Preprocessing

In [46]:
FILE_DST = "/kaggle/input/badminton-pose-estimation/move_dataset/"
n=30 # one element has n frames

In [47]:
train = []
label = []

#### **所有的 frame 都放進去資料裡**

In [48]:
for num in range(1,101):
    
    file_name = '%05d' % num
    
    if not os.path.exists(f'{FILE_DST}{file_name}_A_move.npy'):
        continue
    
    A_player = np.load(f'{FILE_DST}{file_name}_A_move.npy')
    B_player = np.load(f'{FILE_DST}{file_name}_B_move.npy')
    A_label = np.load(f'{FILE_DST}{file_name}_A_hit_label.npy')
    B_label = np.load(f'{FILE_DST}{file_name}_B_hit_label.npy')
    
    for i in range(n, len(A_player)):
        array = np.array(A_player[i-n:i])
        train.append(array)
        label.append(A_label[i])
        
    for i in range(n, len(B_player)):
        array = np.array(B_player[i-n:i])
        train.append(array)
        label.append(B_label[i])

#### **其他 label=1 的 frame 都放進去資料裡**

In [49]:
for num in range(101,801):
    
    file_name = '%05d' % num
    
    if not os.path.exists(f'{FILE_DST}{file_name}_A_move.npy'):
        continue
    
    A_player = np.load(f'{FILE_DST}{file_name}_A_move.npy')
    B_player = np.load(f'{FILE_DST}{file_name}_B_move.npy')
    A_label = np.load(f'{FILE_DST}{file_name}_A_hit_label.npy')
    B_label = np.load(f'{FILE_DST}{file_name}_B_hit_label.npy')
    
    for i in range(n, len(A_player)):
        if (A_label[i] == 1):
            array = np.array(A_player[i-n:i])
            train.append(array)
            label.append(A_label[i])
        
    for i in range(n, len(B_player)):
        if (B_label[i] == 1):
            array = np.array(B_player[i-n:i])
            train.append(array)
            label.append(B_label[i])

In [50]:
train = np.array(train)
train_data = train.reshape((train.shape[0],train.shape[1],51))

label_data = np.array(label)
print(train_data.shape, label_data.shape)

(73119, 30, 51) (73119,)


### **Random Shuffle**

In [None]:
state = np.random.get_state()
np.random.shuffle(train_data)

np.random.set_state(state)
np.random.shuffle(label_data)

### **Spilt train and test**

In [63]:
length = int(len(train)*0.7)
X_train = train_data[:length]
y_train = label_data[:length]
X_test = train_data[length:]
y_test = label_data[length:]

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(51183, 30, 51) (21936, 30, 51)
(51183,) (21936,)


#### **Balance label**

In [64]:
class_weights = compute_class_weight('balanced', classes = np.unique(y_train) ,y= y_train)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}
print(class_weights_dict)


y_train = keras.utils.to_categorical(y_train, num_classes=2)
y_test = keras.utils.to_categorical(y_test, num_classes=2)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

{0: 0.8652207721955507, 1: 1.1845174728072205}
(51183, 30, 51) (21936, 30, 51)
(51183, 2) (21936, 2)


In [65]:
model = Sequential()

model.add(LSTM(units = 64, 
               dropout=0.2, 
               return_sequences = True, 
               activation = 'sigmoid', 
               input_shape = (X_train.shape[1], X_train.shape[2])))
model.add(LayerNormalization(axis = 1))

model.add(LSTM(units = 32, dropout=0.2, activation = 'sigmoid', input_shape = (X_train.shape[1], X_train.shape[2])))
model.add(LayerNormalization(axis = 1))

model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='softmax'))

metrics = [
        keras.metrics.BinaryAccuracy(name='accuracy'),
#         keras.metrics.Recall(name='recall'),
#         keras.metrics.AUC(name='auc'),
#         keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

model.compile(optimizer=SGD(learning_rate=0.001), loss='categorical_crossentropy', metrics=metrics)

print(model.summary())


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 30, 64)            29696     
                                                                 
 layer_normalization_4 (Laye  (None, 30, 64)           60        
 rNormalization)                                                 
                                                                 
 lstm_5 (LSTM)               (None, 32)                12416     
                                                                 
 layer_normalization_5 (Laye  (None, 32)               64        
 rNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 32)                1056      
                                                                 
 dense_5 (Dense)             (None, 2)                

In [66]:
checkpoint =ModelCheckpoint(
    filepath="/kaggle/working/LSTM_best.h5",
    save_weights_only=True,
    monitor='val_accuracy',
    save_best_only=True)
early_stop = EarlyStopping(monitor='val_accuracy', patience=10 ,min_delta=0.0001)
model.fit(X_train, y_train, epochs = 100, batch_size = 128, class_weight = class_weights_dict,validation_split=0.2,callbacks=[checkpoint, early_stop])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


<keras.callbacks.History at 0x7a58e7eb6ec0>

# Validate Model 

In [67]:
# Re-evaluate the model
loss, acc = model.evaluate(X_test, y_test, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

686/686 - 6s - loss: 0.7549 - accuracy: 0.4372 - 6s/epoch - 9ms/step
Restored model, accuracy: 43.72%


In [68]:
from sklearn.metrics import precision_score, recall_score, f1_score 
pred=model.predict(X_test)



In [69]:
print(np.argmax(pred, axis=1))

zero = 0
ones = 0
for i in np.argmax(pred, axis=1):
    if i==0:
        zero+=1
    else:
        ones+=1
print("\n預測1,0數量：")        
print('1 ', ones)
print('0 ', zero)

zero = 0
ones = 0
for i in np.argmax(y_test, axis=1):
    if i==0:
        zero+=1
    else:
        ones+=1
print("\n原本1,0數量：")        
print('1 ', ones)
print('0 ', zero)

[1 1 1 ... 1 1 1]

預測1,0數量：
1  19728
0  2208

原本1,0數量：
1  9277
0  12659


In [70]:
print(f"Precision Score: {precision_score(np.argmax(y_test, axis=1), np.argmax(pred, axis=1))*100:.2f}%")
print(f"Recall Score: {recall_score(np.argmax(y_test, axis=1), np.argmax(pred, axis=1))*100:.2f}%")
print(f"F1 score: {f1_score(np.argmax(y_test, axis=1), np.argmax(pred, axis=1))*100:.2f}%")

Precision Score: 42.22%
Recall Score: 89.79%
F1 score: 57.44%
