In [11]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [12]:
frames_df = pd.read_csv('frames_pose.csv')
frames_df

Unnamed: 0,video,frame_path,label
0,GcluCxjiSjI,frames_pose/0_0.jpg,tap dancing
1,GcluCxjiSjI,frames_pose/0_1.jpg,tap dancing
2,GcluCxjiSjI,frames_pose/0_2.jpg,tap dancing
3,GcluCxjiSjI,frames_pose/0_3.jpg,tap dancing
4,GcluCxjiSjI,frames_pose/0_4.jpg,tap dancing
...,...,...,...
121497,LT-e_wj6d9w,frames_pose/617_195.jpg,tango dancing
121498,LT-e_wj6d9w,frames_pose/617_196.jpg,tango dancing
121499,LT-e_wj6d9w,frames_pose/617_197.jpg,tango dancing
121500,LT-e_wj6d9w,frames_pose/617_198.jpg,tango dancing


In [13]:
train_frames, test_frames, train_classes, test_classes = train_test_split(frames_df['frame_path'], frames_df['label'], test_size=0.3, random_state=2024)

In [14]:
label_to_int = {label_name: i for i, label_name in enumerate(set(frames_df['label']))}

In [15]:
label_to_int

{'country line dancing': 0,
 'dancing macarena': 1,
 'swing dancing': 2,
 'tango dancing': 3,
 'jumpstyle dancing': 4,
 'belly dancing': 5,
 'breakdancing': 6,
 'dancing ballet': 7,
 'dancing charleston': 8,
 'salsa dancing': 9,
 'mosh pit dancing': 10,
 'dancing gangnam style': 11,
 'tap dancing': 12,
 'robot dancing': 13,
 'square dancing': 14}

In [16]:
train_labels = [label_to_int[label_name] for label_name in train_classes]
test_labels = [label_to_int[label_name] for label_name in test_classes]

In [17]:
def load_n_preprocess(frame_path, label):
    image = tf.io.read_file(frame_path)
    image = tf.image.decode_jpeg(image, channels = 3)
    image = tf.image.resize(image, [224,224])
    image = image / 225
    return image, label

In [18]:
def lnp_all_images(frame_path, labels):
    images = [load_n_preprocess(frame_path) for frame_path in frames_df['frame_path']]
    return tf.stack(images), labels

In [19]:
train_frames_tensor = tf.convert_to_tensor(train_frames)
train_labels_tensor = tf.convert_to_tensor(train_labels)

test_frames_tensor = tf.convert_to_tensor(test_frames)
test_labels_tensor = tf.convert_to_tensor(test_labels)

In [20]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_frames_tensor, train_labels_tensor))
train_dataset = train_dataset.map(load_n_preprocess)
train_dataset = train_dataset.apply(tf.data.experimental.shuffle_and_repeat(40000)).batch(64)

In [21]:
test_dataset = tf.data.Dataset.from_tensor_slices((test_frames_tensor, test_labels_tensor))
test_dataset = test_dataset.map(load_n_preprocess)
test_dataset = test_dataset.apply(tf.data.experimental.shuffle_and_repeat(40000)).batch(64)


In [22]:
base_model = tf.keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3))
base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(15, activation='softmax')
])

In [23]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

In [24]:
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [25]:
# @tf.function
# def train_step(images, labels):
#     with tf.GradientTape() as tape:
#         logits = model(images, training=True)
#         loss = loss_fn(labels, logits)
#     gradients = tape.gradient(loss, model.trainable_variables)
#     optimizer.apply_gradients(zip(gradients, model.trainable_variables))
#     accuracy = accuracy_metric(labels, logits)
#     return loss, accuracy

In [26]:
# @tf.function
# def test_step(images, labels):
#     logits = model(images, training=False)
#     loss = loss_fn(labels, logits)
#     accuracy = accuracy_metric(labels, logits)
#     return loss, accuracy

In [27]:
num_epochs = 20

In [28]:
# for epoch in range(num_epochs):
#     print('Epoch {}/{}'.format(epoch + 1, num_epochs))
#     accuracy_metric.reset_state()
# 
#     for images, labels in train_dataset:
#         loss, accuracy = train_step(images, labels)
#         accuracy_metric.update_state(accuracy, labels)
#         print('Loss: {}, Accuracy: {}'.format(loss, accuracy))
# 
#     print(f"Epoch accuracy: {accuracy_metric.result().numpy()}")
# 
#     accuracy_metric.reset_state()
#     for images, labels in test_dataset:
#         loss, accuracy = test_step(images, labels)
#         accuracy_metric.update_state(accuracy, labels)
#         print('Loss: {}, Accuracy: {}'.format(loss, accuracy))
# 
#     print(f"Epoch accuracy: {accuracy_metric.result().numpy()}")

In [29]:
model.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy_metric])

In [30]:
history = model.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=test_dataset
)

Epoch 1/20
  40183/Unknown [1m80724s[0m 2s/step - loss: 2.0530 - sparse_categorical_accuracy: 0.3591

KeyboardInterrupt: 

In [None]:
train_loss = history.history['loss'][-1]
train_accuracy = history.history['sparse_categorical_accuracy'][-1]
test_loss = history.history['val_loss'][-1]
test_accuracy = history.history['val_sparse_categorical_accuracy'][-1]
print(f"Train loss: {train_loss:.4f}, Train accuracy: {train_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

In [None]:
model.save('my_model_200.keras')

In [None]:

image_labels = ['country line dancing',
 'tango dancing',
 'swing dancing',
 'jumpstyle dancing',
 'dancing gangnam style',
 'square dancing',
 'dancing charleston',
 'salsa dancing',
 'mosh pit dancing',
 'robot dancing',
 'tap dancing',
 'breakdancing',
 'belly dancing',
 'dancing ballet',
 'dancing macarena']


In [None]:
video = 'x0LzgIUDIes'
image_names = frames_df.loc[frames_df['video'] == video, 'frame_path']

In [None]:
classes = []
prob = []

for image_name in image_names:
    image = tf.io.read_file(image_name)
    image = tf.image.decode_jpeg(image, channels = 3)
    image = tf.image.resize(image, [224,224])
    image = np.expand_dims(image, axis=0)
    image = image / 225
    pred = model.predict(image)
    predicted_label_index = np.argmax(pred[0])
    probability = pred[0][predicted_label_index]
    # print(f"Предсказанный класс: {image_labels[predicted_label_index]}, Вероятность: {probability:.4f}")
    classes.append(image_labels[predicted_label_index])
    prob.append(probability)

In [None]:
dict = pd.DataFrame({'label': classes, 'probability': prob})
max_dict_prob = dict.mode()['probability'].max()
pdict = dict[dict['label'] == f"{dict.mode()['label'][0]}"]

In [None]:
print(f"Класс видео: {frames_df.loc[frames_df['video'] == video, 'label'].values[0]}")
print("_______________________________________")
print("Предсказанные классы по фреймам:")
print(dict)
print(f"Наиболее встречающийся класс: {dict.mode()['label'][0]} , средняя вероятность {pdict['probability'].mean()}")
print(f"Наибольшая вероятность: {max_dict_prob}, класс {dict.loc[dict['probability'] == max_dict_prob, 'label'].values[0]}")

Видео: x0LzgIUDIes
Модель на 20 кадров в видео
Класс видео: mosh pit dancing
_______________________________________
Предсказанные классы по фреймам:
                label  probability
0   jumpstyle dancing     0.153224
1   jumpstyle dancing     0.146594
2   jumpstyle dancing     0.151000
3   jumpstyle dancing     0.141237
4   jumpstyle dancing     0.131609
5   jumpstyle dancing     0.169623
6   jumpstyle dancing     0.172320
7   jumpstyle dancing     0.180115
8       belly dancing     0.124170
9    dancing macarena     0.122473
10   dancing macarena     0.161632
11   dancing macarena     0.131638
12      belly dancing     0.123214
13   mosh pit dancing     0.214120
14   mosh pit dancing     0.205695
15   mosh pit dancing     0.295078
16     square dancing     0.211966
17   mosh pit dancing     0.280752
18   mosh pit dancing     0.335356
19   mosh pit dancing     0.221957
20   mosh pit dancing     0.236604
Наиболее встречающийся класс: jumpstyle dancing , средняя вероятность 0.15571531653404236
Наибольшая вероятность: 0.33535560965538025, класс mosh pit dancing