In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.callbacks import CSVLogger, ReduceLROnPlateau
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import CSVLogger, ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.layers import Dense, Flatten, Dropout, UpSampling2D, BatchNormalization, Conv2D, MaxPool2D, InputLayer, GlobalAveragePooling2D

In [2]:
train_df = pd.read_csv('train_frames_20.csv')
test_df = pd.read_csv('test_frames_20.csv')
train_df

Unnamed: 0,video,frame_path,label
0,GcluCxjiSjI,train/0_0.jpg,tap dancing
1,GcluCxjiSjI,train/0_1.jpg,tap dancing
2,GcluCxjiSjI,train/0_2.jpg,tap dancing
3,GcluCxjiSjI,train/0_3.jpg,tap dancing
4,GcluCxjiSjI,train/0_4.jpg,tap dancing
...,...,...,...
11167,LT-e_wj6d9w,train/617_16.jpg,tango dancing
11168,LT-e_wj6d9w,train/617_17.jpg,tango dancing
11169,LT-e_wj6d9w,train/617_18.jpg,tango dancing
11170,LT-e_wj6d9w,train/617_19.jpg,tango dancing


In [3]:
test_df

Unnamed: 0,video,frame_path,label
0,BheDJtlAYpA,test/25_0.jpg,tap dancing
1,BheDJtlAYpA,test/25_1.jpg,tap dancing
2,BheDJtlAYpA,test/25_2.jpg,tap dancing
3,BheDJtlAYpA,test/25_3.jpg,tap dancing
4,BheDJtlAYpA,test/25_4.jpg,tap dancing
...,...,...,...
1522,F9Ehijaqdl0,test/587_16.jpg,tango dancing
1523,F9Ehijaqdl0,test/587_17.jpg,tango dancing
1524,F9Ehijaqdl0,test/587_18.jpg,tango dancing
1525,F9Ehijaqdl0,test/587_19.jpg,tango dancing


In [4]:
y_train = train_df["label"]
x_train = train_df.drop(labels = ["label", "video"], axis = 1)

y_test = test_df["label"]
x_test = test_df.drop(labels = ["label", "video"], axis = 1)

In [5]:
label_train_to_int = {label_name: i for i, label_name in enumerate(set(train_df['label']))}
label_test_to_int = {label_name: i for i, label_name in enumerate(set(test_df['label']))}

In [6]:
train_labels = [label_train_to_int[label_name] for label_name in y_train]
test_labels = [label_test_to_int[label_name] for label_name in y_test]

In [7]:
def load_n_preprocess(frame_path):
    image = tf.io.read_file(f'{frame_path}')
    image = tf.image.decode_jpeg(image, channels = 3)
    image = tf.image.resize(image, [128,128])
    image = preprocess_input(image)
    image = image / 225

    return image

In [8]:
def lnp_all_images(df):
    images = [load_n_preprocess(frame_path) for frame_path in df['frame_path']]
    return tf.stack(images)

In [9]:
x_train = lnp_all_images(x_train)
train_frames_dataset = tf.data.Dataset.from_tensor_slices(x_train)

train_labels_tensor = tf.convert_to_tensor(train_labels)
train_labels_dataset = tf.data.Dataset.from_tensor_slices(train_labels_tensor)
train_dataset = tf.data.Dataset.zip((train_frames_dataset, train_labels_dataset))

x_test = lnp_all_images(x_test)
test_frames_dataset = tf.data.Dataset.from_tensor_slices(x_test)

test_labels_tensor = tf.convert_to_tensor(test_labels)
test_labels_dataset = tf.data.Dataset.from_tensor_slices(test_labels_tensor)
test_dataset = tf.data.Dataset.zip((test_frames_dataset, test_labels_dataset))

In [10]:
train_dataset = train_dataset.shuffle(len(train_dataset)).batch(64)

In [11]:
test_dataset = test_dataset.shuffle(len(test_dataset)).batch(64)

In [12]:
base_model = tf.keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3))
base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(15, activation='softmax')
])

In [13]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

In [14]:
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [17]:
num_epochs = 50

In [19]:
model.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy_metric])

In [20]:
history = model.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=test_dataset
)

Epoch 1/50
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m471s[0m 3s/step - loss: 2.7215 - sparse_categorical_accuracy: 0.0825 - val_loss: 2.6438 - val_sparse_categorical_accuracy: 0.1409
Epoch 2/50
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m394s[0m 3s/step - loss: 2.6163 - sparse_categorical_accuracy: 0.1340 - val_loss: 2.6001 - val_sparse_categorical_accuracy: 0.1234
Epoch 3/50
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m394s[0m 3s/step - loss: 2.5836 - sparse_categorical_accuracy: 0.1382 - val_loss: 2.5699 - val_sparse_categorical_accuracy: 0.1651
Epoch 4/50
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 3s/step - loss: 2.5470 - sparse_categorical_accuracy: 0.1557 - val_loss: 2.5377 - val_sparse_categorical_accuracy: 0.1583
Epoch 5/50
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m399s[0m 3s/step - loss: 2.5071 - sparse_categorical_accuracy: 0.1660 - val_loss: 2.5180 - val_sparse_categorical_accuracy: 0.

In [21]:
train_loss = history.history['loss'][-1]
train_accuracy = history.history['sparse_categorical_accuracy'][-1]
test_loss = history.history['val_loss'][-1]
test_accuracy = history.history['val_sparse_categorical_accuracy'][-1]
print(f"Train loss: {train_loss:.4f}, Train accuracy: {train_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

Train loss: 2.0415, Train accuracy: 0.3664
Test loss: 2.1043, Test accuracy: 0.3499


In [22]:
model.save('model_034.keras')

In [23]:

image_labels = ['country line dancing',
 'tango dancing',
 'swing dancing',
 'jumpstyle dancing',
 'dancing gangnam style',
 'square dancing',
 'dancing charleston',
 'salsa dancing',
 'mosh pit dancing',
 'robot dancing',
 'tap dancing',
 'breakdancing',
 'belly dancing',
 'dancing ballet',
 'dancing macarena']


In [24]:
def predict_video(video):
    classes = []
    prob = []
    image_names = test_df.loc[test_df['video'] == video, 'frame_path']
    for image_name in image_names:
        image = tf.io.read_file(image_name)
        image = tf.image.decode_jpeg(image, channels = 3)
        image = tf.image.resize(image, [128,128])
        image = np.expand_dims(image, axis=0)
        image = image / 225
        pred = model.predict(image, verbose=0)
        predicted_label_index = np.argmax(pred[0])
        probability = pred[0][predicted_label_index]
        # print(f"Предсказанный класс: {image_labels[predicted_label_index]}, Вероятность: {probability:.4f}")
        classes.append(image_labels[predicted_label_index])
        prob.append(probability)
    result_dict = pd.DataFrame({'label': classes, 'probability': prob})
    max_dict_prob = result_dict.mode()['probability'].max()
    pdict = result_dict[result_dict['label'] == f"{result_dict.mode()['label'][0]}"]
    return max_dict_prob, pdict, result_dict

In [25]:
test_df = pd.read_csv('test_frames_20.csv')
videos = test_df['video'].unique()
videos

array(['BheDJtlAYpA', '3hII9QR7sag', 'EgM_PTRnV2k', 'iT088BHtz5Y',
       '3gaE6kCFXMg', 'OETUdxEamNI', '8KB7Y262bnE', 'S6oaDdsOVX0',
       '45PO7whfm0A', 'vpWkTE9AbAE', 'UpNvGpDvDiA', 'b0tigV6LTi0',
       'U7qIS5cRhok', 'brE_rDvfn1g', 'bfoFYNHN1po', 'S3IEpNzRdgw',
       'iSIUI8mnU4Y', 'p6Qp9KyqPrM', 'xx4c-7-FNjE', 'IJLpbYXHpbQ',
       'wjL3VY73HqI', '1jNw6TUXpqQ', 'xsRNDo8Upys', 'Qho8YFNeyd8',
       'I6FLGOi43WQ', 'JEMtzhgvH9w', 'og0NG5r04gQ', 'N4_W7KkwaH0',
       'S6PiB8G06ls', 'WCEqNkSK68w', 'ab0f7P0X-uo', 'iFHoIxvt3Lc',
       'QAso9ki6KQ8', '-onTnotvtPI', 'Q5bjuC2zF4s', 'JJcoEwO0yc8',
       '4uCa0PgPRK0', 'a3zZOA0-Q0c', 'QuN5OYQ0K7Q', 'jop0c4PBByQ',
       '7aaP0JOWqto', 'H7BLjO_mVJo', 'Wi5sejYC6xs', 'hM6RQslR6B4',
       'jGuz4i3DYSs', 'WFF2528OhLU', 'uDssN4AhH7g', 'kUvBdvAU_pE',
       'lEvO4vRJgJ0', 'QZ6FBg5EkmA', 'S1IBhxdrK8Y', 'AFOA-p3S0gI',
       'u4crGNsyKuE', 'EfZlpMhU2h8', 'So-qvuOygvY', 'HfOpoduIfCM',
       'uxalPdl8yfw', 'yNNwLGZS9_8', 'b2z51ez006g', '-9N39otwJ

In [26]:
metric_data = {}
for video in videos:
    max_dict_prob, pdict, result_dict = predict_video(video)
    
    print("_______________________________________")
    print(f"Видео: {video}")
    current_class = {test_df.loc[test_df['video'] == video, 'label'].values[0]}
    print(f"Класс видео: {current_class}")
    predicted_class = {pdict.mode()['label'][0]}
    predicted_class_prob = {pdict['probability'].mean()}
    print(f"Наиболее встречающийся класс: {predicted_class} , средняя вероятность {predicted_class_prob}")
    if len(pdict.loc[result_dict['probability'] == max_dict_prob, 'label'].values) > 0:
        max_label = pdict.loc[result_dict['probability'] == max_dict_prob, 'label'].values[0]
        print(f"Наибольшая вероятность: {max_dict_prob}, класс {max_label}")
    else:
        print("Не удалось найти соответствующий класс для наибольшей вероятности.")
    metric_data[video] = [predicted_class, predicted_class_prob, current_class]

_______________________________________
Видео: BheDJtlAYpA
Класс видео: {'tap dancing'}
Наиболее встречающийся класс: {'tango dancing'} , средняя вероятность {0.24760437}
Наибольшая вероятность: 0.2702961564064026, класс tango dancing
_______________________________________
Видео: 3hII9QR7sag
Класс видео: {'tap dancing'}
Наиболее встречающийся класс: {'dancing gangnam style'} , средняя вероятность {0.19682547}
Наибольшая вероятность: 0.21876397728919983, класс dancing gangnam style
_______________________________________
Видео: EgM_PTRnV2k
Класс видео: {'tap dancing'}
Наиболее встречающийся класс: {'dancing gangnam style'} , средняя вероятность {0.19543193}
Не удалось найти соответствующий класс для наибольшей вероятности.
_______________________________________
Видео: iT088BHtz5Y
Класс видео: {'tap dancing'}
Наиболее встречающийся класс: {'dancing gangnam style'} , средняя вероятность {0.27174395}
Наибольшая вероятность: 0.30042290687561035, класс dancing gangnam style
_______________

In [27]:
true_pos_count = 0
for pred_c, prob, current_c in metric_data.values():
    if pred_c == current_c:
        true_pos_count += 1
        print(f"Предсказаный: {pred_c}, с точностью: {prob}, оригинал: {current_c}")

print(f"accuracy = {(true_pos_count / 75 * 100)} %")

Предсказаный: {'jumpstyle dancing'}, с точностью: {0.46352923}, оригинал: {'jumpstyle dancing'}
Предсказаный: {'jumpstyle dancing'}, с точностью: {0.18908684}, оригинал: {'jumpstyle dancing'}
Предсказаный: {'mosh pit dancing'}, с точностью: {0.26746187}, оригинал: {'mosh pit dancing'}
Предсказаный: {'mosh pit dancing'}, с точностью: {0.21045071}, оригинал: {'mosh pit dancing'}
Предсказаный: {'mosh pit dancing'}, с точностью: {0.25637347}, оригинал: {'mosh pit dancing'}
Предсказаный: {'mosh pit dancing'}, с точностью: {0.403565}, оригинал: {'mosh pit dancing'}
Предсказаный: {'dancing gangnam style'}, с точностью: {0.21466653}, оригинал: {'dancing gangnam style'}
Предсказаный: {'dancing gangnam style'}, с точностью: {0.21280341}, оригинал: {'dancing gangnam style'}
Предсказаный: {'dancing gangnam style'}, с точностью: {0.20292388}, оригинал: {'dancing gangnam style'}
Предсказаный: {'dancing gangnam style'}, с точностью: {0.26781078}, оригинал: {'dancing gangnam style'}
accuracy = 13,3333