In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import CSVLogger, ReduceLROnPlateau

In [2]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import CSVLogger, ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.layers import Dense, Flatten, Dropout, UpSampling2D, BatchNormalization

In [3]:
train_df = pd.read_csv('train_frames.csv')
test_df = pd.read_csv('test_frames.csv')
train_df

Unnamed: 0,video,frame_path,label
0,GcluCxjiSjI,frames/0_0.jpg,tap dancing
1,GcluCxjiSjI,frames/0_1.jpg,tap dancing
2,GcluCxjiSjI,frames/0_2.jpg,tap dancing
3,GcluCxjiSjI,frames/0_3.jpg,tap dancing
4,GcluCxjiSjI,frames/0_4.jpg,tap dancing
...,...,...,...
12694,LT-e_wj6d9w,frames/617_16.jpg,tango dancing
12695,LT-e_wj6d9w,frames/617_17.jpg,tango dancing
12696,LT-e_wj6d9w,frames/617_18.jpg,tango dancing
12697,LT-e_wj6d9w,frames/617_19.jpg,tango dancing


In [None]:
test_df

In [4]:
y_train = train_df["label"]
x_train = train_df.drop(labels = ["label", "video"], axis = 1)

y_test = test_df["label"]
x_test = test_df.drop(labels = ["label", "video"], axis = 1)

In [5]:
label_train_to_int = {label_name: i for i, label_name in enumerate(set(train_df['label']))}
label_test_to_int = {label_name: i for i, label_name in enumerate(set(test_df['label']))}

In [6]:
label_train_to_int

{'salsa dancing': 0,
 'mosh pit dancing': 1,
 'square dancing': 2,
 'dancing charleston': 3,
 'dancing gangnam style': 4,
 'tango dancing': 5,
 'dancing macarena': 6,
 'tap dancing': 7,
 'country line dancing': 8,
 'swing dancing': 9,
 'breakdancing': 10,
 'belly dancing': 11,
 'jumpstyle dancing': 12,
 'robot dancing': 13,
 'dancing ballet': 14}

In [None]:
label_test_to_int

In [7]:
# train_labels = [label_to_int[label_name] for label_name in train_classes]
# test_labels = [label_to_int[label_name] for label_name in test_classes]

In [8]:
train_labels = to_categorical(label_train_to_int, num_classes=15)
train_labels = train_labels.astype('float32')
test_labels = to_categorical(label_test_to_int, num_classes=15)
test_labels = test_labels.astype('float32')


In [9]:
def load_n_preprocess(frame_path, label):
    image = tf.io.read_file(frame_path)
    image = tf.image.decode_jpeg(image, channels = 3)
    image = tf.image.resize(image, [32,32])
    image = preprocess_input(image)
    image = image / 225

    return image, label

In [10]:
# def lnp_all_images(frame_path):
#     images = [load_n_preprocess(frame_path) for frame_path in frames_df['frame_path']]
#     return tf.stack(images)

In [11]:

train_frames_tensor = tf.convert_to_tensor(x_train)
train_labels_tensor = tf.convert_to_tensor(train_labels)

test_frames_tensor = tf.convert_to_tensor(x_test)
test_labels_tensor = tf.convert_to_tensor(test_labels)

In [12]:
train_frames_tensor

<tf.Tensor: shape=(8889,), dtype=string, numpy=
array([b'frames/19_9.jpg', b'frames/500_9.jpg', b'frames/359_15.jpg', ...,
       b'frames/129_6.jpg', b'frames/173_10.jpg', b'frames/378_3.jpg'],
      dtype=object)>

In [13]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_frames_tensor, train_labels_tensor))
train_dataset = train_dataset.map(load_n_preprocess)
train_dataset = train_dataset.shuffle(len(train_dataset)).batch(64)

In [14]:
test_dataset = tf.data.Dataset.from_tensor_slices((test_frames_tensor, test_labels_tensor))
test_dataset = test_dataset.map(load_n_preprocess)
test_dataset = test_dataset.shuffle(len(test_dataset)).batch(64)


In [15]:
train_dataset

<_BatchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 15), dtype=tf.float32, name=None))>

In [16]:
# base_model = tf.keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3))
# base_model.trainable = False
# 
# model = tf.keras.Sequential([
#     base_model,
#     tf.keras.layers.GlobalAveragePooling2D(),
#     tf.keras.layers.Dense(15, activation='softmax')
# ])
model = None
model = Sequential()
model.add(UpSampling2D())
model.add(UpSampling2D())
model.add(UpSampling2D())

inc_model = InceptionV3(include_top = False, weights = None, pooling = 'max', classes = 15)
for layer in inc_model.layers:
    layer.trainable = True
            
model.add(inc_model)
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(15, activation = 'softmax'))

In [17]:
class NGL(tf.keras.losses.Loss):
    def __init__(
    	self, 
    	scaling=False,
    	name="ngl_loss"):
        super().__init__(name=name)
        self.name = name
        self.scaling = scaling

    def call(self, y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        if self.scaling == True:
	 	        y_pred = tf.math.sigmoid(y_pred)
        part_1 = tf.math.exp(2.4092 - y_pred - y_pred*y_true)
        part_2 = tf.math.cos(tf.math.cos(tf.math.sin(y_pred)))
        elements = part_1 - part_2
        loss = tf.reduce_mean(elements)
        return loss

In [18]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

In [19]:
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [20]:
# @tf.function
# def train_step(images, labels):
#     with tf.GradientTape() as tape:
#         logits = model(images, training=True)
#         loss = loss_fn(labels, logits)
#     gradients = tape.gradient(loss, model.trainable_variables)
#     optimizer.apply_gradients(zip(gradients, model.trainable_variables))
#     accuracy = accuracy_metric(labels, logits)
#     return loss, accuracy

In [21]:
# @tf.function
# def test_step(images, labels):
#     logits = model(images, training=False)
#     loss = loss_fn(labels, logits)
#     accuracy = accuracy_metric(labels, logits)
#     return loss, accuracy

In [22]:
num_epochs = 20

In [23]:
# for epoch in range(num_epochs):
#     print('Epoch {}/{}'.format(epoch + 1, num_epochs))
#     accuracy_metric.reset_state()
# 
#     for images, labels in train_dataset:
#         loss, accuracy = train_step(images, labels)
#         accuracy_metric.update_state(accuracy, labels)
#         print('Loss: {}, Accuracy: {}'.format(loss, accuracy))
# 
#     print(f"Epoch accuracy: {accuracy_metric.result().numpy()}")
# 
#     accuracy_metric.reset_state()
#     for images, labels in test_dataset:
#         loss, accuracy = test_step(images, labels)
#         accuracy_metric.update_state(accuracy, labels)
#         print('Loss: {}, Accuracy: {}'.format(loss, accuracy))
# 
#     print(f"Epoch accuracy: {accuracy_metric.result().numpy()}")

In [24]:
model.compile(optimizer='adam', loss=loss_fn, metrics=[accuracy_metric])
model.build(input_shape = (None, 32, 32, 3))
model.summary()

In [25]:
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=5, min_lr=1e-4)

In [26]:
filename = 'supplementary.csv'
csv_logger = CSVLogger(filename)

In [27]:
history = model.fit(
    train_dataset,
    batch_size=64, 
    epochs=num_epochs,
    validation_data=test_dataset,
    callbacks=[reduce_lr, csv_logger],
    verbose = 1
)

Epoch 1/20
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1192s[0m 8s/step - accuracy: 0.0706 - loss: 5.8221 - val_accuracy: 0.0635 - val_loss: 5.8209 - learning_rate: 0.0010
Epoch 2/20
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1122s[0m 8s/step - accuracy: 0.0797 - loss: 5.8214 - val_accuracy: 0.1021 - val_loss: 5.8209 - learning_rate: 0.0010
Epoch 3/20
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1093s[0m 8s/step - accuracy: 0.0885 - loss: 5.8211 - val_accuracy: 0.1231 - val_loss: 5.8208 - learning_rate: 0.0010
Epoch 4/20
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1094s[0m 8s/step - accuracy: 0.1006 - loss: 5.8210 - val_accuracy: 0.1457 - val_loss: 5.8210 - learning_rate: 0.0010
Epoch 5/20
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1087s[0m 8s/step - accuracy: 0.1286 - loss: 5.8209 - val_accuracy: 0.1924 - val_loss: 5.8208 - learning_rate: 0.0010
Epoch 6/20
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [28]:
loss, acc = model.evaluate(test_dataset, verbose=0)

In [30]:
train_loss = history.history['loss'][-1]
train_accuracy = history.history['accuracy'][-1]
test_loss = history.history['val_loss'][-1]
test_accuracy = history.history['val_accuracy'][-1]
print(f"Train loss: {train_loss:.4f}, Train accuracy: {train_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

Train loss: 5.8196, Train accuracy: 0.5467
Test loss: 5.8195, Test accuracy: 0.5903


In [None]:
# model.save('inception_ngl.keras')

In [33]:

image_labels = ['country line dancing',
 'tango dancing',
 'swing dancing',
 'jumpstyle dancing',
 'dancing gangnam style',
 'square dancing',
 'dancing charleston',
 'salsa dancing',
 'mosh pit dancing',
 'robot dancing',
 'tap dancing',
 'breakdancing',
 'belly dancing',
 'dancing ballet',
 'dancing macarena']


In [45]:
def predict_video(video):
    classes = []
    prob = []
    image_names = test_df.loc[test_df['video'] == video, 'frame_path']
    for image_name in image_names:
        image = tf.io.read_file(image_name)
        image = tf.image.decode_jpeg(image, channels = 3)
        image = tf.image.resize(image, [32,32])
        image = np.expand_dims(image, axis=0)
        image = image / 225
        pred = model.predict(image)
        predicted_label_index = np.argmax(pred[0])
        probability = pred[0][predicted_label_index]
        # print(f"Предсказанный класс: {image_labels[predicted_label_index]}, Вероятность: {probability:.4f}")
        classes.append(image_labels[predicted_label_index])
        prob.append(probability)
    dict = pd.DataFrame({'label': classes, 'probability': prob})
    max_dict_prob = dict.mode()['probability'].max()
    pdict = dict[dict['label'] == f"{dict.mode()['label'][0]}"]
    return max_dict_prob, pdict

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Предсказанный класс: tango dancing, Вероятность: 0.1347
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность: 0.1346
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность: 0.1305
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность: 0.1299
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность: 0.1287
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность: 0.1239
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность: 0.1222
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Предсказанный класс: tango dancing, Вероятность:

In [None]:
videos = test_df['video']
for video in videos:
    max_dict_prob, pdict = predict_video(video)
    print(f"Класс видео: {test_df.loc[test_df['video'] == video, 'label'].values[0]}")
    print("_______________________________________")
    print(f"Наиболее встречающийся класс: {dict.mode()['label'][0]} , средняя вероятность {pdict['probability'].mean()}")
    print(f"Наибольшая вероятность: {max_dict_prob}, класс {dict.loc[dict['probability'] == max_dict_prob, 'label'].values[0]}")

In [47]:
# print(f"Класс видео: {frames_df.loc[frames_df['video'] == video, 'label'].values[0]}")
# print("_______________________________________")
# print("Предсказанные классы по фреймам:")
# print(dict)
# print(f"Наиболее встречающийся класс: {dict.mode()['label'][0]} , средняя вероятность {pdict['probability'].mean()}")
# print(f"Наибольшая вероятность: {max_dict_prob}, класс {dict.loc[dict['probability'] == max_dict_prob, 'label'].values[0]}")

Класс видео: tango dancing
_______________________________________
Предсказанные классы по фреймам:
            label  probability
0   tango dancing     0.134746
1   tango dancing     0.134590
2   tango dancing     0.130474
3   tango dancing     0.129858
4   tango dancing     0.128675
5   tango dancing     0.123899
6   tango dancing     0.122184
7   tango dancing     0.126576
8   tango dancing     0.132133
9   tango dancing     0.149597
10  tango dancing     0.153763
11  tango dancing     0.163447
12  tango dancing     0.142676
13  tango dancing     0.141084
14  tango dancing     0.124360
15  tango dancing     0.131107
16  tango dancing     0.132135
17  tango dancing     0.131415
18  tango dancing     0.140565
19  tango dancing     0.123596
20  tango dancing     0.123248
Наиболее встречающийся класс: tango dancing , средняя вероятность 0.1342919021844864
Наибольшая вероятность: 0.16344745457172394, класс tango dancing


Видео: x0LzgIUDIes
Модель на 20 кадров в видео
Класс видео: mosh pit dancing
_______________________________________
Предсказанные классы по фреймам:
                label  probability
0   jumpstyle dancing     0.153224
1   jumpstyle dancing     0.146594
2   jumpstyle dancing     0.151000
3   jumpstyle dancing     0.141237
4   jumpstyle dancing     0.131609
5   jumpstyle dancing     0.169623
6   jumpstyle dancing     0.172320
7   jumpstyle dancing     0.180115
8       belly dancing     0.124170
9    dancing macarena     0.122473
10   dancing macarena     0.161632
11   dancing macarena     0.131638
12      belly dancing     0.123214
13   mosh pit dancing     0.214120
14   mosh pit dancing     0.205695
15   mosh pit dancing     0.295078
16     square dancing     0.211966
17   mosh pit dancing     0.280752
18   mosh pit dancing     0.335356
19   mosh pit dancing     0.221957
20   mosh pit dancing     0.236604
Наиболее встречающийся класс: jumpstyle dancing , средняя вероятность 0.15571531653404236
Наибольшая вероятность: 0.33535560965538025, класс mosh pit dancing

In [None]:
#TODO: добавить метрики для видео, подача 100 видео, разделение test/train поменять