In [1]:
def extract_frames(path, video_filename):
    
    # Используем OpenCV для чтения видео
    cap = cv2.VideoCapture(path + video_filename)

    # Получаем исходную частоту кадров видео
    original_fps = cap.get(cv2.CAP_PROP_FPS)

    frames = []
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Преобразуем кадр из BGR (OpenCV формат) в RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Изменяем размер кадра до 224x224 (или другого размера по вашему выбору)

        frame = cv2.resize(frame, (400, 400))
    
        # Проверяем, нужно ли сохранить текущий кадр
        if frame_count % int(original_fps ) == 0:
            frames.append(frame)  # Преобразуем в тензор и меняем порядок осей

        frame_count += 1

    cap.release()

    return np.stack(frames, axis=0)

In [2]:
def create_embedding_dataframe(video1_embeddings, video2_embeddings, interval1, interval2):
    start1, end1 = map(int, interval1.split('-'))
    start2, end2 = map(int, interval2.split('-'))

    if (end1 - start1) != (end2 - start2):
        raise ValueError("Интервалы должны быть одинаковой длины")

    frames1 = []
    frames2 = []
    is_match = []

    # Добавляем совпадающие пары из интервалов
    for i in range(end1 - start1):
        emb1 = video1_embeddings[start1 + i]
        emb2 = video2_embeddings[start2 + i]
        
        frames1.append(emb1)
        frames2.append(emb2)
        
        is_match.append(1)

    total_pairs = end1 - start1 + 1
    all_indices_video1 = set(range(len(video1_embeddings)))
    all_indices_video2 = set(range(len(video2_embeddings)))
    interval_indices_video1 = set(range(start1, end1 + 1))
    interval_indices_video2 = set(range(start2, end2 + 1))

    non_interval_indices_video1 = list(all_indices_video1 - interval_indices_video1)
    non_interval_indices_video2 = list(all_indices_video2 - interval_indices_video2)

    if len(non_interval_indices_video1) < total_pairs or len(non_interval_indices_video2) < total_pairs:
        raise ValueError("Недостаточно элементов вне интервала для создания случайных пар")

    random_pairs_added = 0
    while random_pairs_added < total_pairs:
        idx1 = np.random.choice(non_interval_indices_video1)
        idx2 = np.random.choice(non_interval_indices_video2)

        emb1 = video1_embeddings[idx1]
        emb2 = video2_embeddings[idx2]

        frames1.append(emb1)
        frames2.append(emb2)
        
        is_match.append(0)

        random_pairs_added += 1

    df = pd.DataFrame({
        'frames1': frames1,
        'frames2' : frames2,
        'is_match': is_match
    })

    df = df.sample(frac=1).reset_index(drop=True)

    return df

In [3]:
import cv2
import pandas as pd
import numpy as np

train_df = pd.DataFrame()
df = pd.read_csv("piracy_val.csv")
for _,row in df.iterrows():
    piracy_ = extract_frames('val/', row["ID_piracy"])
    license_ = extract_frames('index/', row["ID_license"])
    proxy_df = create_embedding_dataframe(piracy_, license_, row["segment"], row["segment.1"])
    train_df = pd.concat([train_df, proxy_df], ignore_index=True)

train_df.head(5)

Unnamed: 0,frames1,frames2,is_match
0,"[[[193, 149, 148], [193, 149, 148], [195, 151,...","[[[57, 141, 56], [49, 133, 48], [61, 145, 60],...",0
1,"[[[142, 137, 134], [145, 140, 137], [147, 142,...","[[[24, 22, 30], [24, 22, 30], [24, 22, 30], [2...",1
2,"[[[147, 155, 160], [147, 155, 160], [147, 155,...","[[[33, 37, 41], [33, 37, 41], [33, 37, 41], [3...",1
3,"[[[188, 194, 195], [188, 194, 195], [188, 194,...","[[[92, 11, 16], [92, 11, 16], [92, 11, 16], [9...",0
4,"[[[155, 174, 184], [156, 175, 185], [156, 175,...","[[[217, 220, 227], [216, 219, 226], [215, 218,...",1


In [4]:
train_df.fillna(value=0, inplace=True)
y = train_df["is_match"].values
x = train_df[["frames1", "frames2"]]
print(x.shape)

(3443, 2)


In [5]:
import re
import numpy as np
from PIL import Image


from sklearn.model_selection import train_test_split
import tensorflow.python.keras.backend as K
from keras.layers import Activation
#from keras.layers import Input, Lambda, Dense, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, Input, Lambda
from keras.models import Sequential, Model
from keras.optimizers import RMSprop


x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)

'''def build_base_network():
    
    seq = Sequential()    
    nb_filter = [6, 12]
    kernel_size = 3    
    
    seq.add(Input(shape=(2, )))
    #convolutional layer 1
    seq.add(Convolution2D(nb_filter[0], kernel_size,
                          padding='valid', data_format="channels_first"))
    seq.add(Activation('relu'))
    seq.add(MaxPooling2D(pool_size=(2, 2))) 
    seq.add(Dropout(.25))
    
    #convolutional layer 2
    seq.add(Convolution2D(nb_filter[1], kernel_size, kernel_size, padding='valid', data_format="channels_first"))
    seq.add(Activation('relu'))
    seq.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first")) 
    seq.add(Dropout(.25))

    #flatten 
    seq.add(Flatten())
    seq.add(Dense(128, activation='relu'))
    seq.add(Dropout(0.1))
    seq.add(Dense(50, activation='relu'))
    return seq'''


def build_base_network(input_shape):
    
    seq = Sequential()    
    nb_filter = [6, 12]
    kernel_size = 3    
    
    # Convolutional layer 1
    seq.add (Conv2D(nb_filter[0], (kernel_size, kernel_size), input_shape=input_shape,
                   padding='valid'))
    seq.add(Activation('relu'))
    seq.add(MaxPooling2D(pool_size=(2, 2))) 
    seq.add(Dropout(0.25))
    
    # Convolutional layer 2
    seq.add(Conv2D(nb_filter[1], (kernel_size, kernel_size), padding='valid'))
    seq.add(Activation('relu'))
    seq.add(MaxPooling2D(pool_size=(2, 2))) 
    seq.add(Dropout(0.25))

    # Flatten
    seq.add(Flatten())
    seq.add(Dense(128, activation='relu'))
    seq.add(Dropout(0.1))
    seq.add(Dense(50, activation='relu'))
    return seq

input_dim = (400, 400, 3)
input_a = Input(shape=input_dim)
input_b = Input(shape=input_dim)
#img_a = Input(shape=input_dim)
#img_b = Input(shape=input_dim)

base_network = build_base_network(input_dim)
#feat_vecs_a = base_network(img_a)
#feat_vecs_b = base_network(img_b)
feat_vecs_a = base_network(input_a)
feat_vecs_b = base_network(input_b)

def euclidean_distance(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    euclidean_distance = K.sqrt(K.maximum(sum_square, K.epsilon()))
    #print(euclidean_distance)
    return euclidean_distance


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)
    

distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([feat_vecs_a, feat_vecs_b])


epochs = 40
rms = RMSprop()

model = Model(inputs=[input_a, input_b], outputs=distance)
def contrastive_loss(y_true, y_pred):
    margin = 1
    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

model.compile(loss=contrastive_loss, optimizer=rms)
#x_train = x_train[0]
print(x_train["frames1"].head())
img_1 = np.array(x_train["frames1"].to_list(), dtype=np.float32)
img_2 = np.array(x_train["frames2"].to_list(), dtype=np.float32)
img_1 = img_1 / 255.0
img_2 = img_2 / 255.0
#im = Image.fromarray(np.uint8(cm.gist_earth(myarray)*255))
#np_img = np.squeeze(np.array(img_1)[0],0)
#pil_img = Image.fromarray(np_img, 'RGB')



model.fit([img_1, img_2], y_train, validation_split=.25, batch_size=128, verbose=2, epochs=epochs)

img_1_test = np.array(x_test["frames1"].to_list(), dtype=np.float32)
img_2_test = np.array(x_test["frames2"].to_list(), dtype=np.float32)
img_1_test = img_1_test / 255.0
img_2_test = img_2_test / 255.0

pred = model.predict([img_1_test, img_2_test])

print(pred.var()) #distance ?
def compute_accuracy(predictions, labels):
    return labels[predictions.ravel() < 0.5].mean()

print(compute_accuracy(pred, y_test))


model.save_weights('model.weights.h5')
with open('model_architecture.json', 'w') as f:
    f.write(model.to_json())
print('saved')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


1660    [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...
1782    [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...
1842    [[[72, 33, 21], [72, 33, 21], [72, 33, 21], [7...
3266    [[[31, 118, 196], [31, 118, 196], [31, 118, 19...
2693    [[[121, 124, 116], [123, 126, 118], [125, 128,...
Name: frames1, dtype: object
Epoch 1/40
17/17 - 19s - 1s/step - loss: 153.2601 - val_loss: 0.4377
Epoch 2/40
17/17 - 17s - 995ms/step - loss: 0.4816 - val_loss: 0.3201
Epoch 3/40
17/17 - 17s - 976ms/step - loss: 0.4825 - val_loss: 2.5429
Epoch 4/40
17/17 - 17s - 975ms/step - loss: 1.7005 - val_loss: 0.3688
Epoch 5/40
17/17 - 16s - 958ms/step - loss: 0.5800 - val_loss: 0.5186
Epoch 6/40
17/17 - 17s - 977ms/step - loss: 1.0822 - val_loss: 0.5133
Epoch 7/40
17/17 - 17s - 984ms/step - loss: 0.4945 - val_loss: 0.5039
Epoch 8/40
17/17 - 16s - 971ms/step - loss: 1.3270 - val_loss: 0.5055
Epoch 9/40
17/17 - 17s - 1s/step - loss: 0.4837 - val_loss: 0.5004
Epoch 10/40
17/17 - 16s - 968ms/step - loss: 0.4769 - v