In [1]:
import sys
import os
import subprocess
import shutil

import numpy as np
import pandas as pd
from qdrant_client import QdrantClient
from qdrant_client.http import models

In [2]:
# Fail fast on interpreters older than 3.8. Comparing against a tuple also
# accepts any later major version, which the message ("3.8 or above") implies.
if sys.version_info < (3, 8):
    raise Exception('Must be Python 3.8 or above')

In [3]:
# Root directory that holds one sub-directory per user
USERS_PATH = "users"
# Per-user sub-directory with the source video files
VIDEO_DIR_NAME = "videos"
# Per-user sub-directory with the raw vector files written by video2vec.py
VIDEO_VEC_DIR_NAME = "video_vectors"
# Default Qdrant collection name used by the helper functions
COLLECTION_NAME = "vector_collection"

In [8]:
def create_user(user_name, collection = COLLECTION_NAME):
    """Create the on-disk workspace for a user and open its Qdrant database.

    Ensures ``USERS_PATH/<user_name>`` exists together with its
    ``VIDEO_DIR_NAME`` and ``VIDEO_VEC_DIR_NAME`` sub-directories, then opens
    a Qdrant client whose storage path is ``USERS_PATH/<user_name>/db``.

    Note: the collection is set up with ``recreate_collection``, so calling
    this again for the same user starts from an empty collection.

    Args:
        user_name: Name of the user; used as the directory name.
        collection: Name of the Qdrant collection to (re)create.

    Returns:
        The ``QdrantClient`` opened on the user's database directory.
    """
    user_dir = os.path.join(USERS_PATH, user_name)
    # makedirs(exist_ok=True) is idempotent and also creates USERS_PATH itself
    # on the very first run, where os.listdir(USERS_PATH) would have failed.
    for sub_dir in (VIDEO_DIR_NAME, VIDEO_VEC_DIR_NAME):
        os.makedirs(os.path.join(user_dir, sub_dir), exist_ok=True)
    # The database lives under the user's own directory so that different
    # users never share (or wipe) the same store.
    client = QdrantClient(path=os.path.join(user_dir, 'db'))
    client.recreate_collection(
        collection_name=collection,
        # 1024 is the length of the vectors stored in this collection
        vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE)
    )
    return client

def create_raw_vector_file(input_file, output_file):
    """Run the external ``video2vec.py`` script to embed one video.

    The script is executed with the bundled ``conda/bin/python3.7``
    interpreter and the model files under ``./Models``; all paths are
    relative to the current working directory.

    Args:
        input_file: Path of the video to embed.
        output_file: Path the script should write the raw vector to.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
        FileNotFoundError: If the interpreter cannot be found.
    """
    command = [
                "conda/bin/python3.7",
                "video_embedding/video2vec.py",
                "--graph_file", "./Models/inception-v3_image_classify_graph_def.pb",
                "--fcnn_model", "./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done",
                "--input_file", input_file,
                "--output_file", output_file
            ]
    # check=True: a failed embedding run must not pass silently, otherwise the
    # caller would go on to read a missing or stale output file.
    subprocess.run(command, check=True)

def process_raw_vector(raw_vector):
    """Parse one line of a raw vector file into a NumPy array.

    Only the text after the last comma is used; it is split on underscores
    and every piece is converted with ``float``.

    Args:
        raw_vector: A text line whose last comma-separated field holds the
            underscore-separated components.

    Returns:
        A one-dimensional ``numpy.ndarray`` of floats.
    """
    packed_components = raw_vector.rsplit(',', 1)[-1]
    return np.array(list(map(float, packed_components.split('_'))))

def add_one_vector_to_bd(processed_vector,
                         video_name,
                         client,
                         collection = COLLECTION_NAME):
    """Store a single vector in the collection, tagged with its video name.

    The point id is the current exact point count of the collection, so ids
    continue from the number of points already stored.

    Args:
        processed_vector: The vector to store.
        video_name: Saved in the point payload under the key ``'source'``.
        client: Qdrant client used for the count and the upsert.
        collection: Name of the target collection.
    """
    next_id = client.count(collection_name=collection, exact=True).count
    single_point_batch = models.Batch(
        ids=[next_id],
        vectors=[processed_vector],
        payloads=[{'source': video_name}],
    )
    client.upsert(collection_name=collection, points=single_point_batch)

def calc_vector(video_name, 
                user_name,
                client, 
                collection = COLLECTION_NAME,
                users_path = USERS_PATH, 
                video_dir_name = VIDEO_DIR_NAME, 
                video_vec_dir_name = VIDEO_VEC_DIR_NAME):
    """Embed one of the user's videos and store the vector in the collection.

    The video is read from ``./<users_path>/<user_name>/<video_dir_name>/``
    and the raw vector file is written to
    ``./<users_path>/<user_name>/<video_vec_dir_name>/`` under the video's
    name without its extension.

    Args:
        video_name: File name of the video inside the user's video directory.
        user_name: Name of the user who owns the video.
        client: Qdrant client the vector is added to.
        collection: Name of the target collection.
        users_path: Root directory of all users.
        video_dir_name: Name of the per-user video directory.
        video_vec_dir_name: Name of the per-user raw vector directory.
    """
    user_dir = '/'.join(['.', users_path, user_name])
    input_file = '/'.join([user_dir, video_dir_name, video_name])
    # splitext drops only the final extension and, unlike splitting on '.',
    # keeps the full name when the file has no extension at all.
    output_file = '/'.join([user_dir, video_vec_dir_name, os.path.splitext(video_name)[0]])
    create_raw_vector_file(input_file, output_file)
    # Only the first line of the raw file is parsed; the context manager
    # closes the handle instead of leaving it to the garbage collector.
    with open(output_file, 'r') as vector_file:
        processed_vector = process_raw_vector(vector_file.readline())
    add_one_vector_to_bd(processed_vector, video_name, client, collection)

def calc_vector_and_get_closest(video_name, 
                                user_name,
                                client, 
                                collection = COLLECTION_NAME,
                                users_path = USERS_PATH, 
                                video_dir_name = VIDEO_DIR_NAME, 
                                video_vec_dir_name = VIDEO_VEC_DIR_NAME,
                                limit = 5):
    """Embed one of the user's videos and search the collection with it.

    The vector is used only as the query; it is not added to the collection.

    Args:
        video_name: File name of the video inside the user's video directory.
        user_name: Name of the user who owns the video.
        client: Qdrant client to search.
        collection: Name of the collection to search.
        users_path: Root directory of all users.
        video_dir_name: Name of the per-user video directory.
        video_vec_dir_name: Name of the per-user raw vector directory.
        limit: Maximum number of hits requested from the search.

    Returns:
        Whatever ``client.search`` returns for the query vector.
    """
    user_dir = '/'.join(['.', users_path, user_name])
    input_file = '/'.join([user_dir, video_dir_name, video_name])
    # splitext drops only the final extension and, unlike splitting on '.',
    # keeps the full name when the file has no extension at all.
    output_file = '/'.join([user_dir, video_vec_dir_name, os.path.splitext(video_name)[0]])
    create_raw_vector_file(input_file, output_file)
    # Only the first line of the raw file is parsed; the context manager
    # closes the handle instead of leaving it to the garbage collector.
    with open(output_file, 'r') as vector_file:
        processed_vector = process_raw_vector(vector_file.readline())
    return client.search(
        collection_name=collection,
        query_vector=processed_vector,
        limit=limit
    )

In [17]:
# User whose workspace the following cells operate on
user_name = 'test_user'

# Holds the Qdrant client returned by create_user.
# NOTE(review): the key is the literal string 'user_name', not the value of
# the variable; the later cells look the client up with the same literal key,
# so switching to all_users[user_name] has to be done in all of them together.
all_users = {}
all_users['user_name'] = create_user(user_name)

In [21]:
# Copy every file from Videos/ into the user's video directory
for video_name in os.listdir('Videos/'):
    shutil.copyfile(
        f'Videos/{video_name}',
        f'{USERS_PATH}/{user_name}/{VIDEO_DIR_NAME}/{video_name}',
    )

In [None]:
# Embed each file in the user's video directory and store its vector
for video_name in os.listdir(f'{USERS_PATH}/{user_name}/{VIDEO_DIR_NAME}'):
    calc_vector(video_name, user_name, all_users['user_name'])

In [22]:
# Embed the clip and query the user's collection with it
search_result = calc_vector_and_get_closest(
    video_name='cars_part.mp4',
    user_name=user_name,
    client=all_users['user_name'],
)




Instructions for updating:
Use tf.gfile.GFile.
W0403 14:11:03.317576 140650387453760 deprecation.py:323] From /home/alex/video_embedding/video_embedding/embedding/image_embedding.py:44: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.gfile.GFile.

W0403 14:11:03.317660 140650387453760 module_wrapper.py:139] From /home/alex/video_embedding/video_embedding/embedding/image_embedding.py:45: The name tf.GraphDef is deprecated. Please use tf.compat.v1.GraphDef instead.

2024-04-03 14:11:03.456132: W tensorflow/core/framework/op_def_util.cc:357] Op BatchNormWithGlobalNormalization is deprecated. It will cease to work in GraphDef version 9. Use tf.nn.batch_normalization().

W0403 14:11:03.518651 140650387453760 module_wrapper.py:139] From video_embedding/video2vec.py:36: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.

INFO:tensorflow: Create graph from "

Layer 0, FramConvLayer | Conv HWIO,S = [4,1,1,32],1; Pooling HW,S = [2, 1],2
Layer 1, FramConvLayer | Conv HWIO,S = [4,1,32,16],1; Pooling HW,S = [2, 1],2
Layer 2, FramConvLayer | Conv HWIO,S = [3,1,16,8],1; Pooling HW,S = [2, 1],2
Layer 3, FramConvLayer | Conv HWIO,S = [3,1,8,4],1; Pooling HW,S = [2, 1],2
Layer 4, FramConvLayer | Conv HWIO,S = [2,1,4,2],1; Pooling HW,S = [2, 1],2
Layer 5, FramConvLayer | Conv HWIO,S = [2,1,2,1],1; Pooling HW,S = [2, 1],2
Layer 6, DenseConnLayer | IO = [2048,1024]
Layer 7, SoftmaxLayer | IO = [1024,28]


INFO:tensorflow:Restoring parameters from ./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done
I0403 14:11:12.724215 140650387453760 saver.py:1284] Restoring parameters from ./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done
INFO:tensorflow: Restoring model from "./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done"
I0403 14:11:12.855102 140650387453760 video2vec.py:55]  Restoring model from "./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done"
INFO:tensorflow: Done!! Please check "./users/test_user_2/video_vectors/cars_part"
I0403 14:11:13.053834 140650387453760 video2vec.py:93]  Done!! Please check "./users/test_user_2/video_vectors/cars_part"


In [24]:
# Inspect the first hit returned by the search
search_result[0]

ScoredPoint(id=5, version=0, score=0.7638798476612538, payload={'source': 'cars.mp4'}, vector=None, shard_key=None)

In [27]:
# First hit is reported as the best match, the remaining hits one per line
result = search_result[0]
print('Best match:' + result.payload['source'], result.score)
print('Closest matches: ')
for result in search_result[1:]:
    print(result.payload['source'], '\tScore:', result.score)

Best match:cars.mp4 0.7638798476612538
Closest matches: 
bugatti.mp4 	Score: 0.6636374241685357
old_car.mp4 	Score: 0.5222012856200975
flowers.mp4 	Score: 0.5186204743832249
news_car.mp4 	Score: 0.4982871169034968


In [7]:
# Exact number of points currently stored in the user's collection
amount_of_vectors = all_users['user_name'].count(collection_name=COLLECTION_NAME, exact=True)

In [10]:
# The integer count carried by the result of the count call
amount_of_vectors.count

0

In [20]:
# Manual run of the embedding script for a single clip. Shell equivalent:
# conda/bin/python3.7 video_embedding/video2vec.py --graph_file ./Models/inception-v3_image_classify_graph_def.pb --fcnn_model ./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done --input_file ./Videos/cars_part.mp4 --output_file ./cars_part_v
command = (
    ["conda/bin/python3.7", "video_embedding/video2vec.py"]
    + ["--graph_file", "./Models/inception-v3_image_classify_graph_def.pb"]
    + ["--fcnn_model", "./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done"]
    + ["--input_file", "./Videos/cars_part.mp4"]
    + ["--output_file", "./cars_part_v"]
)
subprocess.run(command)




Instructions for updating:
Use tf.gfile.GFile.
W0403 11:52:42.218444 140081366599488 deprecation.py:323] From /home/alex/video_embedding/video_embedding/embedding/image_embedding.py:44: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.gfile.GFile.

W0403 11:52:42.218548 140081366599488 module_wrapper.py:139] From /home/alex/video_embedding/video_embedding/embedding/image_embedding.py:45: The name tf.GraphDef is deprecated. Please use tf.compat.v1.GraphDef instead.

2024-04-03 11:52:42.371873: W tensorflow/core/framework/op_def_util.cc:357] Op BatchNormWithGlobalNormalization is deprecated. It will cease to work in GraphDef version 9. Use tf.nn.batch_normalization().

W0403 11:52:42.438078 140081366599488 module_wrapper.py:139] From video_embedding/video2vec.py:36: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.

INFO:tensorflow: Create graph from "

Layer 0, FramConvLayer | Conv HWIO,S = [4,1,1,32],1; Pooling HW,S = [2, 1],2
Layer 1, FramConvLayer | Conv HWIO,S = [4,1,32,16],1; Pooling HW,S = [2, 1],2
Layer 2, FramConvLayer | Conv HWIO,S = [3,1,16,8],1; Pooling HW,S = [2, 1],2
Layer 3, FramConvLayer | Conv HWIO,S = [3,1,8,4],1; Pooling HW,S = [2, 1],2
Layer 4, FramConvLayer | Conv HWIO,S = [2,1,4,2],1; Pooling HW,S = [2, 1],2
Layer 5, FramConvLayer | Conv HWIO,S = [2,1,2,1],1; Pooling HW,S = [2, 1],2
Layer 6, DenseConnLayer | IO = [2048,1024]
Layer 7, SoftmaxLayer | IO = [1024,28]


INFO:tensorflow:Restoring parameters from ./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done
I0403 11:52:51.782610 140081366599488 saver.py:1284] Restoring parameters from ./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done
INFO:tensorflow: Restoring model from "./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done"
I0403 11:52:51.915764 140081366599488 video2vec.py:55]  Restoring model from "./Models/weibo_MCN_14k_frames30_sfps1.ckpt-done"
INFO:tensorflow: Done!! Please check "./cars_part_v"
I0403 11:52:52.123811 140081366599488 video2vec.py:93]  Done!! Please check "./cars_part_v"


CompletedProcess(args=['conda/bin/python3.7', 'video_embedding/video2vec.py', '--graph_file', './Models/inception-v3_image_classify_graph_def.pb', '--fcnn_model', './Models/weibo_MCN_14k_frames30_sfps1.ckpt-done', '--input_file', './Videos/cars_part.mp4', '--output_file', './cars_part_v'], returncode=0)

In [3]:
# Directory with raw vector files; the trailing slash matters because the
# path is later concatenated directly with a file name.
# NOTE(review): name looks like a typo for "video_vectors_path" — it is used
# by the loading cell as spelled here.
vide_vectors_path = 'Video_vectors/'

In [5]:
# Parse the first line of every raw vector file, keyed by file name
processed_vectors = {}
for video_vector in os.listdir(vide_vectors_path):
    # The context manager closes each file right after its line is read
    with open(vide_vectors_path + video_vector, 'r') as vector_file:
        processed_vectors[video_vector] = process_raw_vector(vector_file.readline())

In [6]:
# One row per vector file: its name and the parsed embedding
vectors = pd.DataFrame({
    'source': [*processed_vectors.keys()],
    'embedding': [*processed_vectors.values()],
})

In [14]:
# Display the source/embedding table
vectors

Unnamed: 0,source,embedding
0,flowers_v,"[0.247657, 0.355781, -0.0, 0.827153, 0.631797,..."
1,city_v,"[-0.0, -0.0, 0.499744, -0.0, -0.0, -0.0, -0.0,..."
2,cars_v,"[0.644818, -0.0, -0.0, -0.0, 0.933452, -0.0, -..."
3,cars_part_v,"[0.045901, 0.622638, -0.0, -0.0, -0.0, -0.0, 0..."


In [8]:
# Open the database at users/test_user/db and start from a fresh collection
# of 1024-dimensional vectors compared by cosine distance
my_collection = "vector_collection"
client = QdrantClient(path='users/test_user/db')
client.recreate_collection(
    my_collection,
    vectors_config=models.VectorParams(size=1024, distance=models.Distance.COSINE),
)

True

In [15]:
# The embeddings as a plain list of arrays, the form passed to the upsert cell
vectors['embedding'].values.tolist()

[array([ 0.247657,  0.355781, -0.      , ..., -0.      , -0.      ,
         0.163055]),
 array([-0.      , -0.      ,  0.499744, ...,  1.191377, -0.      ,
        -0.      ]),
 array([ 0.644818, -0.      , -0.      , ..., -0.      , -0.      ,
         1.191729]),
 array([ 0.045901,  0.622638, -0.      , ..., -0.      , -0.      ,
        -0.      ])]

In [9]:
# Upload all rows in one batch: ids are the row positions, each payload
# records the source name
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=[*range(len(vectors))],
        vectors=vectors['embedding'].values.tolist(),
        payloads=[{'source': source_name} for source_name in vectors['source']],
    ),
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [12]:
# Exact number of points in the collection
client.count(collection_name=my_collection, exact=True)

CountResult(count=4)

In [13]:
# List up to five stored records
client.scroll(collection_name=my_collection, limit=5)

([Record(id=0, payload={'source': 'flowers_v'}, vector=None, shard_key=None),
  Record(id=1, payload={'source': 'city_v'}, vector=None, shard_key=None),
  Record(id=2, payload={'source': 'cars_v'}, vector=None, shard_key=None),
  Record(id=3, payload={'source': 'cars_part_v'}, vector=None, shard_key=None)],
 None)

In [17]:
# Query with the embedding in row position 3 and request up to five hits
client.search(
    collection_name=my_collection,
    query_vector=vectors['embedding'].values[3],
    limit=5,
)

[ScoredPoint(id=3, version=0, score=0.9999999970113996, payload={'source': 'cars_part_v'}, vector=None, shard_key=None),
 ScoredPoint(id=2, version=0, score=0.7638798476612538, payload={'source': 'cars_v'}, vector=None, shard_key=None),
 ScoredPoint(id=0, version=0, score=0.518620474383225, payload={'source': 'flowers_v'}, vector=None, shard_key=None),
 ScoredPoint(id=1, version=0, score=0.4860995158778002, payload={'source': 'city_v'}, vector=None, shard_key=None)]

In [10]:
# def cosine_similarity(v_1, v_2):
#     return np.dot(v_1,v_2)/(np.linalg.norm(v_1)*np.linalg.norm(v_2))

In [11]:
# print('Car Full -- Car Clip:\t\t',cosine_similarity(vector_cars, vector_cars_part))
# print('Flowers Full -- Car Clip:\t',cosine_similarity(vector_flowers, vector_cars_part))
# print('City Full  -- Car Clip:\t\t',cosine_similarity(vector_city, vector_cars_part))