In [7]:
import torch
from transformers import CLIPImageProcessor, CLIPModel, CLIPProcessor
from PIL import Image
import os
import numpy as np
import time
import random
import cv2
import faiss
import pandas as pd
import joblib

In [2]:
# Hugging Face checkpoint name shared by the model and both processors below.
model_ID = "openai/clip-vit-base-patch32"

# Combined CLIP processor and the image-only processor for the same checkpoint.
processor = CLIPProcessor.from_pretrained(model_ID)
preprocess = CLIPImageProcessor.from_pretrained(model_ID)

# CLIP weights; later cells call model.get_image_features(...) on this object.
model = CLIPModel.from_pretrained(model_ID)

In [3]:
def load_and_preprocess_image_cv(image_path):
    """Read an image from disk with OpenCV and run it through the CLIP image processor.

    Args:
        image_path: path of the image file to load.

    Returns:
        The output of ``preprocess(image, return_tensors="pt")``; callers in this
        notebook read its ``"pixel_values"`` entry.

    Raises:
        ValueError: if OpenCV cannot read or decode the file. ``cv2.imread``
            signals failure by returning ``None`` rather than raising, so the
            problem is reported here with the offending path.

    Note:
        Embeddings computed before the RGB conversion below was enabled were
        built from BGR input and must be recomputed to stay comparable.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read or decode image: {image_path!r}")
    # OpenCV decodes colour images in BGR channel order, while the CLIP processor
    # normalises per channel assuming RGB, so the channels must be swapped first.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return preprocess(image, return_tensors="pt")

In [5]:
image_folder = 'photos'
image_embeddings = []
embedded_files = []  # file names that were embedded successfully, in embedding order
skipped_files = []   # (file name, error) pairs for files that could not be embedded

# sorted() makes the gallery order reproducible; os.listdir order is arbitrary.
for image_file in sorted(os.listdir(image_folder)):
    image_path = os.path.join(image_folder, image_file)
    try:
        image = load_and_preprocess_image_cv(image_path)["pixel_values"]
        with torch.no_grad():
            image_embedding = model.get_image_features(image)
    except Exception as exc:  # not a bare except: Ctrl-C must still interrupt the loop
        skipped_files.append((image_file, repr(exc)))
        continue
    embedded_files.append(image_file)
    image_embeddings.append(image_embedding)

# Keep image_files index-aligned with image_embeddings: later cells map a position
# in image_embeddings back to a file name through image_files, so files that
# failed to embed must not stay in the list.
image_files = embedded_files

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) that could not be embedded, e.g. {skipped_files[:3]}")

In [12]:
# joblib.dump(image_embeddings, 'image_embeddings.joblib')

In [158]:
# Table of images with a space-separated text-description vector per row.
df = pd.read_csv('df.csv')
# Parse each vector string into a float32 array so it can be passed to FAISS as a query.
df['Vector'] = df['Vector'].apply(lambda x: np.fromstring(x, sep=' ', dtype=np.float32))

# FAISS operates on C-contiguous float32 matrices; np.loadtxt would otherwise
# return float64. ndmin=2 keeps a single-row file two-dimensional so that
# shape[1] is still the vector dimension.
books_vector = np.ascontiguousarray(np.loadtxt('vectors.txt', dtype=np.float32, ndmin=2))

# Exact inner-product index over the description vectors.
# NOTE(review): later cells look up df rows by the positions this index returns,
# which presumes vectors.txt rows are in the same order as df rows - confirm.
index = faiss.IndexFlatIP(books_vector.shape[1])
index.add(books_vector)

In [428]:
def recommendation(array: list, neighbours_clip: int = 17, neighbours_discription: int = 5,
                   n_recommendations: int = 20) -> list:
    """Recommend photos similar to the ones a user liked.

    Candidates come from two sources and are merged without duplicates:
    nearest neighbours of each liked photo by CLIP image-embedding cosine
    similarity, and nearest neighbours of its description vector in the FAISS
    index.

    Args:
        array: file names of liked photos (looked up inside the 'photos' folder
            and in the 'Image Name' column of ``df``).
        neighbours_clip: number of CLIP neighbours requested per liked photo;
            the liked photo itself is dropped, so at most
            ``neighbours_clip - 1`` candidates are kept.
        neighbours_discription: number of description neighbours requested per
            liked photo; again the photo itself is dropped.
        n_recommendations: maximum number of recommendations to return.

    Returns:
        A random selection of at most ``n_recommendations`` unique file names.
        Fewer are returned when fewer unique candidates exist; an empty input
        yields an empty list.
    """
    if not array:
        return []

    candidates = []

    # --- CLIP image similarity ---
    if image_embeddings and neighbours_clip > 1:
        # One (N, dim) matrix lets cosine_similarity score the whole gallery in a
        # single broadcasted call instead of a Python loop over N embeddings.
        gallery = torch.cat(image_embeddings, dim=0)
        # topk raises when k exceeds the number of scores, so clamp it.
        k_clip = min(neighbours_clip, gallery.shape[0])
        for img in array:
            path = os.path.join('photos', img)
            image = load_and_preprocess_image_cv(path)["pixel_values"]
            with torch.no_grad():
                embedding = model.get_image_features(image)
            scores = torch.nn.functional.cosine_similarity(embedding, gallery)
            _, top_indices = torch.topk(scores, k=k_clip)
            # NOTE(review): assumes image_files[i] is the file behind
            # image_embeddings[i] - confirm both lists are built in lockstep.
            # Drop the query by name rather than assuming it is ranked first.
            neighbours = [image_files[i] for i in top_indices.tolist() if image_files[i] != img]
            candidates.extend(neighbours[:neighbours_clip - 1])

    # --- description-vector similarity ---
    if neighbours_discription > 1:
        for imgdis in array:
            vectors = df.loc[df['Image Name'] == imgdis, 'Vector']
            if vectors.empty:
                # No description vector for this photo: rely on CLIP candidates only.
                continue
            # FAISS expects a float32 matrix of shape (n_queries, dim).
            query = np.ascontiguousarray(vectors.iloc[0], dtype=np.float32).reshape(1, -1)
            _, found = index.search(query, k=neighbours_discription)
            # FAISS pads with -1 when it has fewer than k results; iloc[-1] would
            # silently pick the last row, so those labels are filtered out.
            positions = [int(i) for i in found[0] if i >= 0]
            names = [name for name in df['Image Name'].iloc[positions].tolist() if name != imgdis]
            candidates.extend(names[:neighbours_discription - 1])

    unique_candidates = list(set(candidates))
    # random.sample raises if asked for more items than are available.
    sample_size = min(max(n_recommendations, 0), len(unique_candidates))
    return random.sample(unique_candidates, sample_size)

In [382]:
# Survey answers, one row per respondent; a 'recommended' column is attached below.
our_data = pd.read_csv(filepath_or_buffer='our_data.csv')
def sample_images(row, image_files, n=250):
    """Pick up to ``n`` distinct random entries from ``image_files``.

    Args:
        row: the DataFrame row this is applied to; it is not used, the parameter
            exists so the function can be passed to ``DataFrame.apply(axis=1)``.
        image_files: list of file names to draw from.
        n: number of files requested.

    Returns:
        A list of ``min(n, len(image_files))`` distinct file names. The request
        is clamped because ``random.sample`` raises ``ValueError`` when asked
        for more items than the population holds.
    """
    return random.sample(image_files, min(n, len(image_files)))
# Attach a random list of image file names to every respondent (one
# sample_images call per row, using its default sample size).
our_data['recommended'] = our_data.apply(lambda row: sample_images(row, image_files), axis=1)

In [383]:
# Random selection of 72 distinct image file names, used below as the query set.
my_rec = random.sample(image_files, k=72)

In [392]:
def intersection(rec: list) -> pd.Series:
    """Return the row of ``our_data`` whose 'recommended' list overlaps ``rec`` the most.

    Args:
        rec: file names to compare against every row's 'recommended' list.

    Returns:
        The ``our_data`` row (a Series) with the largest number of shared file
        names. When several rows tie, the first of them is returned.

    Raises:
        ValueError: if ``our_data`` has no rows.
    """
    # Use the argument itself; reading a global here would make the result
    # independent of what the caller passed in.
    wanted = set(rec)
    overlaps = [len(wanted & set(recommended)) for recommended in our_data['recommended']]
    if not overlaps:
        raise ValueError("our_data has no rows to compare against")
    most_relevant = int(np.argmax(overlaps))
    return our_data.iloc[most_relevant]

In [400]:
# Show the respondent row that intersection() returns for the random query set.
intersection(rec=my_rec)

Ваше ФИО                            Зернов Владислав Александрович
Дата рождения                                           12.03.1996
Укажите город                                              Воронеж
Ваш пол                                                          М
about            ['Фастфуд', 'Поддерживаю чистоту и порядок, Вр...
recommended      [photo_2210_22-02-2022_22-59-57.jpg, photo_235...
Name: 29, dtype: object

In [399]:
# Number of file names shared by my_rec and the best-matching respondent.
# The row returned by intersection() is used directly: its .name is an index
# label, and feeding a label to the positional .iloc only works by coincidence
# while our_data keeps its default 0..n-1 index.
best_match = intersection(my_rec)
len(set(best_match['recommended']) & set(my_rec))

17

In [25]:
rec_images = []
# NOTE(review): a list never compares equal to the integer 0, so this is always
# False; an emptiness check would be `len(rec_images) == 0` or `not rec_images`.
rec_images == 0

False

In [24]:
*rec_images  # NOTE(review): a bare starred expression is a SyntaxError; unpack inside a call or literal, e.g. print(*rec_images)

SyntaxError: invalid syntax (2043753684.py, line 1)

In [29]:
# Add a single value to the scratch list.
rec_images.append(323)

In [30]:
rec_images  # display the current contents of the scratch list

[323]

In [None]:
0:"photo_847_19-03-2020_19-10-40.jpg"
1:"photo_11_15-07-2019_20-41-33.jpg"
2:"photo_1355_19-10-2020_15-11-33.jpg"
3:"photo_523_07-12-2019_09-00-13.jpg"
4:"photo_401_29-10-2019_09-42-47.jpg"
5:"photo_830_15-03-2020_09-45-44.jpg"
6:"photo_1784_26-05-2021_15-41-57.jpg"
7:"photo_2480_14-08-2022_13-16-00.jpg"
8:"photo_1066_23-06-2020_09-37-23.jpg"
9:"photo_659_14-01-2020_11-01-19.jpg"
10:"photo_1484_17-12-2020_01-01-11.jpg"
11:"photo_2426_24-06-2022_16-23-47.jpg"
12:"photo_2157_29-12-2021_19-25-21.jpg"
13:"photo_1136_13-07-2020_17-05-38.jpg"
14:"photo_2366_05-06-2022_17-46-51.jpg"
15:"photo_1420_26-11-2020_10-15-47.jpg"
16:"photo_142_18-08-2019_17-15-33.jpg"
17:"photo_2447_07-07-2022_11-09-51.jpg"
18:"photo_1181_28-07-2020_21-34-59.jpg"
19:"photo_280_24-09-2019_16-17-54.jpg"