In [14]:
import ast

import pandas as pd
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.pairwise import cosine_similarity

from scripts.create_embeddings_dataset import batch_create_images_dataset

In [None]:
# Run the project's batch image-dataset builder over the raw image folder.
# NOTE(review): presumably this writes the `images_dataset.csv` that the next cell
# reads from `../images` — confirm against scripts.create_embeddings_dataset.
batch_create_images_dataset(
    images_dir="../images/imagesf2",
    output_dir="../images",
    batch_size=50,
    count=5000,
)

In [16]:
# Load the image and audio tables that `match_datasets` consumes; it reads an
# `embeddings` column from each, plus `image_path` / `audio_path` respectively.
images_df, music_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../images/images_dataset.csv", "../music/audio_dataset.csv")
)

In [19]:
def _parse_embeddings(column: pd.Series) -> np.ndarray:
    """Convert a column of embeddings into a 2-D float array, one row per record.

    Cells read back from CSV are strings such as ``"[0.1, 0.2]"`` and are parsed
    with ``ast.literal_eval``; cells that already hold a sequence (list, tuple,
    ndarray) are used as-is, so frames built in memory work too.
    """
    vectors = [
        ast.literal_eval(cell) if isinstance(cell, str) else cell
        for cell in column.tolist()
    ]
    return np.asarray(vectors, dtype=float)


def match_datasets(image_embeddings_df: pd.DataFrame, music_embeddings_df: pd.DataFrame) -> pd.DataFrame:
    """Pair images with audio tracks one-to-one by embedding similarity.

    A cosine-similarity matrix (rows: images, columns: tracks) is turned into a
    cost matrix ``1 - similarity`` and handed to ``linear_sum_assignment``, which
    picks the pairing with the lowest total cost. Every image and every track is
    used at most once; when the two frames differ in length, only
    ``min(n_images, n_tracks)`` pairs are produced.

    Args:
        image_embeddings_df: Frame with ``image_path`` and ``embeddings`` columns.
        music_embeddings_df: Frame with ``audio_path`` and ``embeddings`` columns.
            In both frames an ``embeddings`` cell is either a sequence of numbers
            or its string literal form (as produced by a CSV round trip).

    Returns:
        A frame with columns ``image_path``, ``audio_path``, ``music_embedding``,
        ``image_embedding`` (the embedding cells copied unchanged from the inputs)
        and ``score`` (cosine similarity of the matched pair). It has no rows when
        either input has no rows.

    Raises:
        KeyError: If a required column is missing from a non-empty input.
        ValueError: If an embedding string cannot be parsed, or the embeddings do
            not form a rectangular array of matching dimensionality.
    """
    result_columns = ["image_path", "audio_path", "music_embedding", "image_embedding", "score"]

    # Nothing to pair: return an empty, correctly-shaped result instead of letting
    # the similarity computation fail on a 1-D empty array.
    if len(image_embeddings_df) == 0 or len(music_embeddings_df) == 0:
        return pd.DataFrame(columns=result_columns)

    image_embeddings = _parse_embeddings(image_embeddings_df["embeddings"])
    music_embeddings = _parse_embeddings(music_embeddings_df["embeddings"])

    similarity_matrix = cosine_similarity(image_embeddings, music_embeddings)

    # linear_sum_assignment minimises, so convert similarity into a cost.
    cost_matrix = 1 - similarity_matrix
    image_ind, music_ind = linear_sum_assignment(cost_matrix)

    matched_images = image_embeddings_df.iloc[image_ind]
    matched_music = music_embeddings_df.iloc[music_ind]

    return pd.DataFrame(
        {
            "image_path": matched_images["image_path"].values,
            "audio_path": matched_music["audio_path"].values,
            "music_embedding": matched_music["embeddings"].values,
            "image_embedding": matched_images["embeddings"].values,
            # Read the score straight from the similarity matrix rather than
            # computing 1 - (1 - s), which loses floating-point precision.
            "score": similarity_matrix[image_ind, music_ind],
        }
    )

In [21]:
# Preview the first five image/audio matches.
match_datasets(
    image_embeddings_df=images_df,
    music_embeddings_df=music_df,
).head(n=5)

Unnamed: 0,image_path,audio_path,music_embedding,image_embedding,score
0,antoni-tapies_sous-zero-1979.jpg,../data/music\rock\eddy - Pure Adrenaline.mp3,"[0.2464916706085205, 0.35366353392601013, -0.4...","[0.010796519927680492, -0.03631719946861267, 0...",0.192772
1,arkhip-kuindzhi_fishing-on-the-black-sea-1900.jpg,../data/music\electronic\Vernon Lenoir - Gora.mp3,"[-0.16167239844799042, 0.2554575502872467, -0....","[-0.017395511269569397, -0.019446855410933495,...",0.189284
2,chris-ofili_blue-devils-2014.jpg,../data/music\classical\Paul _Wine_ Jones - Pu...,"[0.5296326279640198, 0.010789294727146626, -0....","[0.024471327662467957, -0.027269702404737473, ...",0.082877
3,danielson-gambogi-elin_pianospelare-1907.jpg,../data/music\classical\Aaron Dunn - Minuet - ...,"[-0.12106630951166153, 0.6024715900421143, -0....","[-0.012119156308472157, -0.00800519809126854, ...",0.335021
4,dorothy-lathrop_the-fairy-circus-1931-1.jpg,../data/music\rock\Jon Shuemaker - Break it Do...,"[0.36720386147499084, 0.3295438587665558, 0.22...","[-0.020545760169625282, -0.0056816828437149525...",0.228879
