This notebook demonstrates how to generate predictions for a single female using one clustering method.

It is designed to aggregate the results of several clustering algorithms; at the moment only one clustering is used.

Given training data, the predictions can be refined further with a basic ranking method (FCN comparator) or a more complex approach (RLHF).

In [1]:
import numpy as np
import networkx as nx
import pickle
import pandas as pd
from sklearn.manifold import MDS
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the weighted graph stored as an edge list in 'male_graph.edgelist'.
# NOTE(review): G is not referenced by any later cell visible in this notebook —
# confirm it is still needed.
G = nx.read_weighted_edgelist('male_graph.edgelist')

In [3]:
# Read the cluster-assignment table. match_female_with_males filters it on the
# "cluster_id" column and returns the "vertex_id" column, so both must be present.
# NOTE(review): this file is named for Node2Vec + KMeans, while the centroid pickle
# loaded in the next cell is named "cluster_to_centroid_Louvain" — confirm both
# files come from the same clustering, otherwise cluster ids will not line up.
clusters = pd.read_csv("male_clusters_Node2Vec_KMeans.csv")

In [4]:
# Load the pickled cluster -> centroid mapping.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("cluster_to_centroid_Louvain", "rb") as f:
    cluster_to_centroid = pickle.load(f)

# Inverse lookup (centroid -> cluster). If two clusters share a centroid,
# the one appearing later in cluster_to_centroid wins.
centroid_to_cluster: dict = dict(
    zip(cluster_to_centroid.values(), cluster_to_centroid.keys())
)

In [5]:
# NOTE(review): pickle is already imported in the first cell; this re-import is redundant.
import pickle

# Load the pickled feature vectors. Later cells index this object by integer id
# (encoded_features[int(c)], encoded_features[female_id]).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("../data/features", "rb") as f:
    encoded_features = pickle.load(f)

# Preprocessed table; the last cells of the notebook inspect single rows of it
# by position via data.iloc[...].
data = pd.read_csv("../data/preprocessed_data.csv")

In [6]:
# Feature vector of every cluster centroid, in the iteration order of cluster_to_centroid.
# match_female_with_males maps list position i to cluster id i + 1, so this order matters.
centroids_vec = [encoded_features[int(c)] for c in cluster_to_centroid.values()]

In [7]:
def match_female_with_males(female_id: int) -> np.ndarray:
    """Return the vertex ids of the cluster whose centroid is most similar to a female.

    The female's feature vector is compared with every centroid vector in
    ``centroids_vec`` using cosine similarity, and the members of the
    best-matching cluster are looked up in ``clusters``.

    Parameters
    ----------
    female_id : int
        Index of the female in ``encoded_features``.

    Returns
    -------
    numpy.ndarray
        ``vertex_id`` values (converted to ``str``) of all rows in ``clusters``
        whose ``cluster_id`` equals the selected cluster. Empty if no row matches.
    """
    female_vec = encoded_features[female_id].reshape(1, -1)
    similarities = [
        cosine_similarity(female_vec, centroid.reshape(1, -1))[0][0]
        for centroid in centroids_vec
    ]
    # cosine_similarity is a similarity, not a distance: higher means closer,
    # so the closest centroid is the argmax (argmin would pick the least similar one).
    # The "+ 1" assumes cluster ids are 1-based and follow the order of centroids_vec
    # — TODO confirm against the cluster files.
    closest_cluster = np.argmax(similarities) + 1
    return clusters[clusters["cluster_id"] == closest_cluster]["vertex_id"].astype(str).values

In [None]:
# Id of the female to generate predictions for; match_female_with_males uses it
# to index encoded_features.
TEST_FEMALE = 51601

# Display the vertex ids (as strings) of the cluster matched to this female.
match_female_with_males(TEST_FEMALE)

In [None]:
# Show the preprocessed row of the test female. Uses the TEST_FEMALE constant
# instead of repeating the literal so the two cannot drift apart.
data.iloc[TEST_FEMALE]

In [None]:
# NOTE(review): 949 is a hard-coded row position — presumably one of the matched
# male ids returned by match_female_with_males above; confirm and name it.
data.iloc[949]