In [1]:
import numpy as np
import networkx as nx
import pickle
import pandas as pd
from sklearn.manifold import MDS
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read graph
G = nx.read_weighted_edgelist('male_graph.edgelist')

In [3]:
# Read clusters
clusters = pd.read_csv("male_clusters_Louvain.csv")

In [4]:
with open("cluster_to_centroid_Louvain", "rb") as f:
    cluster_to_centroid = pickle.load(f)

centroid_to_cluster: dict = {centroid: cluster for cluster, centroid in cluster_to_centroid.items()}

In [5]:
import pickle

with open("../data/features", "rb") as f:
    encoded_features = pickle.load(f)

data = pd.read_csv("../data/preprocessed_data.csv")

In [6]:
centroids_vec = [encoded_features[int(c)] for c in cluster_to_centroid.values()]

In [7]:
def match_female_with_males(female_id: int):
    female_vec = encoded_features[female_id].reshape(1, -1)
    distances = [cosine_similarity(female_vec, centroids_vec[i].reshape(1, -1))[0][0] for i in range(len(centroids_vec))]
    closest_cluster = np.argmin(distances) + 1
    return clusters[clusters["cluster_id"] == closest_cluster]["vertex_id"].astype(str).values

In [8]:
TEST_FEMALE = 51601

match_female_with_males(TEST_FEMALE)

array(['1', '1291', '2103', '6355', '6896', '3685', '4865', '7331', '18',
       '2242', '19', '22', '28', '49', '5661', '64', '66', '7938', '69',
       '82', '112', '114', '120', '2121', '127', '128', '1138', '137',
       '138', '139', '154', '158', '170', '4662', '189', '194', '209',
       '225', '301', '406', '724', '771', '979', '1007', '1017', '1079',
       '1328', '1505', '1714', '1753', '2158', '2289', '2806', '2812',
       '2820', '2896', '2902', '2908', '3072', '3212', '3265', '3351',
       '3446', '3652', '3703', '3785', '3905', '4130', '4407', '4461',
       '4494', '4523', '4556', '4741', '4744', '4905', '4989', '5276',
       '5326', '5376', '5525', '5563', '5684', '5820', '6124', '6346',
       '6405', '6447', '6508', '6832', '7036', '7168', '7455', '7588',
       '7627', '7749', '241', '278', '279', '280', '308', '317', '319',
       '320', '328', '347', '8309', '348', '359', '373', '378', '382',
       '416', '420', '431', '433', '444', '7420', '454', '464', '467'

In [9]:
data.iloc[51601]

age                                                           31
status                                                    single
sex                                                            f
orientation                                             straight
body_type                                              mesomorph
drinks                                                  socially
education                                        College or more
height                                                     157.0
job                                                  unspecified
location                               san francisco, california
religion                                             agnosticism
smokes                                                        no
essay0         real deal: i am sensitive (sorry, tears includ...
essay1         right now...watching 'celebrity rehab' and com...
essay2         eating chocolate and doing pilates (usually no...
essay3         my hair? e

In [10]:
data.iloc[949]

age                                                           31
status                                                    single
sex                                                            m
orientation                                             straight
body_type                                              mesomorph
drinks                                                  socially
education                                        College or more
height                                                     188.0
job                                         education / academia
location                                     oakland, california
religion                    judaism but not too serious about it
smokes                                                        no
essay0         i've leaped over medium sized boulders in a si...
essay1         playing basketball, soccer, surfing, and laugh...
essay2         soccer, basketball, boggle, pool (the game wit...
essay3         i'll ask y

In [11]:
encoded_features = [
    [1, 2, 1, 0],
    [1, 2, 1, 0],
    [1, 1, 1, 1],
    [1, 2, 3, 4]
]

In [12]:
from scipy.spatial.distance import cdist
cdist(encoded_features, encoded_features, metric='hamming')

array([[0.  , 0.  , 0.5 , 0.5 ],
       [0.  , 0.  , 0.5 , 0.5 ],
       [0.5 , 0.5 , 0.  , 0.75],
       [0.5 , 0.5 , 0.75, 0.  ]])

In [13]:
data["drinks"]

0          socially
1             often
2          socially
3          socially
4          socially
            ...    
51601      socially
51602      socially
51603         often
51604    not at all
51605      socially
Name: drinks, Length: 51606, dtype: object

In [14]:
data["body_type"]

0        endomorph
1        mesomorph
2        ectomorph
3        ectomorph
4        mesomorph
           ...    
51601    mesomorph
51602    mesomorph
51603    mesomorph
51604    mesomorph
51605    mesomorph
Name: body_type, Length: 51606, dtype: object

In [29]:
data.iloc[0]

age                                                           22
status                                                    single
sex                                                            m
orientation                                             straight
body_type                                              endomorph
drinks                                                  socially
education                                        College or more
height                                                     190.0
job                                               transportation
location                         south san francisco, california
religion                   agnosticism and very serious about it
smokes                                                 sometimes
essay0         about me:  i would love to think that i was so...
essay1         currently working as an international agent fo...
essay2         making people laugh. ranting about a good salt...
essay3         the way i 