In [1]:
import os
import glob

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl

import warnings
warnings.filterwarnings('ignore')

import umap
import sys
sys.path.append('../custom_function')
import clustering # custom function

# Seed handed to UMAP (random_state) so the embeddings are repeatable.
random_seed = 2022
# Values passed as `ks` to the clustering helper: 3, 4, 5, 6, 7.
klist = list(range(3, 8))

# Import data

In [2]:
input_dir = '../output_data/group/category/'

# Averaged dissimilarity matrices; the first CSV column holds the row labels.
# Original note: "all 59 participants".
# NOTE(review): the file names end in "_14p" while the note says 59
# participants -- confirm which one describes these files.
dissim_ctg_avg_159r_all = pd.read_csv(
    os.path.join(input_dir, 'dissim_ctg_avg_159rel_all_14p.csv'),
    index_col=0,
)
dissim_mla_avg_159r_all = pd.read_csv(
    os.path.join(input_dir, 'dissim_mla_avg_159rel_all_14p.csv'),
    index_col=0,
)

# Explicit task (M2)

In [3]:
# Project the explicit-task dissimilarity matrix into two dimensions.
# metric='precomputed' tells UMAP the input already holds pairwise distances.
exp_reducer = umap.UMAP(
    n_neighbors=15,
    min_dist=0.01,
    n_components=2,
    metric='precomputed',
    random_state=random_seed,
)
exp_embedding = exp_reducer.fit_transform(dissim_ctg_avg_159r_all)

# Wrap the embedding in a frame that keeps the matrix's row labels as index.
umap_exp_159r_all = pd.DataFrame(
    exp_embedding,
    columns=['Dim1', 'Dim2'],
    index=dissim_ctg_avg_159r_all.index,
)

In [4]:
# Run the project's clustering helper on the 2-D embedding for every k in
# klist, with both KMeans and hierarchical clustering.
# NOTE(review): `clustering` is a project-local module; the exact layout of
# the returned frame is not visible from this notebook.
umap_exp_results_159r_all = clustering.create_cluster_models(
    data_type="df",
    data=umap_exp_159r_all,
    methods=['KMeans', 'Hierarchical'],
    ks=klist,
    keep_orig=True,
)

# Show the membership of each cluster for the 'cl_k6' column.
clustering.cluster_results(umap_exp_results_159r_all, method_number='cl_k6')


Cluster 1 Relationship:29
nurse and patient, doctor and patient, prostitute and customer, landlord and tenant, representative and citizen, television host and guest, dealer and buyer, judge and lawyer, patient and caretaker, therapist and patient, driver and passenger, psychologist and client, reader and writer, customer and businessman, police officer and civilian, politician and supporter, salesperson and customer, guide and tourist, chef and customer, judge and contestant, sugar daddy and sugar baby, reviewer and writer, lawyer and client, athlete and referee, consultant and client, researcher and research subject, babysitter and infant, entrepreneur and investor, customer and server

Cluster 2 Relationship:23
parent and infant, distant relatives, ancestor and descendant, godparent and godchild, step parent and step child, aunt and niece/nephew, parent and child, foster parent and foster child, siblings, second cousins, uncle and niece/nephew, step grandparent and step grandchild, 

Cluster1: Transactional(29)  
Cluster2: Familial(23)   
Cluster3: Romantic(12)    
Cluster4: Hostile(24)  
Cluster5: Affiliation(34)   
Cluster6: Power(37)  

In [12]:
# Show the 'cl_k3' column of the explicit-task clustering results.
# The models were already fitted earlier in this notebook and stored in
# `umap_exp_results_159r_all`; refitting them here would rebind that variable
# (which is later saved to CSV and turned into an RDM) and repeat the
# computation for no benefit, so this cell only displays the existing results.
clustering.cluster_results(umap_exp_results_159r_all, method_number='cl_k3')


Cluster 1 Relationship:112
nurse and patient, a person and their crush, volunteer and organizer, doctor and patient, prostitute and customer, landlord and tenant, representative and citizen, a person and their deceased spouse, television host and guest, guest and host, neighbors, dealer and buyer, friends, judge and lawyer, officer and soldier, teammates in sports, patient and caretaker, therapist and patient, wife and husband, romantic lovers, brothers in arms, driver and passenger, employer and employee, university student and administrator, athletic trainer and trainee, native and tourist, co workers, bride and bridesmaid, alumni, classmates, psychologist and client, reader and writer, church members, friends with benefits, predecessor and successor, roommates, mentor and mentee, customer and businessman, superior and subordinate, victim and witness, leader and follower, bride and groom, union leader and union member, police officer and civilian, teacher and student, politician and

In [5]:
# Persist the explicit-task clustering results.
exp_results_csv = '../output_data/group/category/cluster_results/umap_exp_results_159r_all_14p.csv'
umap_exp_results_159r_all.to_csv(exp_results_csv)

# Implicit task (M1)

In [6]:
# Project the implicit-task dissimilarity matrix into two dimensions, using
# the same UMAP settings as for the explicit task.
imp_reducer = umap.UMAP(
    n_neighbors=15,
    min_dist=0.01,
    n_components=2,
    metric='precomputed',
    random_state=random_seed,
)
imp_embedding = imp_reducer.fit_transform(dissim_mla_avg_159r_all)

# Wrap the embedding in a frame that keeps the matrix's row labels as index.
umap_imp_159r_all = pd.DataFrame(
    imp_embedding,
    columns=['Dim1', 'Dim2'],
    index=dissim_mla_avg_159r_all.index,
)

In [7]:
# Run the project's clustering helper on the implicit-task embedding for every
# k in klist, with both KMeans and hierarchical clustering.
# NOTE(review): `clustering` is a project-local module; the exact layout of
# the returned frame is not visible from this notebook.
umap_imp_results_159r_all = clustering.create_cluster_models(
    data_type="df",
    data=umap_imp_159r_all,
    methods=['KMeans', 'Hierarchical'],
    ks=klist,
    keep_orig=True,
)

# Show the membership of each cluster for the 'cl_k3' column.
clustering.cluster_results(umap_imp_results_159r_all, method_number='cl_k3')


Cluster 1 Relationship:69
wife and husband, neighbors, half siblings, parent and teenager, church members, a person and their family friends, bride and bridesmaid, teammates in sports, twins, religious followers, step grandparent and step grandchild, second cousins, fans of the same sports team, siblings, close friends, fraternity brothers, sugar daddy and sugar baby, uncle and niece/nephew, childhood friends, man and his mistress, step siblings, confidants, grandparent and grandchild, romantic lovers, brother and sister, parent and infant, ancestor and descendant, peers, groom and groomsman, adoptive parents and adopted kids, a person and their significant other, strangers, predecessor and successor, a person and their crush, brothers in arms, playmates, roommates, friends, life partners, political allies, criminal and their accomplice, diplomats, close relatives, godparent and godchild, pen friends, casual acquaintances, step parent and step child, cohabitants, soul mates, long dist

Cluster1: Private(69)     
Cluster2: Public(63)       
Cluster3: Hostile(27)   

In [8]:
# Persist the implicit-task clustering results.
imp_results_csv = '../output_data/group/category/cluster_results/umap_imp_results_159r_all_14p.csv'
umap_imp_results_159r_all.to_csv(imp_results_csv)

# Generate binary RDMs (0 = same cluster, 1 = different cluster)

In [9]:
def cluster_rdm(cluster_results_region, cluster_model='cl_k3'):
    """Build a binary dissimilarity matrix (RDM) from cluster labels.

    Parameters
    ----------
    cluster_results_region : pandas.DataFrame
        One row per item; must contain the column named by ``cluster_model``.
    cluster_model : str, default 'cl_k3'
        Name of the column holding each row's cluster label.

    Returns
    -------
    pandas.DataFrame
        Square float frame whose index and columns are both
        ``cluster_results_region.index``. Entry (i, j) is 0.0 when rows i and
        j carry equal labels and 1.0 otherwise.

    Raises
    ------
    KeyError
        If ``cluster_model`` is not a column of ``cluster_results_region``.
    """
    labels = cluster_results_region[cluster_model].to_numpy()
    # Compare every label with every other label in one broadcast operation
    # instead of filling the frame cell by cell (and re-casting the whole
    # frame to float after each assignment).
    different_cluster = labels[:, None] != labels[None, :]
    return pd.DataFrame(
        different_cluster.astype(float),
        index=cluster_results_region.index,
        columns=cluster_results_region.index,
    )

In [13]:
# Binary RDMs: the explicit task uses its 'cl_k6' column, the implicit task
# its 'cl_k3' column.
exp_rdm = cluster_rdm(
    cluster_results_region=umap_exp_results_159r_all,
    cluster_model='cl_k6',
)
imp_rdm = cluster_rdm(
    cluster_results_region=umap_imp_results_159r_all,
    cluster_model='cl_k3',
)

In [14]:
# Write both cluster RDMs next to the clustering results.
rdm_outputs = (
    (exp_rdm, '../output_data/group/category/cluster_results/rdm_exp_14p.csv'),
    (imp_rdm, '../output_data/group/category/cluster_results/rdm_imp_14p.csv'),
)
for rdm_frame, rdm_csv in rdm_outputs:
    rdm_frame.to_csv(rdm_csv)