In [1]:
# pip install -U sentence-transformers

In [2]:
# pip install tf-keras

In [1]:
# pip install accelerate

In [None]:
import pandas as pd
import numpy as np
import re
from collections import defaultdict
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances
from accelerate import init_empty_weights

2025-04-08 00:49:11.593468: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
df = pd.read_excel('value structure 2022-2024.xlsx')

In [4]:
df.head()

Unnamed: 0,id_ego,Person Name,From,To,NoMeaningfulEdges,Operations,Data
0,7023.0,Josh Adler,strength,ecstasy,1.0,,2022 A
1,7023.0,Josh Adler,strength,discovery,1.0,,2022 A
2,7023.0,Josh Adler,strength,bravery,1.0,,2022 A
3,7023.0,Josh Adler,strength,calm,1.0,,2022 A
4,7023.0,Josh Adler,calm,connected to the universe,1.0,,2022 A


# User-Values Dict

In [None]:
# Step 1: Basic cleaning function (same as before)
def clean_text(text):
    if pd.isna(text):
        return None
    text = str(text).strip().lower()
    text = re.sub(r'[^a-zA-Z0-9\s\-]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text

# Step 2: Clean From and To columns
df['From_clean'] = df['From'].apply(clean_text)
df['To_clean'] = df['To'].apply(clean_text)

# Step 3: Build individual_values dictionary
individual_values = defaultdict(set)

for _, row in df.iterrows():
    pid = str(row['id_ego']).strip()
    name = row['Person Name'].strip()
    key = (pid, name)
    
    from_val = row['From_clean']
    to_val = row['To_clean']
    
    if from_val: individual_values[key].add(from_val)
    if to_val: individual_values[key].add(to_val)

# Convert sets to sorted lists
individual_values = {
    k: sorted(list(v)) for k, v in individual_values.items()
}

# Preview:
for k, v in list(individual_values.items())[:5]:
    print(f"{k}: {v}")

('7023.0', 'Josh Adler'): ['bravery', 'calm', 'connected to the universe', 'discovery', 'ecstasy', 'growth', 'strength', 'zen']
('7053.0', 'Mohit Agarwal'): ['empathy', 'family', 'gratitude', 'make things better', 'organization', 'peace', 'professionalism', 'quality']
('7051.0', 'Jamie Bergos'): ['beauty', 'connection to the universe', 'excellence', 'purpose', 'relationships', 'resilience', 'service', 'strong work ethic']
('7059.0', 'Jeremy Browning'): ['achievement', 'beauty', 'family', 'health', 'knowledge', 'legacy', 'respect', 'self awareness']
('7059.0', 'Jeremy Browni+D372'): ['beauty', 'knowledge']


# Top Values

In [5]:
# group by id_ego
def get_top_values(subdf):
    from_set = set(subdf['From'].dropna())
    to_set = set(subdf['To'].dropna())
    top_values = to_set - from_set
    return top_values

df_top = df.groupby('id_ego').apply(get_top_values).reset_index()
df_top.columns = ['id_ego', 'top_values']
df_top = pd.DataFrame(df_top)

  df_top = df.groupby('id_ego').apply(get_top_values).reset_index()


In [6]:
df_top.head()

Unnamed: 0,id_ego,top_values
0,4000.0,{happiness}
1,4001.0,{stability}
2,4002.0,"{joy, achievement}"
3,4003.0,{inner peace}
4,4004.0,{happiness}


# Embeddings

In [20]:
concepts = ['accuracy', 'speed', 'vigilance', 'optimism', 'persistence', 
            'change', 'detailed', 'holistic', 'analytic', 'creative']

all_values = pd.concat([df['From'], df['To']]).dropna().unique()

all_phrases = list(set(list(all_values) + concepts))

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings = model.encode(all_phrases, convert_to_tensor=True)

embedding_dict = dict(zip(all_phrases, embeddings))

embedding_df = pd.DataFrame(embeddings.numpy(), index=all_phrases)
embedding_df = embedding_df.reset_index()
embedding_df = embedding_df.rename(columns={'index': 'phrase'})

In [22]:
embedding_df

Unnamed: 0,phrase,0,1,2,3,4,5,6,7,8,...,374,375,376,377,378,379,380,381,382,383
0,positivity + confidence,0.026234,0.035015,0.012151,0.006427,0.012661,0.009620,0.127684,0.057459,0.004133,...,-0.053474,0.091928,-0.005845,0.004795,-0.143238,-0.021696,0.126578,-0.037246,-0.041064,-0.010763
1,presence,0.021319,-0.013219,-0.009857,0.010067,0.087104,-0.031084,0.117486,-0.021535,0.016270,...,0.054813,0.034247,0.049290,0.080219,-0.011850,0.040717,0.130473,0.018873,0.027246,0.045869
2,fairness,-0.022948,0.022094,0.003392,-0.050883,0.017585,-0.029168,0.070214,-0.004920,0.075820,...,0.127285,-0.052596,0.054680,0.022932,-0.087675,0.009988,0.202169,-0.012812,-0.007927,0.008181
3,do the right things,-0.012630,0.062107,0.029576,-0.004670,-0.089982,0.030868,0.065208,-0.105340,-0.018404,...,0.094185,-0.017873,0.024740,0.006590,-0.016032,0.027183,0.079268,0.006899,0.019591,-0.005181
4,financial,0.021630,0.054482,-0.065966,0.050728,-0.017508,-0.013171,0.120330,0.051142,0.072215,...,0.001870,-0.006667,-0.041264,0.039454,-0.087942,-0.042742,0.110148,-0.087574,0.042817,-0.009624
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
613,commonality,0.103484,-0.053167,-0.015204,-0.065909,0.034013,0.044022,0.068628,-0.001131,-0.038993,...,0.102093,-0.048240,0.008553,0.029172,0.014776,-0.076072,0.095032,0.005476,0.056189,-0.021451
614,unity,-0.040994,0.043965,-0.028814,0.008841,-0.032412,0.027162,0.128169,0.041329,-0.028691,...,0.095836,0.042979,0.024293,-0.016263,0.009657,0.026417,0.032564,0.059558,-0.077601,-0.003696
615,connection,-0.058691,-0.031273,-0.056097,0.011492,-0.093341,0.039388,0.091452,-0.044531,0.047105,...,0.017396,-0.038827,0.001782,0.015954,-0.105533,0.061937,0.010957,0.084682,0.026206,-0.090987
616,helping others,-0.043025,0.026870,-0.000474,-0.023933,0.014208,0.024386,0.096656,-0.021426,-0.031601,...,0.067834,-0.010634,0.029085,0.058292,0.037300,0.036800,0.035578,0.006987,-0.050729,-0.001173


# Cosine_distance

In [60]:
# Step 1: Separate value and concept embeddings from a combined dictionary
value_embeddings = {k: v for k, v in embedding_dict.items() if k not in concepts}
concept_embeddings = {k: v for k, v in embedding_dict.items() if k in concepts}

# Step 2: Build distance matrix from each value to each concept
# Convert dicts to matrix form
value_names = list(value_embeddings.keys())
concept_names = list(concept_embeddings.keys())

value_matrix = np.vstack([value_embeddings[k] for k in value_names])
concept_matrix = np.vstack([concept_embeddings[k] for k in concept_names])

# Step 3: Compute cosine distances
distance_matrix = cosine_distances(value_matrix, concept_matrix)

# Step 4: Create DataFrame with distances (rows = values, columns = concepts)
distance_df = pd.DataFrame(distance_matrix, columns=concept_names)
distance_df['value'] = value_names

concept_order = [
    'accuracy', 'speed',
    'vigilance', 'optimism',
    'persistence', 'change',
    'detailed', 'holistic',
    'analytic', 'creative'
]
cols = ['value'] + concept_order
distance_df = distance_df[cols]

In [61]:
distance_df

Unnamed: 0,value,accuracy,speed,vigilance,optimism,persistence,change,detailed,holistic,analytic,creative
0,positivity + confidence,0.713628,0.955073,0.723678,0.548133,0.831447,0.960217,0.849708,0.898357,0.878852,0.881303
1,presence,0.794223,0.756751,0.598873,0.776779,0.587591,0.776883,0.743463,0.694338,0.808637,0.728672
2,fairness,0.761288,0.762568,0.829507,0.745232,0.806502,0.743097,0.762712,0.930515,0.922238,0.755190
3,do the right things,0.785837,0.829908,0.776016,0.729848,0.719510,0.664978,0.782965,0.820403,0.907217,0.787302
4,financial,0.787531,0.648659,0.879571,0.695945,0.703519,0.718805,0.719594,0.887873,0.832240,0.758707
...,...,...,...,...,...,...,...,...,...,...,...
603,commonality,0.807326,0.814786,0.803145,0.888206,0.735316,0.813536,0.692229,0.656647,0.927071,0.766658
604,unity,0.758135,0.642623,0.759482,0.762893,0.711621,0.723269,0.761348,0.760094,0.737237,0.753085
605,connection,0.792205,0.636313,0.852377,0.777091,0.756096,0.727708,0.783167,0.767124,0.775124,0.783469
606,helping others,0.903206,0.790396,0.673056,0.758706,0.776476,0.740198,0.808232,0.760247,0.904881,0.774766


# Cosine_score

In [29]:
# Step 1: Define direction vectors using your definitions
direction_defs = {
    'speed': ('accuracy', 'speed'),
    'optimism': ('vigilance', 'optimism'),
    'change': ('persistence', 'change'),
    'holistic': ('detailed', 'holistic'),
    'creative': ('analytic', 'creative'),
}

# Step 2: Create lookup dictionary from embedding_df
value_embedding_dict = dict(
    zip(
        embedding_df['phrase'],
        embedding_df.drop(columns='phrase').values
    )
)

# Step 3: Construct direction vectors: direction = pos - neg
direction_vectors = {
    dim: value_embedding_dict[pos] - value_embedding_dict[neg]
    for dim, (neg, pos) in direction_defs.items()
}

In [48]:
# Step 1: Initialize a list to store each row of results
distance_rows = []

# Step 2: For each value, compute cosine distance to each psychological direction
for val in value_names:
    val_vec = value_embedding_dict[val]  # Get the embedding vector of the value
    row = {'value': val}  # Start a row with the value name
    
    for dim, dir_vec in direction_vectors.items():
        # Compute cosine distance between the value and the direction vector
        distance = cosine_distances([val_vec], [dir_vec])[0][0]
        row[dim] = distance  # Store the result under the dimension name
    
    # Add the row to the list
    distance_rows.append(row)

# Step 3: Convert the list of rows into a DataFrame
value_direction_df = pd.DataFrame(distance_rows)


In [49]:
value_direction_df.head()

Unnamed: 0,value,speed,optimism,change,holistic,creative
0,positivity + confidence,1.224436,0.840429,1.107781,1.038936,1.001953
1,presence,0.965168,1.161717,1.158438,0.960682,0.936277
2,fairness,1.001189,0.923394,0.946929,1.134302,0.866882
3,do the right things,1.040967,0.958033,0.954357,1.029964,0.904441
4,financial,0.870911,0.833084,1.012794,1.134683,0.941402


# Individual_Score

In [54]:
# Step 1: Convert value_direction_df to lookup dictionary
value_score_dict = value_direction_df.set_index('value').to_dict(orient='index')
# {'self-harmony': {'speed': ..., 'optimism': ..., ...}, ...}

# Step 2: Prepare result list
individual_scores = []

# Step 3: For each individual, aggregate scores across their values
for (pid, name), values in individual_values.items():
    score_sum = {'speed': 0, 'optimism': 0, 'change': 0, 'holistic': 0, 'creative': 0}
    count = 0
    
    for val in values:
        if val in value_score_dict:
            for dim in score_sum:
                score_sum[dim] += value_score_dict[val][dim]
            count += 1
    
    # Compute average if count > 0
    if count > 0:
        avg_scores = {dim: score_sum[dim]/count for dim in score_sum}
    else:
        avg_scores = {dim: None for dim in score_sum}  # or use np.nan

    # Add ID and name
    avg_scores['id'] = pid
    avg_scores['name'] = name
    individual_scores.append(avg_scores)

# Step 4: Convert to DataFrame
individual_score_df = pd.DataFrame(individual_scores)

# Reorder columns
cols = ['id', 'name', 'speed', 'optimism', 'change', 'holistic', 'creative']
individual_score_df = individual_score_df[cols]


In [55]:
individual_score_df

Unnamed: 0,id,name,speed,optimism,change,holistic,creative
0,7023.0,Josh Adler,0.920541,1.005041,1.012527,1.021717,0.936188
1,7053.0,Mohit Agarwal,0.952482,0.956239,0.986481,1.023192,0.908079
2,7051.0,Jamie Bergos,0.948666,1.008218,1.066455,1.032304,0.932637
3,7059.0,Jeremy Browning,0.970015,0.995761,1.012033,1.054411,0.921182
4,7059.0,Jeremy Browni+D372,0.982139,0.963243,1.037261,1.082329,0.903842
...,...,...,...,...,...,...,...
720,10107.0,Christopher Morris,0.980903,0.962864,0.995594,1.057146,0.934673
721,10105.0,Busisiwe Mombaur,1.007522,0.945221,1.047138,1.113212,0.933475
722,,Max Quinn,0.934258,0.920721,1.009979,1.052446,0.922240
723,10125.0,Curtis Rose,0.949117,0.901995,1.062297,1.095718,0.888292


# Individual Distance

In [64]:
# Step 1: Reshape the distance_df to long format
value_concept_wide = distance_df.melt(id_vars='value', 
                                            var_name='concept', 
                                            value_name='distance')

# Step 2: Map individuals to their values
rows = []

for (pid, name), values in individual_values.items():
    for val in values:
        matched_rows = value_concept_wide[value_concept_wide['value'] == val]
        for _, row in matched_rows.iterrows():
            rows.append({
                'id': pid,
                'name': name,
                'value': val,
                'concept': row['concept'],
                'distance': row['distance']
            })

# Step 3: Build long-format DataFrame
long_df = pd.DataFrame(rows)

# Step 4: Pivot into wide format (concepts as columns)
wide_df = long_df.pivot_table(
    index=['id', 'name', 'value'],
    columns='concept',
    values='distance'
).reset_index()

cols_dis = ['name', 'value'] + concept_order
wide_df = wide_df[cols_dis]


In [65]:
wide_df

concept,name,value,accuracy,speed,vigilance,optimism,persistence,change,detailed,holistic,analytic,creative
0,Sevinj Abdullayeva,appreciation,0.758332,0.740392,0.648256,0.619869,0.828005,0.682074,0.737746,0.775258,0.737472,0.677993
1,Sevinj Abdullayeva,balance,0.731530,0.730251,0.718066,0.691257,0.678201,0.703486,0.781066,0.793019,0.774812,0.749840
2,Sevinj Abdullayeva,family,0.776483,0.675549,0.824004,0.749274,0.712656,0.712907,0.703903,0.803564,0.869543,0.741106
3,Sevinj Abdullayeva,happiness,0.878913,0.679593,0.678123,0.460599,0.610629,0.708003,0.723332,0.730679,0.726232,0.631593
4,Sevinj Abdullayeva,peace,0.802051,0.683757,0.625415,0.624267,0.735466,0.666284,0.675184,0.728380,0.816398,0.641782
...,...,...,...,...,...,...,...,...,...,...,...,...
5634,Yuna Lee,impact,0.738506,0.680743,0.732874,0.751373,0.756305,0.642927,0.691633,0.891178,0.728901,0.745725
5635,Yuna Lee,motivation,0.801043,0.774864,0.743445,0.599723,0.655406,0.743651,0.689161,0.802328,0.799160,0.680263
5636,Yuna Lee,priority,0.713290,0.613894,0.776329,0.740348,0.685665,0.756895,0.678274,0.780979,0.831897,0.775832
5637,Yuna Lee,self confidence,0.614771,0.814183,0.663327,0.508814,0.656593,0.892471,0.697159,0.827957,0.886822,0.827363
