In [2]:
import pandas as pd
import numpy as np
from itertools import combinations
from scipy.stats import pearsonr

In [3]:
# Load the word2vec (semantic) and neural similarity matrices.
# Both CSVs are expected to hold a 'term' column of row labels plus one
# similarity column per word.
word2vec_sim_df = pd.read_csv('word2vec_sim.csv')
neural_sim_df = pd.read_csv('p1_sim.csv')

# `words` below is taken positionally (everything after the first column), so
# make that layout assumption explicit instead of silently treating some other
# column as a word.
assert word2vec_sim_df.columns[0] == 'term', (
    f"expected 'term' as the first column of word2vec_sim.csv, "
    f"found {word2vec_sim_df.columns[0]!r}"
)

# reindex() inserts an all-NaN row for every label it cannot find, which would
# turn every downstream correlation into NaN without raising. Fail loudly here.
terms_missing_from_neural = word2vec_sim_df.loc[
    ~word2vec_sim_df['term'].isin(neural_sim_df['term']), 'term'
].tolist()
assert not terms_missing_from_neural, (
    f"terms present in word2vec_sim.csv but absent from p1_sim.csv: "
    f"{terms_missing_from_neural}"
)

# Reorder the rows of the neural similarity matrix to match the row order of
# the word2vec similarity matrix, so that row i refers to the same term in both.
neural_sim_df_reordered = (
    neural_sim_df
    .set_index('term')
    .reindex(word2vec_sim_df['term'])
    .reset_index()
)

# List of words (excluding the 'term' column)
words = word2vec_sim_df.columns[1:]

# Every word is later looked up as a column of the neural matrix as well.
words_missing_from_neural = [
    word for word in words if word not in neural_sim_df_reordered.columns
]
assert not words_missing_from_neural, (
    f"word columns absent from p1_sim.csv: {words_missing_from_neural}"
)

# Initialize a list to store results
decoding_results = []

In [4]:
# Preview the first rows only, to confirm the neural matrix now follows the
# word2vec 'term' order without rendering the whole frame into the notebook.
neural_sim_df_reordered.head(10)

Unnamed: 0,term,airplane,ant,apartment,arch,arm,barn,bear,bed,bee,...,shirt,skirt,spoon,table,telephone,tomato,train,truck,watch,window
0,horse,0.830323,0.691065,0.698721,0.610093,0.56049,0.711925,0.867668,0.725328,0.902443,...,0.878576,0.822725,0.689416,0.726813,0.907903,0.679341,0.698553,0.770612,0.756502,0.762504
1,bear,0.82829,0.777583,0.762514,0.67184,0.529558,0.737495,1.0,0.751571,0.890533,...,0.834256,0.790998,0.703103,0.676546,0.809121,0.678013,0.748991,0.761983,0.698777,0.718538
2,cat,0.67727,0.754469,0.596658,0.491565,0.518258,0.622361,0.784209,0.598146,0.875222,...,0.813089,0.769078,0.557828,0.579006,0.770225,0.571532,0.669761,0.608832,0.678881,0.666203
3,dog,0.724007,0.800461,0.62162,0.505957,0.587157,0.619353,0.856424,0.701655,0.878088,...,0.840423,0.799265,0.682571,0.631826,0.807333,0.644053,0.618325,0.649231,0.727094,0.622478
4,cow,0.769298,0.834809,0.608525,0.506032,0.602313,0.651241,0.842841,0.736045,0.826075,...,0.777312,0.749963,0.697471,0.659164,0.764701,0.561507,0.650916,0.690289,0.746676,0.579022
5,leg,0.521596,0.636324,0.445907,0.395335,0.589304,0.470127,0.576497,0.547282,0.736508,...,0.67878,0.688187,0.555059,0.60755,0.724612,0.324236,0.5867,0.483917,0.739074,0.610803
6,arm,0.611928,0.635829,0.231247,0.280516,1.0,0.35648,0.529558,0.465465,0.56751,...,0.494433,0.486777,0.722344,0.646708,0.574842,0.211426,0.465599,0.566975,0.644681,0.376803
7,hand,0.669398,0.691927,0.48226,0.423684,0.68308,0.513918,0.703956,0.535473,0.819981,...,0.778,0.759455,0.64986,0.617186,0.772525,0.528932,0.552607,0.6094,0.672264,0.623167
8,foot,0.709507,0.659346,0.448296,0.43504,0.846284,0.526572,0.695968,0.619548,0.758506,...,0.727838,0.678832,0.789202,0.754221,0.773986,0.449388,0.553614,0.694302,0.781115,0.589509
9,eye,0.826608,0.711492,0.744343,0.680787,0.614393,0.772128,0.797959,0.805236,0.858017,...,0.833419,0.795285,0.734834,0.835153,0.825829,0.518579,0.823249,0.822879,0.852831,0.808566


In [None]:
# Pairwise decoding: for every unordered pair of words, correlate each word's
# neural similarity column with (a) its own word2vec similarity column and
# (b) the other word's word2vec similarity column. A word counts as correctly
# decoded when the matched correlation (a) is strictly larger than the
# mismatched one (b).

# Start from an empty list every time this cell runs; otherwise re-running the
# cell would append a second copy of every pair to the list left in the kernel.
decoding_results = []

# Row labels. The neural matrix was reordered to follow this same 'term' order,
# so one positional mask applies to both matrices.
row_terms = word2vec_sim_df['term']

# Iterate over all unique pairs of words
for word1, word2 in combinations(words, 2):
    # Leave out the rows belonging to the two words themselves (their
    # self-similarity entries). Rows are chosen by label rather than by testing
    # `== 1.0`, since an exact float comparison misses a diagonal that is not
    # stored as exactly 1.0 and would drop any other row that happens to be 1.0.
    keep_rows = ~row_terms.isin([word1, word2]).to_numpy()

    # Plain arrays: pearsonr pairs values by position, not by pandas index.
    word1_neural = neural_sim_df_reordered[word1].to_numpy()[keep_rows]
    word2_neural = neural_sim_df_reordered[word2].to_numpy()[keep_rows]
    word1_semantic = word2vec_sim_df[word1].to_numpy()[keep_rows]
    word2_semantic = word2vec_sim_df[word2].to_numpy()[keep_rows]

    # Calculate correlations ([0] is the correlation coefficient; the p-value is not used)
    corr_word1_neural_semantic = pearsonr(word1_neural, word1_semantic)[0]
    corr_word1_neural_word2_semantic = pearsonr(word1_neural, word2_semantic)[0]
    corr_word2_neural_semantic = pearsonr(word2_neural, word2_semantic)[0]
    corr_word2_neural_word1_semantic = pearsonr(word2_neural, word1_semantic)[0]

    # Check decoding accuracy: matched correlation must beat the mismatched one
    decode_accuracy_word1 = corr_word1_neural_semantic > corr_word1_neural_word2_semantic
    decode_accuracy_word2 = corr_word2_neural_semantic > corr_word2_neural_word1_semantic

    # Append results
    decoding_results.append({
        'word1': word1,
        'word2': word2,
        'corr_word1_neural_semantic': corr_word1_neural_semantic,
        'corr_word1_neural_word2_semantic': corr_word1_neural_word2_semantic,
        'corr_word2_neural_semantic': corr_word2_neural_semantic,
        'corr_word2_neural_word1_semantic': corr_word2_neural_word1_semantic,
        'decode_accuracy_word1': decode_accuracy_word1,
        'decode_accuracy_word2': decode_accuracy_word2
    })

# Convert results to a DataFrame (one row per word pair)
decoding_results_df = pd.DataFrame(decoding_results)

# Save the results to a CSV file
decoding_results_df.to_csv('decoding_results.csv', index=False)
