In [None]:
import pandas as pd 
import numpy as np

# Functions

In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util

class PsychologicalTestAnalyzer:
    """
    Embeds psychological test items with several sentence-transformer models and
    builds one item-by-item cosine similarity matrix per model.
    """

    def __init__(self, models, model_short):
        """
        Initializes the analyzer with embedding models.
        :param models: List of sentence transformer model names.
        :param model_short: Shortened names for output file naming
            (only stored on the instance; no method of this class reads it).
        """
        self.models = models
        self.model_short = model_short
        self.loaded_models = {mod: SentenceTransformer(mod) for mod in models}  # Load models once
        # Filled by analyze_tests: test name -> list of similarity matrices (one per model)
        self.results = {}

    def generate_embeddings(self, df_items):
        """
        Computes sentence embeddings for all items.
        One '<model name>_embeddings' column is added per model. The DataFrame is
        modified in place and also returned.
        :param df_items: DataFrame containing psychological test items in an 'Item' column.
        :return: The same DataFrame with an embeddings column for each model.
        """
        sentences = df_items['Item'].tolist()

        for mod in self.models:
            # Encode the whole test in a single batched call instead of item by item
            embeddings = self.loaded_models[mod].encode(sentences)

            # Store embeddings in DataFrame (one vector per row)
            df_items[mod + '_embeddings'] = list(embeddings)

        return df_items

    def compute_cosine_similarity(self, df_items):
        """
        Computes one cosine similarity matrix per model.
        :param df_items: DataFrame with an 'Item' column and the
            '<model name>_embeddings' columns added by generate_embeddings.
        :return: List of square DataFrames, one per model in the order of
            self.models, labelled on both axes with the item texts.
        """
        # Label by row (not by unique text) so the labels always match the
        # matrix size, even when an item text occurs more than once.
        labels = df_items['Item'].tolist()

        matrices = []
        for mod in self.models:
            # Stack the per-row vectors into one 2-D array; this is positional,
            # so it does not depend on the DataFrame's index labels.
            embeddings = np.vstack(df_items[mod + '_embeddings'].tolist())
            cosine_sim_matrix = util.pytorch_cos_sim(embeddings, embeddings).numpy()

            # Fill diagonal with 1 to avoid EFA functions treating similarity as covariance
            np.fill_diagonal(cosine_sim_matrix, 1)

            matrices.append(pd.DataFrame(cosine_sim_matrix, columns=labels, index=labels))
        return matrices

    def analyze_tests(self, test_dataframes, return_all=False):
        """
        Loops through all test dataframes, computes embeddings, and cosine similarities.
        Every test is processed and its matrices are kept in self.results under the test name.
        :param test_dataframes: Dictionary of test names and corresponding DataFrames.
        :param return_all: If True, return the full {test name: matrices} dictionary.
            If False (default), return only the matrices of the first test, which is
            the return shape existing callers rely on.
        :return: Dictionary of all results, or the first test's list of matrices
            (None when test_dataframes is empty).
        """
        results = {}
        for test_name, df in test_dataframes.items():
            print(f"Processing: {test_name}")
            df = self.generate_embeddings(df)  # Generate embeddings
            results[test_name] = self.compute_cosine_similarity(df)  # One matrix per model

        self.results = results
        if return_all:
            return results
        # Backward-compatible return value: matrices of the first test only
        return next(iter(results.values()), None)

# Analysis

In [None]:
import json
import pandas as pd

# Load the item bank. The encoding is given explicitly so that decoding the
# item texts does not depend on the platform's default locale encoding.
with open('./Item_data/items.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Walk every category and every test inside it, building one DataFrame per test
for category, tests in data.items():
    print(f"Category: {category}")

    for test_name, test_data in tests.items():
        df = pd.DataFrame(test_data["dataframe"])  # Convert items to DataFrame
        print(f"Test: {test_name} - Data Shape: {df.shape}")

        # TODO: run PsychologicalTestAnalyzer on each test once we have more data,
        # and decide whether results should be saved per scale.


In [None]:
# Sentence-transformer checkpoints to compare; model_short holds the matching
# short label for each entry, in the same order.
models = [
    'nli-distilroberta-base-v2',
    'all-mpnet-base-v2',
    'sentence-transformers/all-MiniLM-L6-v2',
    'intfloat/e5-large-v2',
    'LaBSE',
]
model_short = [
    'distilroberta',
    'mpnet',
    'miniLM',
    'e5',
    'labse',
]

# Build the analyzer (its constructor loads every model listed above)
analyzer = PsychologicalTestAnalyzer(models=models, model_short=model_short)

# Tests to analyse, keyed by test name (replace with actual DataFrames as needed);
# currently only DASS21 from the 'Clinical' category.
test_dataframes = {
    test_name: pd.DataFrame(data[category][test_name]['dataframe'])
    for category, test_name in [('Clinical', 'DASS21')]
}

# Run the full analysis and keep the returned similarity matrices
out = analyzer.analyze_tests(test_dataframes)


In [15]:
# Show the similarity matrix at index 4 of `out`, i.e. the fifth entry of `models` ('LaBSE')
pd.DataFrame(data=out[4])

Unnamed: 0,I couldn't seem to experience any positive feeling at all.,I found it difficult to work up the initiative to do things.,I felt that I had nothing to look forward to.,I felt down-hearted and blue.,I was unable to become enthusiastic about anything.,I felt I wasn't worth much as a person.,I felt that life was meaningless.,I was aware of dryness of my mouth.,"I experienced breathing difficulty (e.g., excessively rapid breathing, breathlessness in the absence of physical exertion).","I experienced trembling (e.g., in the hands).",...,I felt I was close to panic.,"I was aware of the action of my heart in the absence of physical exertion (e.g., sense of heart rate increase, heart missing a beat).",I felt scared without any good reason.,I found it hard to wind down.,I tended to over-react to situations.,I felt that I was using a lot of nervous energy.,I found myself getting agitated.,I found it difficult to relax.,I was intolerant of anything that kept me from getting on with what I was doing.,I felt that I was rather touchy.
I couldn't seem to experience any positive feeling at all.,1.0,0.383965,0.606741,0.420673,0.603171,0.509455,0.453506,0.184104,0.363924,0.305171,...,0.370698,0.336013,0.495335,0.330663,0.351518,0.42496,0.348355,0.309677,0.365458,0.483045
I found it difficult to work up the initiative to do things.,0.383965,1.0,0.465581,0.358736,0.500448,0.400574,0.407488,0.264239,0.341545,0.311755,...,0.374929,0.31256,0.331274,0.543815,0.453413,0.417746,0.481172,0.528896,0.553786,0.433519
I felt that I had nothing to look forward to.,0.606741,0.465581,1.0,0.468913,0.578393,0.597991,0.5816,0.308226,0.307719,0.279187,...,0.518826,0.336701,0.603488,0.437208,0.346194,0.422896,0.49117,0.41152,0.55092,0.565838
I felt down-hearted and blue.,0.420673,0.358736,0.468913,1.0,0.353741,0.426786,0.530124,0.404324,0.247528,0.344935,...,0.499995,0.316135,0.472123,0.473479,0.313508,0.408766,0.504284,0.417581,0.245571,0.529634
I was unable to become enthusiastic about anything.,0.603171,0.500448,0.578393,0.353741,1.0,0.42797,0.35419,0.251835,0.304859,0.274299,...,0.351479,0.296571,0.483622,0.392876,0.421779,0.318249,0.442951,0.378527,0.650492,0.380753
I felt I wasn't worth much as a person.,0.509455,0.400574,0.597991,0.426786,0.42797,1.0,0.624358,0.214731,0.325147,0.307154,...,0.455729,0.347359,0.468131,0.38383,0.345007,0.46194,0.414719,0.391034,0.35834,0.632524
I felt that life was meaningless.,0.453506,0.407488,0.5816,0.530124,0.35419,0.624358,1.0,0.3493,0.359579,0.30648,...,0.488191,0.394085,0.510055,0.403003,0.263538,0.445237,0.474802,0.393569,0.281132,0.554048
I was aware of dryness of my mouth.,0.184104,0.264239,0.308226,0.404324,0.251835,0.214731,0.3493,1.0,0.241082,0.34537,...,0.431784,0.271391,0.305833,0.484627,0.288842,0.319141,0.446842,0.37377,0.242437,0.423612
"I experienced breathing difficulty (e.g., excessively rapid breathing, breathlessness in the absence of physical exertion).",0.363924,0.341545,0.307719,0.247528,0.304859,0.325147,0.359579,0.241082,1.0,0.641646,...,0.305667,0.767366,0.298341,0.371706,0.296913,0.345481,0.383231,0.420951,0.298386,0.284783
"I experienced trembling (e.g., in the hands).",0.305171,0.311755,0.279187,0.344935,0.274299,0.307154,0.30648,0.34537,0.641646,1.0,...,0.380388,0.639936,0.251434,0.352163,0.36944,0.315293,0.461197,0.359594,0.288893,0.402303
