In [8]:
# Directory whose '.csv' files are processed by the batch loop at the end of the notebook.
# NOTE(review): absolute, machine-specific path — consider making this configurable.
folder_path = '/Users/kana/Library/Mobile Documents/com~apple~CloudDocs/Codes/GWOT_raw_data/202411_data'
# Value compared against the 'response_type' column when trials are filtered.
response_type = 'similarity'

In [None]:
import os
import pickle as pkl
import sys
sys.path.append('/Users/kana/Library/Mobile Documents/com~apple~CloudDocs/Codes/GWOT_colorprefrencequalia')
import numpy as np
import pandas as pd
import sklearn
import csv
from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import MDS
import seaborn as sns
import ot
import plotly.graph_objs as go
import plotly.express as px
from mpl_toolkits.axes_grid1 import make_axes_locatable
import utilityFunctions
print(sys.path)
from itertools import combinations

In [3]:
# Step 2: Eliminate practice trials
def filter_practice_trials(df):
    """Drop the rows that are flagged as practice trials.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'practice_trial'`` column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose ``'practice_trial'`` value is not equal to 1.
        Rows with a missing value are kept, because NaN compares unequal to 1.
    """
    is_real_trial = df['practice_trial'].ne(1)
    return df.loc[is_real_trial]

# Step 3: Filter by response type
def filter_response_type(df, response_type):
    """Keep only the rows recorded for one response type.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'response_type'`` column.
    response_type
        Value that the ``'response_type'`` column must equal for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose ``'response_type'`` equals ``response_type``.
    """
    is_requested_type = df['response_type'].eq(response_type)
    return df.loc[is_requested_type]

# Step 4: Extract first N trial indices
def extract_first_trial_indices(df, num_trials):
    """Return the ``'trials.thisIndex'`` values of the first ``num_trials`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'trials.thisIndex'`` column.
    num_trials : int
        Number of leading rows (by position) to read.

    Returns
    -------
    list
        The column values of those rows, in row order. Values are taken as-is,
        so repeated indices appear repeatedly in the result.
    """
    index_column = df['trials.thisIndex']
    leading_rows = index_column.iloc[:num_trials]
    return leading_rows.tolist()

# Step 5 & 6: Map trial indices to a sequential range
def transform_indices(df, trial_indices):
    """Select the requested trials and renumber them 1..K in order of first appearance.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'trials.thisIndex'`` column.
    trial_indices : iterable
        ``'trials.thisIndex'`` values to keep.

    Returns
    -------
    pandas.DataFrame
        A new frame (``df`` itself is never modified) holding the rows whose
        ``'trials.thisIndex'`` is in ``trial_indices``, plus a
        ``'transformed_index'`` column that maps each distinct original index
        to 1, 2, 3, ... in the order it first occurs.
    """
    is_selected = df['trials.thisIndex'].isin(trial_indices)
    # Take an explicit copy: adding a column to a boolean-indexed slice is a
    # chained assignment and triggers pandas' SettingWithCopyWarning.
    filtered_df = df.loc[is_selected].copy()

    unique_indices = filtered_df['trials.thisIndex'].unique()
    index_mapping = {
        original: position
        for position, original in enumerate(unique_indices, start=1)
    }
    filtered_df['transformed_index'] = filtered_df['trials.thisIndex'].map(index_mapping)
    return filtered_df

# Step 7: Plot scatter points and connections
def plot_scatter_with_connections(filtered_df):
    """Scatter the responses of every trial and link double-pass responses.

    A new 10x6 figure is opened. For each ``'transformed_index'`` group the
    ``'response'`` values are drawn as blue points at x = that index; the
    markers are larger (100 instead of 50) when all responses in the group are
    identical. Groups with exactly two responses are additionally joined by a
    red vertical line.

    Parameters
    ----------
    filtered_df : pandas.DataFrame
        Frame with ``'transformed_index'`` and ``'response'`` columns.

    Returns
    -------
    list of list
        One ``[first, second]`` response list for every group that contains
        exactly two responses, in group order.
    """
    groups = filtered_df.groupby('transformed_index')
    plt.figure(figsize=(10, 6))

    response_pairs = []
    for position, rows in groups:
        responses = rows['response'].tolist()
        xs = [position] * len(rows)

        # Bigger markers when every response in the group has the same value.
        marker_size = 50 if len(set(responses)) != 1 else 100
        plt.scatter(xs, responses, color='blue', s=marker_size)

        if len(responses) != 2:
            continue
        plt.plot([position, position], responses, color='red', linewidth=1)
        response_pairs.append(responses)

    return response_pairs

# Step 8: Calculate correlation between response pairs
def calculate_correlation(response_pairs):
    """Correlate the first and second responses of the double-pass pairs.

    Parameters
    ----------
    response_pairs : list
        Sequence of pairs; element 0 of each pair is the first response and
        element 1 is the second.

    Returns
    -------
    float or None
        Result of ``pandas.Series.corr`` between the first and second
        responses, or ``None`` when ``response_pairs`` is empty.
    """
    if not response_pairs:
        return None

    first_pass = []
    second_pass = []
    for pair in response_pairs:
        first_pass.append(pair[0])
        second_pass.append(pair[1])

    return pd.Series(first_pass).corr(pd.Series(second_pass))

# Step 9: Display the plot with correlation
def display_plot(pair_correlation):
    """Label, title and show the current matplotlib figure.

    Parameters
    ----------
    pair_correlation : float or None
        Double-pass correlation to report in the title. ``None`` and NaN
        (undefined correlation) are both shown as ``N/A``; every other value,
        including exactly 0.0, is shown with two decimals.
    """
    # Test for "no value" explicitly: a plain truthiness check would report a
    # legitimate correlation of 0.0 as N/A and would print NaN as "nan".
    has_correlation = pair_correlation is not None and not pd.isna(pair_correlation)
    if has_correlation:
        title_text = f'Double pass r: {pair_correlation:.2f}'
    else:
        title_text = "Double pass r: N/A"

    plt.xlabel('Trial Index')
    plt.ylabel('Response')
    plt.title(title_text)
    plt.tight_layout()
    plt.show()

In [6]:
# Main function to process a single CSV file
def process_csv(file_path, response_type, num_trials=25):
    """Run the double-pass analysis for one CSV file and show its figure.

    The file is loaded with ``utilityFunctions.load_csv``, practice trials are
    removed, rows are restricted to ``response_type``, and the trial indices
    found in the first ``num_trials`` remaining rows are selected and
    renumbered. The responses are then plotted and the correlation between
    first and second responses is shown in the figure title.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to process.
    response_type
        Value the ``'response_type'`` column must equal for a row to be used.
    num_trials : int, optional
        Number of leading rows whose trial indices are analysed. Defaults to
        25, the value that was previously hard-coded.

    Returns
    -------
    None
        The function's only visible effect is the displayed figure.
    """
    trials_raw = utilityFunctions.load_csv(file_path)
    trials_main = filter_practice_trials(trials_raw)
    trials_typed = filter_response_type(trials_main, response_type)

    first_indices = extract_first_trial_indices(trials_typed, num_trials=num_trials)
    filtered_df = transform_indices(trials_typed, first_indices)

    response_pairs = plot_scatter_with_connections(filtered_df)
    pair_correlation = calculate_correlation(response_pairs)
    display_plot(pair_correlation)

In [None]:
# Process all CSV files in the specified folder.
# os.listdir() returns entries in arbitrary order, so sort the names to keep
# the sequence of figures the same from run to run.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(folder_path, filename)
    process_csv(file_path, response_type)