In [163]:
import pandas as pd
import scipy.linalg as la
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

In [128]:
file_path = "grapheme-color-trials-2023-09-12-13-36-11_test_1.csv"
file_path2 = "grapheme-color-trials-2023-09-12-13-36-11_test_2.csv"
df = pd.read_csv(file_path)
df2 = pd.read_csv(file_path)


In [129]:
def rgb_diff(group):
    rgb_values = group[['r', 'g', 'b']].values
    rgb1, rgb2, rgb3 = rgb_values[0], rgb_values[1], rgb_values[2]
    rgb_diff = abs(rgb1 - rgb2) + abs(rgb2 - rgb3) + abs(rgb3 - rgb1)

    #returns a new dataset with the average difference in RGB values for each stimulus
    return pd.Series(rgb_diff, index=['v_diff_r', 'v_diff_g', 'v_diff_b'])

def finalize_score(result):
    result_norm = result[['diff_r', 'diff_g', 'diff_b']] / 255
    N = len(result)
    test_score_rgb = result_norm.sum() / N
    final_test_score = test_score_rgb.sum()
    
    return final_test_score




In [130]:
# these lines of code calculates the RGB differences for each stimuli across the two tests

 
# these are the two tests.
# Group by 'stimulus' and apply the rgb_diff function

# V denotes the visual condition
v_result = df.groupby('stimulus').apply(rgb_diff).reset_index()
# A denotes the auditory condition
a_result = df2.groupby('stimulus').apply(rgb_diff).reset_index()
#print(v_result, a_result)

# Here we just rename each column in the auditory test because the rgb_diff function returns labels specified for V condition.
# Renaming will be important when we merge the datasets later on
new_column_names = {'v_diff_r': 'a_diff_r', 'v_diff_g': 'a_diff_g', 'v_diff_b': 'a_diff_b'}
new_column_names = {'v_diff_r': 'a_diff_r', 'v_diff_g': 'a_diff_g', 'v_diff_b': 'a_diff_b'}
a_result.rename(columns=new_column_names, inplace=True)


# merges the two dataframes from visual and auditory tests
total_score = v_result.merge(a_result, on='stimulus')

# the dataset changed shape from (4, 35) to (7, 35)
print(f"The dataset changed shape from {v_result.shape} to {total_score.shape}")


# now we want to rename the columns in A condition and V condition. We do this just to utilize the same functions
# to guarantee consistency when handling the data 

# Visual condition
new_column_names = {'v_diff_r': 'diff_r', 'v_diff_g': 'diff_g', 'v_diff_b': 'diff_b'}
v_result.rename(columns=new_column_names, inplace=True)
# Auditory condition
new_column_names = {'a_diff_r': 'diff_r', 'a_diff_g': 'diff_g', 'a_diff_b': 'diff_b'}
a_result.rename(columns=new_column_names, inplace=True)


# now that total score dataset has been created, we can perform some simple arithmetics to get the total 
# difference in RGB values:
total_score['diff_r'] = abs(total_score['v_diff_r'] - total_score['a_diff_r'])
total_score['diff_g'] = abs(total_score['v_diff_g'] - total_score['a_diff_g'])
total_score['diff_b'] = abs(total_score['v_diff_b'] - total_score['a_diff_b'])


# here's where the score is normalized and divided by N:
#score = finalize_score(v_result)
#print(f"the final score of the participant is: {twin_score}")

#creates the dataset for twin score
twin_dataset = total_score[['diff_r', 'diff_g', 'diff_b']]

#normalizes the value 
twin_score = finalize_score(total_score)
v_score = finalize_score(v_result)
a_score = finalize_score(a_result)
print(f"The visual score of the participant is: {v_score}")
print(f"The auditory score of the participant is: {a_score}")
print(f"the twin score of the participant is: {twin_score}")
                          

The dataset changed shape from (36, 4) to (36, 7)
The visual score of the participant is: 2.922440087145969
The auditory score of the participant is: 2.922440087145969
the twin score of the participant is: 0.0


In [131]:
print(total_score.head())

  stimulus  v_diff_r  v_diff_g  v_diff_b  a_diff_r  a_diff_g  a_diff_b  \
0        0       326       368       144       326       368       144   
1        1       278       396       276       278       396       276   
2        2       272        78       380       272        78       380   
3        3        44       302       324        44       302       324   
4        4       206       196       192       206       196       192   

   diff_r  diff_g  diff_b  
0       0       0       0  
1       0       0       0  
2       0       0       0  
3       0       0       0  
4       0       0       0  


In [132]:
print(v_result.dtypes)

stimulus    object
diff_r       int64
diff_g       int64
diff_b       int64
dtype: object
