In [1]:
## TODO: add a collapse across wins/losses for each partner to the heatmap
## TODO: use the absolute value for that collapse ^^
## TODO: make sure all of the current output is demeaned
## TODO: add a dummy code for study ID to capture the third flip angle

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [None]:
# Lift pandas' display truncation: show full cell contents and every row
for option_name in ('display.max_colwidth', 'display.max_rows'):
    pd.set_option(option_name, None)

In [None]:
# Working directory the notebook is run from; the CSV inputs loaded in later cells are joined onto this path
script_dir = os.getcwd()

In [None]:
# Absolute path of the 'ds003745' directory that sits one level above the working directory
bids_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'ds003745'))

In [None]:
# Participant tables (one per study) live at the top level of the BIDS directory
age_srndna_path, age_rf1_path = (
    os.path.join(bids_dir, tsv_name)
    for tsv_name in ('participants.tsv', 'participants-rf1.tsv')
)

In [None]:
# Read both participant tables and stack them into a single frame
age_srndna = pd.read_table(age_srndna_path)
age_rf1 = pd.read_table(age_rf1_path)

# Rename the id column to 'sub' and turn 'sub-<number>' labels into plain integers
# so the frame can be merged with the other tables on 'sub'
age_df = (
    pd.concat([age_srndna, age_rf1])
    .rename(columns={"participant_id": "sub"})
    .assign(sub=lambda frame: frame['sub'].str.replace('^sub-', '', regex=True).astype(int))
)
display(age_df)

In [None]:
## Load the combined logs file and remove the erroneous ratings rows for sub 104 and sub 10369

logs = os.path.join(script_dir, 'combined_logs_with_age.csv')

# Index labels (as read from the CSV) of the rows to discard: 6-11 and 276-281
erroneous_rows = [*range(6, 12), *range(276, 282)]

logs_df = (
    pd.read_csv(logs)
    .drop(index=erroneous_rows)
    # Keep only the magnitude of each rating
    .assign(response=lambda frame: frame['response'].abs())
)

# Show the cleaned frame
display(logs_df)

In [None]:
# Quality-control metrics table: stores tsnr and fdmean.
# A later cell pivots it on 'Subject'/'Run' and reads the resulting vsmean_stan_* and fd_mean_* columns.
fmri_metrics = os.path.join(script_dir, 'exclusions_tsnr-fdmean.csv')
metrics_df = pd.read_csv(fmri_metrics)
display(metrics_df)

In [None]:
# L3 input list: stores the relevant FSL inputs.
# Later cells parse the subject and run numbers out of its 'path' column.
fsl_inputs = os.path.join(script_dir, 'L3inputs.csv')
fsl_df = pd.read_csv(fsl_inputs)
# Rich display, consistent with the other load cells (was a plain-text print)
display(fsl_df)

In [None]:
# Flip-angle designation per subject, to be used for dummy-variable coding.
# A later cell merges it on 'sub' and keeps its 'flip' column.
flip_angle =  os.path.join(script_dir, 'flip.csv')
flip_df = pd.read_csv(flip_angle)
display(flip_df)

In [None]:
# Reshape the ratings logs from long to wide so that each subject occupies exactly one row

# One column per (partner, trait) combination; pivot_table averages repeated entries by default
ratings_wide = logs_df.pivot_table(index='sub', columns=['partner', 'trait'], values='response')

# Collapse the two-level column index into flat names such as 'partner_3_trait_0'
ratings_wide.columns = [
    f'partner_{partner}_trait_{trait}' for partner, trait in ratings_wide.columns
]

# Bring each subject's age back in from the logs
sub_ages = logs_df[['sub', 'age']].drop_duplicates()

# After the merge 'sub' is an ordinary column; reset the row index and discard the old one
pivot_df = (
    ratings_wide
    .merge(sub_ages, on='sub', how='left')
    .reset_index()
    .drop(columns='index')
)

display(pivot_df.head())

In [None]:
# Parse the subject and run numbers out of every L3 input path
input_paths = fsl_df['path']

# 'sub-<digits>' gives the subject number
fsl_df['Subject'] = input_paths.str.extract(r'sub-(\d+)', expand=False).astype(int)

# 'run-<digits>' gives the run number; paths without a run label are coded as run 3
fsl_df['Run'] = input_paths.str.extract(r'run-(\d+)', expand=False).fillna('3').astype(int)

display(fsl_df)

In [None]:
# Put each subject's tsnr and fdmean for every run on a single row.
# Metric values should come from the same L1 run as the L3 input; for L2 inputs
# the average of both runs is used, so those averages are computed here as well.

# Wide layout: one row per Subject, one column per (metric, Run) pair
metrics_wide = metrics_df.pivot(index='Subject', columns='Run')

# Flatten the two-level column index into '<metric>_<run>' names
metrics_wide.columns = [f'{metric}_{run_number}' for metric, run_number in metrics_wide.columns]

# Across-run averages of each metric
parsed_metrics_df = metrics_wide.assign(
    avg_tsnr=lambda wide: wide[['vsmean_stan_1', 'vsmean_stan_2']].mean(axis=1),
    avg_fdmean=lambda wide: wide[['fd_mean_1', 'fd_mean_2']].mean(axis=1),
)

display(parsed_metrics_df)

In [None]:
# Build the per-subject covariate table: pick the tsnr/fdmean that matches each L3 input's run,
# add the study dummy, then attach flip angle and age.

# Attach the wide metrics to every L3 input row ('Subject' is a column in fsl_df
# and the index of parsed_metrics_df)
metric_run_merge_df = pd.merge(fsl_df, parsed_metrics_df, on='Subject')

# Select the metric from the same run as the input: run 1 -> *_1, run 2 -> *_2,
# anything else -> the across-run average
run = metric_run_merge_df['Run']
metric_run_merge_df['tsnr'] = np.where(
    run == 1,
    metric_run_merge_df['vsmean_stan_1'],
    np.where(run == 2, metric_run_merge_df['vsmean_stan_2'], metric_run_merge_df['avg_tsnr']),
)
metric_run_merge_df['fdmean'] = np.where(
    run == 1,
    metric_run_merge_df['fd_mean_1'],
    np.where(run == 2, metric_run_merge_df['fd_mean_2'], metric_run_merge_df['avg_fdmean']),
)

# Dummy coding for study ID (SRNDNA=0 and RF1=1): three-character subject numbers are coded 0
metric_run_merge_df['study'] = (
    metric_run_merge_df['Subject'].astype(str).str.len() != 3
).astype(int)

# Rename Subject column to sub for reusability
metric_run_merge_df = metric_run_merge_df.rename(columns={"Subject": "sub"})

# Attach flip angle and age. Both merges are inner joins, so a subject missing from
# either table is dropped from the result.
metric_run_merge_df = pd.merge(flip_df, metric_run_merge_df, on='sub')
metric_run_merge_df = pd.merge(age_df, metric_run_merge_df, on='sub')

# Keep only the covariates used downstream
metric_run_merge_df = metric_run_merge_df[['sub', 'age', 'tsnr', 'fdmean', 'flip', 'study']]

display(metric_run_merge_df)

In [None]:
# Mean-centre the continuous covariates and add the column of ones.

# Everything except the subject id and the 'flip' / 'study' codes gets demeaned
columns_to_demean = metric_run_merge_df.columns.difference(['sub', 'flip', 'study'])

# Subtract each column's own mean
continuous = metric_run_merge_df[columns_to_demean]
demeaned_df = continuous - continuous.mean()

# Put the untouched 'sub', 'flip' and 'study' columns back next to the demeaned ones
result_df = pd.concat([demeaned_df, metric_run_merge_df[['sub', 'flip', 'study']]], axis=1)

# Constant column of ones (stored as the string '1', as before)
result_df['ones'] = '1'

# Reorder columns with 'sub' and 'ones' at the beginning; the rest keep their order
columns = ['sub', 'ones'] + [name for name in result_df.columns if name not in ('sub', 'ones')]
result_df = result_df[columns]

# Display the result
display(result_df)

# Save to CSV
result_df.to_csv('result_df.csv', index=False)


In [None]:
## Merge the L3 inputs onto the covariates frame for quality assurance and easier copy-pasting

# Re-extract the subject number from each input path and make it the first column
fsl_df['sub'] = fsl_df['path'].str.extract(r'sub-(\d+)', expand=False).astype(int)
columns = ['sub'] + [name for name in fsl_df.columns if name != 'sub']
fsl_df = fsl_df[columns]

# Join the inputs to the covariates on 'sub', then discard the helper columns
final_df = pd.merge(result_df, fsl_df, on='sub').drop(columns=['Subject', 'Run'])

display(final_df)
final_df.to_csv('final_df.csv', index=False)


In [None]:
# Build the ratings covariates: partner differences and win/loss contrasts, one row per subject.
# FYI: Partner vals: 3=Friend, 2=Stranger, 1=Computer
# FYI: Trait vals: 0=How did it feel to win, 1=How did it feel to lose

# Friend is compared against each of the other two partners
partner_pairs = [(3, 2), (3, 1)]

# One difference column per (trait, partner pair),
# e.g. diff_partner_3_2_trait_0 = partner_3_trait_0 - partner_2_trait_0
diff_df = pd.DataFrame({
    f'diff_partner_{p1}_{p2}_trait_{trait}':
        pivot_df[f'partner_{p1}_trait_{trait}'] - pivot_df[f'partner_{p2}_trait_{trait}']
    for trait in [0, 1]
    for (p1, p2) in partner_pairs
})

# Ratings plus differences. This frame has its own name so it no longer rebinds
# `result_df`, which the final_df cell reads as the demeaned covariates.
ratings_df = pd.concat([pivot_df, diff_df], axis=1)

f_win, f_loss = ratings_df['partner_3_trait_0'], ratings_df['partner_3_trait_1']
s_win, s_loss = ratings_df['partner_2_trait_0'], ratings_df['partner_2_trait_1']
c_win, c_loss = ratings_df['partner_1_trait_0'], ratings_df['partner_1_trait_1']

# Calculate (F_win - F_loss) - (S_win - S_loss)
ratings_df['Fwin_minus_Floss_minus_Swin_minus_Sloss'] = (f_win - f_loss) - (s_win - s_loss)

# Calculate (F_win - F_loss) - (C_win - C_loss)
ratings_df['Fwin_minus_Floss_minus_Cwin_minus_Closs'] = (f_win - f_loss) - (c_win - c_loss)

# Calculate (F_win + S_win + C_win) - (F_loss + S_loss + C_loss)
ratings_df['FSC_win_minus_FSC_loss'] = (f_win + s_win + c_win) - (f_loss + s_loss + c_loss)

# Merge the ratings with the metric_run_merge dataframe.
# At this point all of the necessary covariates are in one dataframe.
full_covs_df = pd.merge(ratings_df, metric_run_merge_df, on='sub')

# Rename the condition columns for clarity. Both inputs carry an 'age' column, so the merge
# suffixes them; the one from metric_run_merge_df ('age_y') becomes 'age'.
full_covs_df = full_covs_df.rename(columns={
    "age_y": "age",
    "partner_1_trait_0": "C_win",
    "partner_1_trait_1": "C_loss",
    "partner_2_trait_0": "S_win",
    "partner_2_trait_1": "S_loss",
    "partner_3_trait_0": "F_win",
    "partner_3_trait_1": "F_loss",
    "diff_partner_3_2_trait_0": "FminS_win",
    "diff_partner_3_1_trait_0": "FminC_win",
    "diff_partner_3_2_trait_1": "FminS_loss",
    "diff_partner_3_1_trait_1": "FminC_loss"
})

# Save the ratings/differences frame. The file name is unchanged from the original cell.
# NOTE(review): this overwrites the 'result_df.csv' written by the demeaning cell -
# confirm whether a separate file name is wanted.
ratings_df.to_csv('result_df.csv', index=False)

# Show the full covariates table
display(full_covs_df)


In [None]:
# Almost finished here: two tables are created from the full covariates frame.
# - table_sub keeps 'sub' so the row order can be checked against the L3 inputs,
#   and is the table used for the correlation matrix.
# - table is meant for copying and pasting into the FSL GUI.
# Custom tables for other models can be built the same way.

qc_columns = ['tsnr', 'fdmean']
rating_columns = [
    'C_win', 'C_loss',
    'S_win', 'S_loss',
    'F_win', 'F_loss',
    'FminS_win', 'FminS_loss',
    'FminC_win', 'FminC_loss',
]
contrast_columns = [
    'Fwin_minus_Floss_minus_Swin_minus_Sloss',
    'Fwin_minus_Floss_minus_Cwin_minus_Closs',
    'FSC_win_minus_FSC_loss',
]

table_sub = full_covs_df[['age', 'sub'] + qc_columns + rating_columns + contrast_columns]

table = full_covs_df[['age'] + qc_columns + rating_columns]

display(table_sub)

In [None]:
# Correlation heatmap of every column in table_sub, drawn on a 12x8 inch figure
fig, ax = plt.subplots(figsize=(12, 8))

# Pairwise correlations, annotated to two decimals
correlations = table_sub.corr()
heatmap = sns.heatmap(correlations, cmap='vlag', annot=True, fmt=".2f", linewidths=.5, ax=ax)

# Render the figure
plt.show()