In [3]:
import os
import numpy as np
import nibabel as nib
import pandas as pd
import matplotlib as mpl
import seaborn as sns

In [4]:
from ipynb.fs.full.visualization_helper_functions import *
from ipynb.fs.full.project_helper_functions_classes import create_folder

In [5]:
# Project-relative input/output locations used by this notebook.
stripped_data_folder = 'data/stripped_data'
original_data_folder = 'data/ordered_data'
# Folder passed to take_pkl_file_path when locating each feature pickle.
feature_output_folder = 'output/features'
additional_features_folder = 'output/features_additional'
# CSV read into `features_df`; one row per selected parameter set.
features_csv_path = 'output/EPE_stat_results/EPE_selected_features.csv'
# MR type -> sequence names; presumably the acquisitions available per type (TODO confirm).
sequences_dict = {'t1': ['t1_tra', 't1_sag'], 't2' : ['t2_cor', 't2_tra']}

In [6]:
# create_folder returns the folder path (echoed in the next cell);
# presumably it also creates the directory on disk — verify in the helper.
EF_coordinate_output_path = create_folder('output/EF_coordinates')

In [7]:
# Echo the folder path returned by create_folder.
EF_coordinate_output_path

'output/EF_coordinates'

In [8]:
# Inputs for take_case_matching_list: an Excel workbook path plus the two group identifiers.
# NOTE(review): the helper is not defined in this notebook (presumably it comes from the
# star-import of visualization_helper_functions). Later cells index its result by
# 'situation', 'case_id' and by a column named after each sequence (e.g. 't1_tra').
case_info_excel_path = 'output/MRI_informations.xlsx'
control_id = 'controls'
patient_id = 'patients'

case_matching_df = take_case_matching_list(case_info_excel_path, control_id, patient_id)

In [9]:
## import info
# Build `positive_case_df`: every supra-threshold row of every selected
# parameter set, tagged with the 1-based number of the parameter set
# (row position in `features_df`) in the 'parameter' column.

features_df = pd.read_csv(features_csv_path)
columns = ['situation', 'coordinates', 'case', 'sequence']

# One frame per parameter set; concatenated once after the loop instead of
# growing a DataFrame inside it.
foci_frames = []

for feature_ind in range(len(features_df)):
    feature_vals = features_df.iloc[feature_ind, :6].values
    feature_pkl_path = take_pkl_file_path(feature_vals, feature_output_folder)
    # NOTE: read_pickle can execute arbitrary code; only load pickles produced by this project.
    feature_df = pd.read_pickle(feature_pkl_path)

    ### update case names
    MR_type = feature_vals[0]
    case_list = list(feature_df['case'].values)

    # Guard on a non-empty list: indexing case_list[0] on an empty pickle raised IndexError.
    if case_list and MR_type not in case_list[0]:
        # Case names lack the sequence prefix: add it, and every row shares MR_type.
        case_list = [MR_type + '-' + case for case in case_list]
        seq_list = [MR_type] * len(case_list)
    else:
        # Case names already look like '<sequence>-<case>'.
        seq_list = [val.split('-')[0] for val in case_list]

    feature_df['case'] = case_list
    feature_df['sequence'] = seq_list

    threshold = features_df['threshold'].iloc[feature_ind]
    # .loc + .copy() yields an independent frame, so adding a column below
    # does not write into a slice of feature_df (SettingWithCopyWarning).
    foci_df = feature_df.loc[feature_df['value'] > threshold, columns].copy()
    foci_df['parameter'] = feature_ind + 1
    foci_frames.append(foci_df)

# Skip empty frames so they cannot influence the concatenated dtypes.
non_empty_frames = [frame for frame in foci_frames if len(frame) > 0]
if non_empty_frames:
    # Reverse so the most recent parameter set comes first, which is the row
    # order the original prepend-style concatenation produced.
    positive_case_df = pd.concat(non_empty_frames[::-1], axis = 0, join = 'outer', ignore_index=True)
else:
    positive_case_df = pd.DataFrame(columns = columns + ['parameter'])




In [10]:
## drop duplicates after additions
# Remove repeated rows and renumber the index from 0 in a single call.
positive_case_df = positive_case_df.drop_duplicates(ignore_index = True)

In [11]:
### assigning case ids
# Map each positive row to the 'case_id' stored in case_matching_df, using the
# row's situation and its '<sequence>-<case>' name, then derive the
# 'case_seq' label and rename the parameter column.

case_ids = []
for sit, case_name in zip(positive_case_df['situation'], positive_case_df['case']):
    mr_type, case_num = case_name.split('-')
    # case_matching_df holds one column per sequence name containing the case number.
    matched_ids = case_matching_df.loc[
        (case_matching_df['situation'] == sit) & (case_matching_df[mr_type] == case_num),
        'case_id',
    ]
    if matched_ids.empty:
        raise IndexError(f'no case_matching_df row for situation={sit!r}, {mr_type}={case_num!r}')
    # First match wins. Taking the value positionally from the filtered Series
    # avoids feeding an index *label* to .iloc.
    case_ids.append(matched_ids.iloc[0])

positive_case_df['case_id'] = pd.Series(case_ids, index = positive_case_df.index, dtype = object)

# The statements below used to sit inside the loop: they were recomputed for
# every row and never ran at all for an empty frame, leaving `parameters_id`
# undefined for the following cell.
positive_case_df['case_seq'] = positive_case_df['case_id'] + '(' + positive_case_df['sequence'] + ')'

## change parameter column name

positive_case_df = positive_case_df.rename(columns={"parameter": "parameter_set"})
parameters_id = "parameter_set"


In [12]:
# coordinates_df: (situation, coordinates, case_id) rows present in EVERY parameter set.
# collected_df : all positive rows of all parameter sets, most recent set first.
columns = ['situation', 'coordinates', 'case_id']
collected_columns = columns + ['case', 'parameter_set', 'case_seq']

coordinates_df = pd.DataFrame(columns = columns)
per_parameter_frames = []

for feature_ind in range(len(features_df)):

    in_parameter_set = positive_case_df[parameters_id] == feature_ind + 1
    temp_df = positive_case_df.loc[in_parameter_set, columns]
    per_parameter_frames.append(positive_case_df.loc[in_parameter_set, collected_columns])

    # Seed on the first iteration only. Testing `len(coordinates_df) == 0`
    # instead also fired whenever the running intersection became empty, which
    # restarted the intersection and threw away the rows collected so far.
    if feature_ind == 0:
        coordinates_df = temp_df.reset_index(drop = True)
    else:
        coordinates_df = pd.merge(temp_df, coordinates_df, on = columns, how = 'inner')

# Single concatenation, reversed to keep the newest-first row order; the first
# parameter set is no longer appended twice.
non_empty_frames = [frame for frame in per_parameter_frames if len(frame) > 0]
if non_empty_frames:
    collected_df = pd.concat(non_empty_frames[::-1], axis = 0, join = 'outer', ignore_index=True)
else:
    collected_df = pd.DataFrame(columns = collected_columns)
    

In [13]:
## create file with common ids
# Give every distinct (case_id, case_seq, coordinates) combination an 'EF_id':
# a counter that starts at 1 for each case_id and increases in order of first
# appearance. Rows sharing the same combination share the same id.

df = collected_df.copy()
# Regex replacement over the whole frame: 'patients' -> 'PWEs', 'patient27' -> 'PWE27', ...
df = df.replace(regex=['patient'],value='PWE')

case_coordinates = {}   # case_id -> number of distinct foci numbered so far
focus_ids = {}          # (case_id, case_seq, coordinates) -> assigned EF_id
ef_ids = []

# One pass with dictionary lookups replaces a full-frame rescan per row and no
# longer relies on index labels being valid positions for .iloc.
for case_id, case_seq, coordinates in zip(df['case_id'], df['case_seq'], df['coordinates']):
    focus_key = (case_id, case_seq, coordinates)
    if focus_key not in focus_ids:
        case_coordinates[case_id] = case_coordinates.get(case_id, 0) + 1
        focus_ids[focus_key] = case_coordinates[case_id]
    ef_ids.append(focus_ids[focus_key])

# Stored as float, as before (the column used to be initialised with np.zeros).
df['EF_id'] = np.asarray(ef_ids, dtype = float)

id_ef_df = df.copy()


In [14]:
# Preview the first rows of the table with the assigned EF_id values.
id_ef_df.head()

Unnamed: 0,situation,coordinates,case_id,case,parameter_set,case_seq,EF_id
0,controls,"(8.0, 178.0, 92.0)",control36,t1_tra-case21,9,control36(t1_tra),1.0
1,controls,"(4.0, 118.0, 92.0)",control2,t1_tra-case25,9,control2(t1_tra),1.0
2,controls,"(9.0, 140.0, 205.0)",control17,t1_tra-case10,9,control17(t1_tra),1.0
3,controls,"(9.0, 201.0, 200.0)",control17,t1_tra-case10,9,control17(t1_tra),2.0
4,PWEs,"(19.0, 144.0, 152.0)",PWE27,t1_tra-case17,9,PWE27(t1_tra),1.0


In [15]:
# Distinct '<sequence>-<case>' names present in the EF table.
case_sequences = id_ef_df['case'].unique()
# 'situation' values after the 'patient' -> 'PWE' replacement applied when id_ef_df was built.
situations =  ['controls', 'PWEs']
# Parameter-set numbers that have at least one row in id_ef_df.
selected_parameters = id_ef_df['parameter_set'].unique()

In [16]:
# Echo the parameter-set numbers (unique() keeps order of first appearance).
selected_parameters

array([9, 8, 7, 6, 5, 4, 3, 2, 1])

In [17]:
# Write one CSV of focus coordinates (x, y, z, label) per situation / case /
# parameter set, and record which MRI volume each output folder belongs to.
sit_folder = ['controls', 'patients']
positive_case_file_path_list = []
reshaped_data_folder = 'output/reshaped_data'

# `situations` and `sit_folder` are parallel: label used in the table -> folder name on disk.
for situation_label, situation_dir in zip(situations, sit_folder):

    situation_rows = id_ef_df[id_ef_df['situation'] == situation_label]
    situation_out_dir = create_folder(os.path.join(EF_coordinate_output_path, situation_dir))

    for case_seq in case_sequences:

        # Case names have the form '<sequence>-<case>'.
        seq, case = case_seq.split('-')
        case_rows = situation_rows[situation_rows['case'] == case_seq]

        for param in selected_parameters:

            param_rows = case_rows[case_rows['parameter_set'] == param].drop_duplicates()
            if len(param_rows) == 0:
                # Nothing found for this case under this parameter set.
                continue

            coordinates = param_rows['coordinates'].values
            print(f'coordinates number : {len(coordinates)}, case : {case}, sit : {situation_label}')

            # Transpose the sequence of (x, y, z) triples into three per-axis lists.
            x_values, y_values, z_values = (list(axis) for axis in zip(*coordinates))

            foci_table = pd.DataFrame({
                'x': x_values,
                'y': y_values,
                'z': z_values,
                'label': param_rows['EF_id'].values,
            })

            # One folder per case, one CSV per parameter set inside it.
            case_out_dir = create_folder(os.path.join(situation_out_dir, case))
            csv_path = case_out_dir + '/ParameterSet' + str(param) + '.csv'
            foci_table.to_csv(csv_path, index=False)

            mri_path = os.path.join(reshaped_data_folder, situation_dir, seq, case + '.nii.gz')
            positive_case_file_path_list.append([mri_path, case_out_dir])



coordinates number : 1, case : case21, sit : controls
coordinates number : 1, case : case21, sit : controls
coordinates number : 1, case : case21, sit : controls
coordinates number : 1, case : case25, sit : controls
coordinates number : 1, case : case25, sit : controls
coordinates number : 1, case : case25, sit : controls
coordinates number : 2, case : case10, sit : controls
coordinates number : 2, case : case10, sit : controls
coordinates number : 2, case : case10, sit : controls
coordinates number : 1, case : case17, sit : controls
coordinates number : 1, case : case17, sit : controls
coordinates number : 1, case : case17, sit : controls
coordinates number : 1, case : case17, sit : controls
coordinates number : 1, case : case17, sit : controls
coordinates number : 1, case : case17, sit : controls
coordinates number : 2, case : case7, sit : controls
coordinates number : 2, case : case7, sit : controls
coordinates number : 1, case : case7, sit : controls
coordinates number : 2, case : 

In [54]:
# Two-column table pairing each MRI volume path with the folder holding its coordinate CSVs.
path_table_columns = ['MRI_path', 'coordinate_folder_path']
positive_case_file_path_list_df = pd.DataFrame(positive_case_file_path_list, columns = path_table_columns)

In [55]:
# Persist the path table.
# NOTE(review): index=False is not passed here (unlike the per-case CSVs), so the row
# index is written as an extra unnamed first column — confirm downstream readers expect it.
positive_case_file_path_list_df.to_csv('output/positive_case_file_path_list_df.csv')