In [6]:
import numpy as np
import pandas as pd
import pathlib
import os
import random
import time
from tqdm import *

In [1]:
from ipynb.fs.full.project_helper_functions_classes import *
from ipynb.fs.full.Feature_extraction_classes_functions import *


In [2]:
class MRC_Analysis():
    """Run the MRC feature calculation for every stripped/original file pair.

    Constructing the object prepares the output folder tree and builds the list
    of file pairs that still have to be processed; ``make_MRC_analysis`` then
    distributes the per-file work (``MRC_analysis``) over a process pool.
    """

    @staticmethod
    def _default_features():
        """Return a fresh dict holding the default analysis settings.

        A new dict (and a new ``ROI_shapes`` list) is built on every call so
        that instances never share mutable state through a default argument.
        """
        return {'min_step' : 1, 'max_step' : 0,
                'min_power' : 1, 'max_power' : 5,
                'ROI_shapes': ['circle', 'square'],
                'primary_rate' : 1, 'secondary_rate' : 1,
                'calculate_all_powers' : False, 'calculate_all_steps' : False}

    def __init__(self, stripped_data_folder, original_data_folder, output_root_folder, MRC_output_folder,
                 MRC_analysis_features = None):
        """Prepare paths, output folders and the list of files to process.

        Parameters
        ----------
        stripped_data_folder, original_data_folder : str
            Folders holding the stripped and the original data; stored as
            absolute paths.
        output_root_folder, MRC_output_folder : str
            The results are written below
            ``<abspath(output_root_folder)>/<MRC_output_folder>``.
        MRC_analysis_features : dict, optional
            Analysis settings. Keys that are missing are filled in from
            ``_default_features()``; ``None`` selects all defaults.
        """
        # Merge the caller's settings over a private copy of the defaults.
        features = self._default_features()
        if MRC_analysis_features is not None:
            features.update(MRC_analysis_features)
        self.MRC_analysis_features = features

        self.stripped_data_folder_path =  os.path.abspath(stripped_data_folder)
        self.original_data_folder_path =  os.path.abspath(original_data_folder)
        self.mainfolders = take_folder_list(self.stripped_data_folder_path)

        ## specify file list for process and folder list for record
        self.file_names_list, folder_path_list = take_special_file_list(self.mainfolders,
                                                                        self.stripped_data_folder_path, self.original_data_folder_path)

        self.output_path = os.path.join(os.path.abspath(output_root_folder), MRC_output_folder)
        create_folder(self.output_path)
        create_mirror_subfolders(folder_paths =folder_path_list,
                                 old_root_path = self.stripped_data_folder_path,
                                 new_root_path = self.output_path)

        ## keep only the files for which check_if_MRC_completed does not report finished results
        min_step , max_step = features['min_step'], features['max_step']
        self.file_names_list = [fn for fn in self.file_names_list
                                if not check_if_MRC_completed(self.output_path, fn[0],
                                                              self.stripped_data_folder_path,
                                                              min_step , max_step)]

    def make_MRC_analysis(self, processor = 2):
        """Process every pending file pair using ``processor`` worker processes."""
        self.run_apply_async_multiprocessing(self.MRC_analysis, self.file_names_list, processor)

    def MRC_analysis(self, file_names):
        """Compute and save the MRC results of one file pair.

        Parameters
        ----------
        file_names : sequence
            ``file_names[0]`` is passed on as the stripped file and
            ``file_names[1]`` as the original file.

        Returns
        -------
        0 when ``check_if_MRC_completed`` reports the file as already done,
        otherwise ``None`` after the results were handed to
        ``save_results_to_csv``.

        Raises
        ------
        ValueError
            If ``ROI_shapes`` is empty, because there is nothing to compute.
        """
        settings = self.MRC_analysis_features
        min_step, max_step = settings['min_step'], settings['max_step']
        min_power, max_power = settings['min_power'], settings['max_power']
        ROI_shapes = settings['ROI_shapes']
        primary_rate, secondary_rate = settings['primary_rate'], settings['secondary_rate']
        calculate_all_powers = settings['calculate_all_powers']
        calculate_all_steps = settings['calculate_all_steps']

        ## check if MRC files created before as in desired limits
        if check_if_MRC_completed(self.output_path, file_names[0], self.stripped_data_folder_path, min_step, max_step) :
            return(0)

        # inner function running the MRC calculation for a single ROI shape
        def MRC_calculation_(roi_shape):
            fe = Feature_Extraction(stripped = file_names[0], original = file_names[1],
                                    min_power = min_power, max_power = max_power,
                                    min_step = min_step, max_step = max_step,
                                    ROI_shape = roi_shape,
                                    primary_rate = primary_rate, secondary_rate = secondary_rate,
                                    all_powers = calculate_all_powers, all_steps = calculate_all_steps)
            fe.calculate_features(calculate_MRC = True)
            return(fe.MRC_results)

        results = [MRC_calculation_(roi_shape) for roi_shape in ROI_shapes]

        if not results:
            raise ValueError("MRC_analysis_features['ROI_shapes'] must contain at least one ROI shape")

        # A single shape is saved as-is; any larger number of shapes is united
        # into one table (previously only exactly two shapes were handled).
        if len(results) == 1:
            MRC_results = results[0]
        else:
            MRC_results = pd.concat(results, ignore_index=True)

        save_results_to_csv(MRC_results, self.output_path, file_names[0], self.stripped_data_folder_path)

    def run_apply_async_multiprocessing(self, func, argument_list, num_processes):
        """Apply ``func`` to every entry of ``argument_list`` in a process pool.

        A tuple entry is unpacked into positional arguments, any other entry is
        passed as the single argument. Results are returned in submission
        order. The pool is always joined; if submitting or collecting fails,
        the workers are terminated before the exception propagates.
        """
        # NOTE(review): because tuples are unpacked, a tuple entry reaches
        # MRC_analysis as several arguments - confirm that file_names_list
        # holds lists rather than tuples.
        pool = Pool(processes=num_processes)
        try:
            jobs = [pool.apply_async(func=func, args=(*argument,)) if isinstance(argument, tuple)
                    else pool.apply_async(func=func, args=(argument,))
                    for argument in argument_list]
            pool.close()
            result_list_tqdm = [job.get() for job in tqdm(jobs)]
        except BaseException:
            pool.terminate()
            raise
        finally:
            # join() is legal here: close() or terminate() has been called on every path
            pool.join()
        return result_list_tqdm
    


In [16]:
class MRC_statistical_analysis():
    """Collect per-folder (or per-sequence) statistics of the MRC results.

    ``make_analysis`` / ``make_collective_analysis`` gather the results in
    memory; ``save_results`` afterwards writes them with ``save_stat_to_csv``.
    """

    def __init__(self, stripped_data_folder = 'data/stripped_data', 
                 output_root_folder = 'output', MRC_output_folder = 'output/MRC_results',
                 stat_results_folder = 'MRC_stat_results', 
              stat_limits = {'auc_limit' : 0.7, 'p_limit' : 0.01}):
        """Resolve the paths and read the statistical limits.

        Parameters
        ----------
        stripped_data_folder : str
            Folder whose sub-folders are the clinical situations.
        output_root_folder, stat_results_folder : str
            The statistics are saved in ``output_root_folder/stat_results_folder``.
        MRC_output_folder : str
            Folder holding the MRC result CSV files.
        stat_limits : dict
            Must provide ``'auc_limit'`` and ``'p_limit'``; it is only read.
        """
        ## specifying paths
        self.stripped_path =  os.path.abspath(stripped_data_folder)
        self.MRC_path = os.path.abspath(MRC_output_folder)

        ## create folder for saving statistic results
        # NOTE(review): relies on create_folder returning the folder path - confirm.
        self.stat_result_path = create_folder(os.path.join(output_root_folder,stat_results_folder))

        ## specify main folders lists
        self.situations = take_folder_list(self.stripped_path)

        self.lim_auc, self.lim_p = stat_limits['auc_limit'], stat_limits['p_limit']

    def _situation_starting_with(self, initial):
        """Return the first situation folder whose name starts with ``initial``.

        Raises ``IndexError`` (as the plain list lookup did) when no folder
        matches, but with a message naming what was searched for.
        """
        matches = [situation for situation in self.situations if initial == situation[0]]
        if not matches:
            raise IndexError("no folder starting with %r found among %r" % (initial, self.situations))
        return matches[0]

    def _start_run(self, column, selected_features_path, take_p_csv, take_auc_csv, take_meaningful_csv):
        """Store the options of a run and reset all result containers."""
        self.take_p_csv, self.take_auc_csv, self.take_meaningful_csv = take_p_csv, take_auc_csv, take_meaningful_csv
        # Remembered so that save_results can write to the file the caller asked for.
        self.selected_features_path = selected_features_path
        ## column names identifying the values in the result lists
        self.column = column
        self.all_p_results, self.all_AUC_results, self.meaningful_results = [], [], []
        self.all_selected_features = []

    def _collect(self, cs, folder_type):
        """Query ``cs`` for its statistics and append them to the containers."""
        p_results, AUC_results, meaningful_features = cs.take_stats(folder_type = folder_type,
                                                                    auc_limit = self.lim_auc, p_limit = self.lim_p)
        self.all_selected_features.extend(cs.power_analysis())

        ## store only the result kinds that were requested
        if self.take_p_csv:
            self.all_p_results.extend(p_results)
        if self.take_auc_csv:
            self.all_AUC_results.extend(AUC_results)
        if self.take_meaningful_csv:
            self.meaningful_results.extend(meaningful_features)

    def make_analysis(self, selected_features_path = 'selected_features.csv',
                     take_p_csv = True, take_auc_csv = True, take_meaningful_csv = True):
        """Analyse every sub-folder shared by all situations, one at a time.

        Parameters
        ----------
        selected_features_path : str
            File name ``save_results`` uses for the selected features when it
            is not given one explicitly.
        take_p_csv, take_auc_csv, take_meaningful_csv : bool
            Which result kinds are collected (and later saved).
        """
        control_id = self._situation_starting_with('c')
        patient_id = self._situation_starting_with('p')

        self._start_run(['MRI_type', 'shape', 'primary_rate', 'secondary_rate', 'step', 'power'],
                        selected_features_path, take_p_csv, take_auc_csv, take_meaningful_csv)

        ## specify subfolders lists for each situation (control and patients)
        subfolders = {}
        for situation in self.situations:
            subfolders[situation] = take_folder_list(os.path.join(self.stripped_path, situation))
        ## specify mutual subfolders for situations
        mutual_subfolders = take_mutual_members(subfolders)

        for subfolder in tqdm(mutual_subfolders) :

            controls_csv_files_paths = take_spesific_files_paths(os.path.join(self.MRC_path, control_id, subfolder), '.csv')
            patients_csv_files_paths = take_spesific_files_paths(os.path.join(self.MRC_path, patient_id, subfolder), '.csv')

            ## calculation of p value (Mann-Whitney U) and AUC (ROC Curve analysis)
            cs = analyse_MRC_results(controls_csv_files_paths, patients_csv_files_paths)
            self._collect(cs, subfolder)

    def make_collective_analysis(self, selected_features_path = 'selected_features_coll.csv',
                     take_p_csv = True, take_auc_csv = True, take_meaningful_csv = True,
                     sequences = {'t1': ['t1_tra', 't1_sag'], 't2' : ['t2_cor', 't2_tra']},
                    case_info_excel_file = 'output/MRI_informations.xlsx'):
        """Analyse groups of sub-folders (sequences) together.

        Parameters
        ----------
        selected_features_path : str
            File name ``save_results`` uses for the selected features when it
            is not given one explicitly.
        take_p_csv, take_auc_csv, take_meaningful_csv : bool
            Which result kinds are collected (and later saved).
        sequences : dict
            Maps a sequence name to the list of sub-folders evaluated together.
        case_info_excel_file : str
            Passed to ``activate_collective_evaluation`` as ``caseid_info_file``.
        """
        self._start_run(['sequence', 'shape', 'primary_rate', 'secondary_rate', 'step', 'power'],
                        selected_features_path, take_p_csv, take_auc_csv, take_meaningful_csv)

        control_id = self._situation_starting_with('c')
        patient_id = self._situation_starting_with('p')

        for key in tqdm(sequences.keys()):

            controls_csv_files_paths, patients_csv_files_paths = {}, {}

            for subfolder in sequences[key] :
                controls_csv_files_paths[subfolder] = take_spesific_files_paths(os.path.join(self.MRC_path, control_id, subfolder),'.csv')
                patients_csv_files_paths[subfolder] = take_spesific_files_paths(os.path.join(self.MRC_path, patient_id, subfolder), '.csv')

            ## calculation of p value (Mann-Whitney U) and AUC (ROC Curve analysis)
            cs = analyse_MRC_results(collective_evaluation=True)
            cs.activate_collective_evaluation(controls_csv_files_paths, patients_csv_files_paths,
                                    sequence_folders = sequences[key], caseid_info_file=case_info_excel_file,
                                              control_id = control_id, patient_id = patient_id)
            self._collect(cs, key)

    def save_results(self, p_csv_path = 'p_values.csv', auc_csv_path = 'auc_values.csv', 
                      meaningful_stat_csv_path = 'meaningful_stats.csv', 
                     selected_features_path = None,
                     sort_columns = True):
        """Write the collected results into the statistics folder.

        Must be called after ``make_analysis`` or ``make_collective_analysis``.

        Parameters
        ----------
        p_csv_path, auc_csv_path, meaningful_stat_csv_path : str
            File names of the optional result tables.
        selected_features_path : str, optional
            File name for the selected features. When omitted, the name given
            to the preceding analysis call is used (``'selected_features.csv'``
            if none was recorded), so a collective run no longer lands in the
            file of the per-folder run.
        sort_columns : bool
            Forwarded to ``save_stat_to_csv``.
        """
        if selected_features_path is None:
            selected_features_path = getattr(self, 'selected_features_path', 'selected_features.csv')

        ## save results to csv files
        save_stat_to_csv(self.all_selected_features, labels = self.column  + ['AUC_value', 'p_value'],
                             path = os.path.join(self.stat_result_path, selected_features_path),
                             sort_columns = sort_columns,
                             ascending_order = False, sorting_columns = ['AUC_value', 'p_value'])
        if self.take_p_csv:
            save_stat_to_csv(self.all_p_results, labels = self.column + ['p_value'],
                             path = os.path.join(self.stat_result_path, p_csv_path), sort_columns = sort_columns,
                             ascending_order = True, sorting_columns = ['p_value'])
        if self.take_auc_csv :
            save_stat_to_csv(self.all_AUC_results, labels = self.column + ['AUC_value'],
                             path = os.path.join(self.stat_result_path, auc_csv_path), sort_columns = sort_columns,
                             ascending_order = False, sorting_columns = ['AUC_value'])
        if self.take_meaningful_csv :
            save_stat_to_csv(self.meaningful_results, labels = self.column + ['AUC_value', 'p_value'],
                         path = os.path.join(self.stat_result_path, meaningful_stat_csv_path), sort_columns = sort_columns,
                         ascending_order = False, sorting_columns = ['AUC_value', 'p_value'])

     
        
                

In [14]:
class EPE_Analysis__():
    """Drive the EPE analysis for the features selected by the MRC statistics.

    For every MRI type (``make_analysis``) or sequence
    (``make_collective_analysis``) found in the feature table an
    ``EPE_analysis`` object is run and a CSV with the results is written.
    """

    def __init__(self, stripped_data_folder = 'data/stripped_data',
            original_data_folder = 'data/ordered_data',
            output_root_folder = 'output' ,
            stat_results = 'EPE_stat_results',
            feature_out = 'features'):
        """Resolve the working paths and list the available files.

        The two output folders are created through ``create_folder`` below the
        absolute ``output_root_folder``.
        """
        # absolute input / output locations
        self.stripped_path = os.path.abspath(stripped_data_folder)
        self.original_path = os.path.abspath(original_data_folder)
        self.output_path = os.path.abspath(output_root_folder)

        # output folders for the statistics and for the extracted features
        stat_dir = os.path.join(self.output_path, stat_results)
        self.EPE_stat_results_folder_path = create_folder(stat_dir)
        feature_dir = os.path.join(self.output_path, feature_out)
        self.feature_output_folder_path = create_folder(feature_dir)

        # main folders (clinical situations)
        self.situations = take_folder_list(self.stripped_path)

        # the control / patient folders are the first names starting with 'c' / 'p'
        control_candidates = [name for name in self.situations if name[0] == 'c']
        self.control_id = control_candidates[0]
        patient_candidates = [name for name in self.situations if name[0] == 'p']
        self.patient_id = patient_candidates[0]

        # all file names; the second returned value (folder paths) is not used here
        self.file_list, _folder_paths = take_special_file_list(self.situations,
                                                               self.stripped_path, self.original_path)

    def _new_analyser(self, per_case_value_limit, follow_proccess):
        """Build the ``EPE_analysis`` object used for one key."""
        return EPE_analysis(self.control_id, self.patient_id, PCVL  = per_case_value_limit,
                            feature_output_folder = self.feature_output_folder_path,
                            follow_proccess = follow_proccess)

    def _merge_sort_write(self, collected, new_rows, csv_name):
        """Put ``new_rows`` in front of ``collected``, sort, save and return the table.

        NOTE(review): the table keeps growing across keys, so the file of a
        later key also contains the rows of the earlier ones - confirm that
        this is intended.
        """
        merged = pd.concat([new_rows, collected], axis = 0, ignore_index=True)
        merged = merged.sort_values(by = 'specificity(EPE)', ascending= False)
        merged.to_csv(os.path.join(self.EPE_stat_results_folder_path, csv_name), index =False)
        return merged

    def make_analysis(self, features_csv_path = 'MRC_stat_results/selected_features.csv', 
                      EPE_stat_file_name = 'results.csv',
                      specificity_min = 0.9, sensitivity_min = 0.6,
                      per_case_value_limit = 1000,
                      record_features = True,
                      follow_proccess = 'only_stats',
                     processor_number = 4):
        """Run the EPE analysis separately for every MRI type in the feature table.

        ``features_csv_path`` is read relative to the output root and one
        ``<MRI_type>-<EPE_stat_file_name>`` file is written per type.
        ``record_features`` and ``processor_number`` are accepted but not used
        by this method.
        """
        # feature table produced by the MRC statistics
        feature_table = pd.read_csv(os.path.join(self.output_path, features_csv_path))

        # file paths per MRI type, grouped by clinical situation
        paths_by_type = {}
        for mri_type in feature_table['MRI_type'].unique():
            paths_by_type[mri_type] = special_filter_path(self.file_list, self.situations, [mri_type])

        collected = pd.DataFrame()
        for mri_type, grouped_paths in paths_by_type.items():
            # features that belong to this MRI type only
            type_features = feature_table[feature_table['MRI_type'] == mri_type]
            analyser = self._new_analyser(per_case_value_limit, follow_proccess)
            type_results = analyser.activate_EPE_analysis(type_features,
                                                          grouped_paths[self.control_id],
                                                          grouped_paths[self.patient_id],
                                                          specificity_min = specificity_min,
                                                          sensitivity_min = sensitivity_min)
            collected = self._merge_sort_write(collected, type_results,
                                               mri_type + '-' + EPE_stat_file_name)

    def make_collective_analysis(self, features_collective_csv_path = 'MRC_stat_results/selected_features_coll.csv', 
                      coll_EPE_stat_file_name = 'coll_results.csv',
                      sequences = {'t1': ['t1_tra', 't1_sag'], 't2' : ['t2_cor', 't2_tra']},
                      case_info_excel_file = 'output/MRI_informations.xlsx',
                      specificity_min = 0.9, sensitivity_min = 0.6,
                      per_case_value_limit = 1000,
                      record_features = True,
                      follow_proccess = 'only_stats',
                     processor_number = 2):
        """Run the EPE analysis per sequence, evaluating its folders together.

        ``features_collective_csv_path`` is read relative to the output root and
        one ``<sequence>-<coll_EPE_stat_file_name>`` file is written per
        sequence. ``record_features`` and ``processor_number`` are accepted but
        not used by this method.
        """
        # feature table produced by the collective MRC statistics
        feature_table = pd.read_csv(os.path.join(self.output_path, features_collective_csv_path))

        # file paths per sequence, grouped by clinical situation
        paths_by_sequence = {}
        for sequence_name in feature_table['sequence'].unique():
            paths_by_sequence[sequence_name] = special_filter_path(self.file_list, self.situations,
                                                                   sequences[sequence_name])

        collected = pd.DataFrame()
        for sequence_name, grouped_paths in paths_by_sequence.items():
            # features that belong to this sequence only
            sequence_features = feature_table[feature_table['sequence'] == sequence_name]
            analyser = self._new_analyser(per_case_value_limit, follow_proccess)
            sequence_results = analyser.activate_EPE_analysis(sequence_features,
                                                              grouped_paths[self.control_id],
                                                              grouped_paths[self.patient_id],
                                                              specificity_min = specificity_min,
                                                              sensitivity_min = sensitivity_min,
                                                              activate_collective_analysis = True,
                                                              info_path = case_info_excel_file,
                                                              sequence_folders = sequences[sequence_name])
            collected = self._merge_sort_write(collected, sequence_results,
                                               sequence_name + '-' + coll_EPE_stat_file_name)
            

        
                
            



In [4]:
def create_test_data(root_folder_path, folder_name,  folder_list):
    """Create a small synthetic data set of control and patient volumes.

    Below ``root_folder_path/folder_name`` the folders ``controls`` and
    ``patients`` are created, each with one sub-folder per entry of
    ``folder_list``. Every sub-folder receives five files ``case1.nii.gz`` ..
    ``case5.nii.gz``; files that already exist are left untouched.

    Each volume has shape (5, 100, 100): the values 1, 2, 3, 2, 4 repeat along
    the last axis and the 5x5 patch ``[2, 48:53, 48:53]`` is set to 2. Patient
    volumes additionally carry the value 6 at the returned coordinates.

    Returns
    -------
    list
        ``[2, 50, 50]``, the voxel that differs in the patient volumes.
    """
    base_dir = os.path.join(root_folder_path, folder_name)

    # folder tree: <base>/<group>/<folder>
    create_new_folder(base_dir)
    for group in ['controls', 'patients']:
        group_dir = os.path.join(base_dir, group)
        create_new_folder(group_dir)
        for folder in folder_list:
            create_new_folder(os.path.join(group_dir, folder))

    def build_volume():
        # Broadcasting one 100-long row over the whole array yields the same
        # content as filling it tile by tile with the 5x5 pattern.
        volume = np.zeros((5, 100, 100))
        volume[...] = np.tile(np.array([1, 2, 3, 2, 4]), 20)
        volume[2, 48:53, 48:53] = np.ones((5, 5)) + 1
        return volume

    coords = [2, 50, 50]
    slice_idx, row_idx, col_idx = coords

    for folder in folder_list:
        control_dir = os.path.join(base_dir, 'controls', folder)
        patient_dir = os.path.join(base_dir, 'patients', folder)

        for case_number in range(1, 6):
            case_file = 'case' + str(case_number) + '.nii.gz'
            volume = build_volume()

            # control volume: the plain pattern
            control_image = nib.Nifti1Image(volume, affine=np.eye(4))
            control_file = os.path.join(control_dir, case_file)
            if not os.path.isfile(control_file):
                nib.save(control_image, control_file)

            # patient volume: same array with the marker voxel changed
            volume[slice_idx][row_idx][col_idx] = 6
            patient_image = nib.Nifti1Image(volume, affine=np.eye(4))
            patient_file = os.path.join(patient_dir, case_file)
            if not os.path.isfile(patient_file):
                nib.save(patient_image, patient_file)

    return(coords)

        

In [7]:
def test_EF_Coordinates(estimated_coordinates, EF_coordinates):
    for a,b,c in estimated_coordinates:
        if [a,b,c] != EF_coordinates:
            print("The test is failed. Coordinates are not matching !")
            return(0)
    print(" The test is passed. Algorithm is working properly! ")
        
    

In [6]:
def create_excel_file(excel_path, situations, subfolder_list):
    """Create a workbook with one sheet per situation/sub-folder pair.

    Nothing happens when ``excel_path`` already exists as a file. Otherwise a
    sheet named ``<situation>_<subfolder>`` is written for every combination;
    each sheet holds the same five rows with the columns ``input_name``
    (a .. e) and ``case_name`` (case1 .. case5).
    """
    # an existing workbook is never overwritten
    if os.path.isfile(excel_path):
        return

    # every sheet gets the same content, so the table is built only once
    template = pd.DataFrame({'input_name': ['a', 'b', 'c', 'd', 'e'],
                             'case_name': ['case1', 'case2', 'case3', 'case4', 'case5']})

    with pd.ExcelWriter(excel_path) as writer:
        sheet_names = [situation + '_' + subfolder
                       for situation in situations
                       for subfolder in subfolder_list]
        for sheet_name in sheet_names:
            template.to_excel(writer, sheet_name = sheet_name)

In [3]:
def create_new_folder(folder_path):
    """Create ``folder_path`` unless something already exists at that path.

    Missing parent folders are created as well, and a folder that appears
    between the existence check and the creation (e.g. made by a parallel
    process) does not raise an error.
    """
    if not os.path.exists(folder_path):
        # makedirs instead of mkdir: builds the parents and tolerates the race
        os.makedirs(folder_path, exist_ok=True)