"""
Contents:

0 - import necessary libraries

1 - general aim functions 

    1.1 - create_folder (function)
         : creates new folders
    1.2 - create_mirror_subfolders
         : create mirror folder     
    1.3 - take_folder_list (function)
         : gives the list of folders within a specific folder
    1.4 - take_path_list (function)
         : gives a list of absolute paths
    1.5 - take_spesific_file_names (function)
         : gives file names list with spesific extension
    1.6 - take_mutual_members (function)
         : gives mutual members of lists within dictionary
    1.7 - take_spesific_files_paths (function)
         : gives a list of paths of the files with the defined extension within the given folder
    1.8 - special_filter_path
         : seperate and filter given path based on given conditions
    1.9 - extract_case_id
         : extract case id from nifti file path
    
2 - function for obtaining MRI acquisition information

    2.1 - take_acquisition_info  (function)
      : this function gives chosen dicom metadata in data frame format

3 -  class convert dicom file to nifti files

    3.1 convert_dcm_folders_to_nifti_folder (class)    
      : converts dicom file sequences to single nifti file (.nii.gz)

4 - spesific functions to MRC analysis

    4.1 - take_special_file_list (function)
         : gives spesific file path and folder path informations for MRC analysis
    4.2 - read_multipl_csv_files (function)
         : gives a list which in consist of MRC results
    4.3 - read_multipl_csv_files_to_dict (function)
         : gives a list consist of dataframes which are including features informations and multipl MRC results

5 - Functions and classes for statistical analysis

    5.1 - analyse_MRC_results (class)
         : makes Mann-Whitney U test and ROC Curve analysis and gives p values and AUC stats
    5.2 - find_p_value (function)
         : gives p value based on Mann-Whitney U test         
    5.3 - find_auc (function)
         : gives AUC based on ROC analysis
    5.4 - save_stat_to_csv(function)
         : save given data into the csv file in a specific format

      
"""

# 0 - import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import os 
import pathlib

import dicom2nifti
import pydicom

from scipy.stats import mannwhitneyu
import sklearn.metrics as metrics


In [1]:
from ipynb.fs.full.Feature_extraction_classes_functions import *

# 1- general aim functions or classes

In [None]:
# 1.1
def create_folder(folder):
    """
    Create a folder (together with any missing parent folders) and return its path.

    param  : folder (type : str)
            - path-like string : used directly as the folder path
            - bare folder name : resolved against the current working directory

    output :  folder_path (type : str)
                the given path unchanged when it already exists,
                otherwise the absolute path of the newly created folder

    """
    folder_path = folder

    # nothing to do when the folder is already there
    if os.path.exists(folder_path):
        return folder_path

    if isinstance(folder_path, str):
        folder_path = os.path.abspath(folder_path)

    # makedirs (unlike mkdir) also creates missing parent folders;
    # exist_ok covers the folder appearing between the check above and this call
    os.makedirs(folder_path, exist_ok=True)
    return folder_path
    

In [None]:
# 1.2
def create_mirror_subfolders(folder_paths = [''], 
                         old_root_path = '', new_root_path = ''):
    
    """
    Recreate, below a new root, the folder hierarchy that the given folders
    have below their original root.
    
    params  : 
    folder_paths (type : list) : existing folder paths to mirror
    old_root_path (type : str(path_like)) : root path the given folders live under
    new_root_path (type : str(path_like)) : root path under which the mirror folders are created

    output : -

    raises : ValueError when old_root_path is an empty string
             (str.split does not accept an empty separator)
    
    """
    # characters that may separate the old root from the remaining sub-path
    separators = os.sep + (os.altsep or '')

    for folder_path in folder_paths:
        # part of the path that follows the (last occurrence of the) old root
        child_path = folder_path.split(old_root_path)[-1]
        # strip only real separators, so the folder name stays intact even when
        # old_root_path already ends with a separator
        child_path = child_path.lstrip(separators)
        new_path = os.path.join(new_root_path, child_path)

        if not os.path.exists(new_path):
            # makedirs also creates the new root / intermediate folders if missing
            os.makedirs(new_path, exist_ok=True)
            
            


In [None]:
# 1.3
def take_folder_list(folder_path):
    
    """
    Give the names of the folders located directly inside the given folder.
    
    params  : 
    folder_path (type : str(path_like)) : existing folder path  
            
    output: 
    folder_list (type : list) : folder names list;
                 None (after printing an error message) when folder_path
                 is not an existing folder
    
    """
    
    # isdir (rather than exists) also rejects a path that points at a regular
    # file, which would otherwise make os.listdir raise
    if not os.path.isdir(folder_path):
        print('"take_folder_list" function error : \n the given path is not a valid folder path')
        return None
    
    return [entry for entry in os.listdir(folder_path)
            if os.path.isdir(os.path.join(folder_path, entry))]

In [None]:
# 1.4
def take_path_list(root_folder, folder_list):

    """
    Build the absolute paths of the given folder names below a root folder.
    
    params  : 
    root_folder (type : str(path_like))  : existing folder path 
    folder_list (type : list) : names of the folders inside root_folder
            
    output: 
    paths (type : list) : absolute path of every name in folder_list;
            None (after printing an error message) when the root folder does
            not exist, folder_list is not a list, or folder_list is empty
    
    """

    base = os.path.abspath(root_folder)

    # the checks run in this order; only the first failing one is reported
    problem = None
    if not os.path.exists(base):
        problem = '"take_path_list" function error : \n the given root_folder is not a valid folder'
    elif type(folder_list) is not list:
        problem = '"take_path_list" function error :\n the input is not a list'
    elif not folder_list:
        problem = '"take_path_list" function error :\n the input list is empty'

    if problem is not None:
        return print(problem)

    return [os.path.join(base, name) for name in folder_list]

In [None]:
# 1.5

def take_spesific_file_names(path, extension):
    
    """
    List the entries of a folder whose name contains the given extension.
    
    params  : 
    path (type : str(path_like))  : folder path which includes the files
    extension  (type : str)  : text searched for inside every entry name
                               (substring match, not only at the end of the name)

    output: 
    file_list (type : list) : names of the matching entries
    
    """
    return [name for name in os.listdir(path) if extension in name]

In [None]:
# 1.6

def take_mutual_members(member_dict):
    
    """
    Give the members that are present in every list of the dictionary.
    
    params  : 
    member_dict (type : dictionary)  : dictionary whose values are lists of members (int/float/str)

    output: 
    mutual_members (type : list) : members of the first list that also occur in
                    all other lists, in the order (and multiplicity) of the first list;
                    an empty list for an empty dictionary;
                    None (after printing a message) when the input is not a dictionary
    
    """
    
    if not isinstance(member_dict, dict):
        return print('the input is not a dictionary')

    if not member_dict:
        return []

    # the first list provides the candidates, the remaining lists filter them
    first_key, *other_keys = member_dict

    # a candidate is mutual only when EVERY other list contains it
    return [member for member in member_dict[first_key]
            if all(member in member_dict[key] for key in other_keys)]
        

    
    

In [None]:
# 1.7
def take_spesific_files_paths(root_folder = '', extension = '.csv'):
    
    """
    Give the paths of the entries in a folder whose name contains the given extension.
    
    params  : 
    root_folder (type : str(path_like))  : folder to look into
    extension (type: str)  : text searched for inside every entry name (substring match)

    output: 
    file_paths (type : list) : root_folder joined with every matching entry name
    
    """
    return [os.path.join(root_folder, entry)
            for entry in os.listdir(root_folder)
            if extension in entry]
    
    

In [None]:
# 1.8 
def special_filter_path(path_list, seperate_conditions = [], include_features = []):
    
    """
    Group path entries by condition, keeping only the entries that carry a wanted feature.
    
    parameters:
    
    path_list (type: list) : list of lists; the first item of every inner list is the path that is tested
    seperate_conditions (type: list) : strings used as the keys of the result; an entry
                                       belongs to a key when the key text occurs in its first path
    include_features (type: list) : strings of which at least one has to occur in the first path
    
    outputs:
    path_dic (type:dict) : condition -> list of the selected entries of path_list
    
    """

    def _selected(entry, condition):
        # a feature has to match first; the condition is only looked at afterwards
        if not any(feat in entry[0] for feat in include_features):
            return False
        return condition in entry[0]

    return {condition: [entry for entry in path_list if _selected(entry, condition)]
            for condition in seperate_conditions}
        

In [13]:
# 1.9 
def extract_case_id(file_path, collective_analysis = False):
    
    """
    Extract the case id from a nifti file path.
    
    params  : 
    file_path (type : str(path_like))  : path of a '.nii.gz' file
    collective_analysis (type : bool) : when True the name of the folder that holds
                                        the file is put in front of the case id

    output: 
    case_id (type : str) : file name up to the first '.nii.gz'; 
                           '<folder name>-<file name part>' for collective analysis
    
    """
    
    file_name = os.path.basename(file_path)
    case_id = file_name.split('.nii.gz')[0]
    
    if collective_analysis:
        # os.path handles the platform's separators, unlike splitting on '/'
        folder = os.path.basename(os.path.dirname(file_path))
        case_id = folder + '-' + case_id
    
    return case_id

# 2- function for obtain MRI acquisition informations

In [None]:
# 2.1
def take_acquisition_info(data_path, case, case_id):
    
    """
    Read selected acquisition metadata from one dicom file of a folder
    and give it back as a data frame.
    
    Only the first '.dcm' entry returned by take_spesific_file_names is read.
    
    params  : 
    data_path (type : str(path_like))  : folder that holds the dicom files
    case (type : str) : case name inside the original folders (stored as 'input_name')
    case_id (type : str) : assigned case id used in further processing (stored as 'case_name')
            
    output: 
    df (type : pandas.DataFrame) : one column per acquisition information,
                                   holding the corresponding metadata value;
         the string 'folder is not including dicom data' when no '.dcm' entry is found
    
    """
    
    dicom_files = take_spesific_file_names(data_path, '.dcm')
    
    # no dicom file : a message string is returned instead of a data frame
    if not dicom_files:
        return('folder is not including dicom data')
    
    dicom_path = os.path.join(data_path, dicom_files[0])
    dataset = pydicom.dcmread(dicom_path)

    data_informations = {}

    # elements are addressed by their (group, element) dicom tag
    # NOTE(review): a tag missing from the file presumably makes the lookup raise - confirm
    data_informations['input_name'] = case
    data_informations['case_name'] = case_id
    data_informations['TR'] = dataset[0x0018, 0x0080].value
    data_informations['TE'] = dataset[0x0018, 0x0081].value
    data_informations['FA'] = dataset[0x0018, 0x1314].value
    data_informations['Percent_Phase_FOV'] = dataset[(0x0018, 0x0094)].value
    data_informations['Spacing_Between_Slices'] = dataset[0x0018, 0x0088].value
    data_informations['Slice_Thickness'] = dataset[0x0018, 0x0050].value
    data_informations['Rows'] = dataset[0x0028, 0x0010].value
    data_informations['Columns'] = dataset[0x0028, 0x0011].value
    data_informations['Pixel_Spacing'] = dataset[0x0028, 0x0030].value
    # the remaining values are stored as text
    data_informations['Acquisition_Matrix'] = str(dataset[0x0018, 0x1310].value)
    data_informations['MR_Acquisition_Type'] = str(dataset[0x0018, 0x0023].value)
    data_informations['Manufacturer'] = str(dataset[0x0008, 0x0070].value)
    data_informations['Magnetic_Field_Strength'] = str(dataset[0x0018, 0x0087].value)

    # NOTE(review): a multi-valued entry (presumably Pixel_Spacing) seems to produce
    # one row per value here, which drop_duplicates collapses only when the values
    # are equal - confirm the intended shape of the frame
    df = pd.DataFrame.from_dict(data_informations, orient='columns')
    df = df.drop_duplicates()

    return(df)

In [None]:

### this function is used by later code to check that an input file and its output file correspond to each other

def check_if_files_corresponding(path1, path2):
    
    """
    Tell whether two paths share both their file name and the name of their direct parent folder.
    
    params:
    path1 (type : str) : input file path  (will be stripped)
    path2 (type : str) : output file path (stripped file will be saved as this path)
    output : 
    files_corresponding(type: bool) : True: file name and parent folder name are equal
                                      False: at least one of them differs    
    """
    
    first = pathlib.PurePath(path1)
    second = pathlib.PurePath(path2)

    same_file_name = first.name == second.name
    same_parent_name = first.parent.name == second.parent.name

    return same_file_name and same_parent_name

# 3 -  class convert dicom file to nifti files

In [None]:
# 3.1

class convert_dcm_folders_to_nifti_folder():
    
    """
    Walks through a folder hierarchy and converts every lowest-level folder
    into nifti files, stored in a mirrored hierarchy below a target folder.
    """
        
    def take_folder_list(self, folder_path):
        
        """
        Give the names of the folders located directly inside the given folder.
        
        params  : 
        folder_path (type : str(path_like))  : folder path

        output: 
        folder_list (type : list) : names of the sub-folders of folder_path
        
        """
        
        return [entry for entry in os.listdir(folder_path)
                if os.path.isdir(os.path.join(folder_path, entry))]
    
    def transform_files(self, origin_folder, target_folder):
        
        """
        Visit every sub-folder of origin_folder recursively.
        
        A sub-folder that has sub-folders of its own is mirrored below
        target_folder and visited in turn; a sub-folder without further
        sub-folders is mirrored and handed to dicom2nifti.convert_directory,
        with the mirror folder as the output location.
        
        params  : 
        origin_folder (type : str(path_like))  : folder whose sub-folders are processed
        target_folder (type : str(path_like))  : folder below which the mirror folders are created

        output: -
        
        """
        
        for name in self.take_folder_list(origin_folder):

            source_dir = os.path.join(origin_folder, name)
            mirror_dir = os.path.join(target_folder, name)

            # lowest level reached : convert the content of this folder
            if not self.take_folder_list(source_dir):
                create_folder(mirror_dir)
                dicom2nifti.convert_directory(source_dir, mirror_dir)
                continue

            # still sub-folders inside : mirror this level and go deeper
            if not os.path.exists(mirror_dir):
                create_folder(mirror_dir)

            self.transform_files(source_dir, mirror_dir)
                

    
   

# 4 - spesific functions to MRC analysis

In [None]:
# 4.1
def take_special_file_list(mainfolders, root_path, original_root_path):
    

    """
    Collect the file path pairs and the folder paths needed for the MRC analysis.
    
    params  : 
    mainfolders (type : list)  : main folder names e.g. : ['patients', 'controls']
    root_path (type : str(path_like)) : root path of the stripped folders
    original_root_path (type : str(path_like)) :  root path of the original (not stripped) nifti files
            
    output: 
    file_names_list (type : list) : one [stripped nifti file path, original nifti file path] pair
                                    for every entry of a sub-folder whose name contains '.nii.gz'
    folder_path_list (type : list): path of every main folder followed by the paths of its sub-folders
    
    """
    
    path_pairs = []
    visited_folders = []
    
    for mainfolder in mainfolders:

        main_path = os.path.join(root_path, mainfolder)
        visited_folders.append(main_path)

        for subfolder in take_folder_list(main_path):

            sub_path = os.path.join(root_path, mainfolder, subfolder)
            visited_folders.append(sub_path)

            # keep only the nifti entries of this sub-folder
            nifti_names = [entry for entry in os.listdir(sub_path) if '.nii.gz' in entry]

            # the original file is expected at the same relative location below the original root
            path_pairs.extend(
                [os.path.join(sub_path, entry),
                 os.path.join(original_root_path, mainfolder, subfolder, entry)]
                for entry in nifti_names
            )

    return (path_pairs, visited_folders)

In [None]:
# 4.2
def read_multipl_csv_files(file_path_list):
    
    """
    Read every given csv file into a data frame.
    
    parameter:
    
    file_path_list(type: list) : csv file paths
    
    output:
    
    csv_results(type: list) : one pandas data frame per path, in the order of the paths
    
    """
    return [pd.read_csv(csv_path) for csv_path in file_path_list]
        
    

In [None]:
# 4.3
def read_multipl_csv_files_to_dict(path_dict, cases_info):
    
    """
    Build, for every case, one data frame that holds the feature informations
    together with the MRC results of all subfolders the case appears in.
    
    params:
    path_dict(type: dict) : subfolder name -> list of csv file paths of that subfolder
    cases_info (type: pandas.dataframe) : one row per case; the first column is skipped,
                 the remaining columns are named after the subfolders and hold
                 the case id of the case in that subfolder (a non-string value,
                 e.g. NaN, means the case is not present in that subfolder)
    
    output: 
    mutual_MRC(type: list) : one data frame per case that has at least one valid case id.
                 The frame is the csv of the first subfolder with a valid id, its
                 'MRC_value' column renamed to the subfolder name, plus one column
                 per further subfolder holding that subfolder's 'MRC_value'.
                 Cases without any valid case id are skipped.

    raises: IndexError when no path of a subfolder contains the case id
    
    """
    
    mutual_MRC = []
    subfolders = list(path_dict.keys())
    
    for case_num in range(len(cases_info)):
        
        case_ids = cases_info.iloc[case_num, 1:]

        # reset for every case so data of the previous case can never leak in
        df = None

        for subfolder in subfolders:
            case_id = case_ids[subfolder]

            # missing ids are not strings (e.g. NaN) : nothing to read for this subfolder
            if not isinstance(case_id, str):
                continue

            case_path = [path for path in path_dict[subfolder] if case_id in path][0]
            case_df = pd.read_csv(case_path)

            if df is None:
                # first valid subfolder : its csv is the template carrying the feature columns
                df = case_df.rename(columns = {'MRC_value': subfolder})
            else:
                # later subfolders only contribute their MRC results
                df[subfolder] = case_df['MRC_value']

        if df is not None:
            mutual_MRC.append(df)
    
    return mutual_MRC
    

# 5 - Functions and classes for statistical analysis

In [None]:
# 5.1 

class analyse_MRC_results():
    
    """
    1. This class loads MRC results of control and patient cases and runs the
       Mann-Whitney U test and the ROC curve analysis per feature, giving
       p values and AUC results together with the feature informations
    2. The values of the features found meaningful (based on the given stat
       limits) are stored in "values_df" for a later correlation analysis
    
    parameters:
    
    control_csv_paths(type: list)(**) : control cases csv file paths
    patients_csv_paths(type: list)(**) : patients csv file paths
    ** : these params are obligatory for single folder analysis and should not
         be given if the analysis is collective
    collective_evaluation (type: bool) :
            True: collective analysis will be done 
            False: single folder analysis will be done, the csv files are loaded directly
            
    NOTE: if collective_evaluation = True:
        you need to call "activate_collective_evaluation" before taking stat results

    raises: TypeError when a path argument of a single folder analysis is not a list
    
    """
    
    def __init__(self, control_csv_paths = [], patients_csv_paths = [], 
                 collective_evaluation = False):
        
        # defined up front so power_analysis can be called before take_stats
        self.meaningful_results = []
        
        if not collective_evaluation :
            
            ## checking given inputs if they are lists
            self.check_if_list(control_csv_paths, 'control_csv_paths')
            self.check_if_list(patients_csv_paths, 'patients_csv_paths')

            ## store copies, so neither the shared default lists nor the
            ## caller's lists are aliased by the instance
            self.control_paths = list(control_csv_paths)
            self.patient_paths = list(patients_csv_paths)
            
            ## directly load data
            self.contols_data = read_multipl_csv_files(self.control_paths)
            self.patients_data = read_multipl_csv_files(self.patient_paths)


    def activate_collective_evaluation(self, control_csv_paths = {}, patients_csv_paths = {},  
                                       sequence_folders = [], caseid_info_file='.xlsx',
                                       control_id = '', patient_id = ''):
        """
        Activate the collective analysis: the MRC results of the different
        subfolders belonging to the same sequence are put together per case and
        reduced to their maximum, which becomes the data used by take_stats.
        
        parameters:
        
        control_csv_paths(type: dict) : control cases file paths,
                                    keyed by the subfolder they belong to
        
        patients_csv_paths(type: dict) : patients file paths,
                                    keyed by the subfolder they belong to
                                    
        sequence_folders (type: list) : subfolder names of the sequence
        
        caseid_info_file(type: str(path_like)) : excel file path which clarifies the case ids in the subgroups
        
        control_id(type: str) : the word which represents the control group in the sheet names
        patient_id(type: str) : the word which represents the patient group in the sheet names
        
        output: -- 

        """
        
        ## case ids of every case within the subfolders
        control_info = self.create_case_id_data(caseid_info_file, sequence_folders, control_id)        
        patient_info = self.create_case_id_data(caseid_info_file, sequence_folders, patient_id)

        ## per case data frames holding the MRC results of every subfolder
        collective_contols_data  = read_multipl_csv_files_to_dict(control_csv_paths, control_info)
        collective_patients_data = read_multipl_csv_files_to_dict(patients_csv_paths, patient_info)
        
        ## keep only the maximum MRC result for the statistical analysis
        self.contols_data = self.get_max_MRC(collective_contols_data)
        self.patients_data = self.get_max_MRC(collective_patients_data)
        
    
    def get_max_MRC(self, df_list):
        
        """
        Reduce the MRC results of the different subfolders to their maximum.
        
        params:
        
        df_list (type: list) : data frames whose first 5 columns hold the feature
                informations and whose remaining columns hold MRC results
                
        outputs:
        
        data (type: list) : data frames with the first 5 columns of the input
                and a 'MRC_value' column holding the row-wise maximum
                (missing values skipped) of the remaining columns
        
        """

        data = []

        for df in df_list:            
            # copy of the feature informations only
            new_df = df.iloc[:, :5].copy()
            # row-wise maximum over all MRC result columns
            new_df['MRC_value'] = df.iloc[:, 5:].max(axis = 1, skipna = True)
            
            data.append(new_df)

        return data
    

    def take_stats(self, folder_type = '', auc_limit = 0.75, p_limit = 0.01):
        
        """    
        Run the Mann-Whitney U test and the ROC curve analysis for every feature.
        
        Every row of the loaded data frames is one feature: the last column is
        the value that is compared between controls and patients, the other
        columns are the feature informations (taken from the first control case).
        Features passing both limits are stored in "self.meaningful_results" and
        their values (controls followed by patients) are stored as a column of
        "self.values_df", named after the feature's index in the meaningful list.
        
        params: 
        folder_type (type: str) : subfolder or sequence name, put in front of every result row
        auc_limit (type: float) : AUC has to be above this limit to be meaningful
        p_limit (type: float) : p value has to be below this limit to be meaningful
        
        outputs :
        p_results (type: list) : feature informations + p value (rounded to 4 digits)
        AUC_results (type: list) : feature informations + AUC (rounded to 2 digits)
        meaningful_results (type: list) : feature informations + AUC + p value of the meaningful features

        """
        
        ## object storing feature values for the correlation analysis
        self.values_df = pd.DataFrame()
        
        template_data = self.contols_data[0]
        
        p_results, AUC_results = [], []
        self.meaningful_results = []

        for feature_num in range(len(template_data)):
            
            feature_info = [folder_type] + list(template_data.iloc[feature_num, :-1].values)
            
            ## value of this feature in every control / patient case
            control_values = [data.iloc[feature_num, -1] for data in self.contols_data]
            patient_values = [data.iloc[feature_num, -1] for data in self.patients_data]
             
            ## p value (Mann-Whitney U test)
            p_value = round(find_p_value(control_values, patient_values), 4)
            p_results.append(feature_info + [p_value])
            
            ## AUC value (ROC curve)
            auc_value = round(find_auc(control_values, patient_values), 2)
            AUC_results.append(feature_info + [auc_value])
            
            ## record meaningful stat results based on given limits
            if (auc_value > auc_limit) and (p_value < p_limit):
                self.meaningful_results.append(feature_info + [auc_value] + [p_value])
                
                # store values for corr.analysis, column name is the feature index
                column_name = str(len(self.meaningful_results) - 1)
                temp_df = pd.DataFrame({column_name: control_values + patient_values})
                self.values_df = pd.concat([self.values_df, temp_df], axis = 1, join='outer')

        return (p_results, AUC_results, self.meaningful_results)
    
    
    def power_analysis(self):
        
        """
        Among the meaningful features that share the same
        'MRI_type', 'shape', 'primary_rate', 'secondary_rate', 'step'
        but differ in 'power', keep only the one with the highest AUC.
        
        parameters:
        
        -- uses self.meaningful_results (filled by take_stats)
            
        outputs:
        
        df (type: list) : remaining features as rows
                        ['MRI_type', 'shape', 'primary_rate', 'secondary_rate', 'step', 'power', 'AUC', 'p'],
                        sorted by descending AUC; an empty list when there are no meaningful results
        
        """
        if not self.meaningful_results:
            return []
        
        feature_column = ['MRI_type', 'shape', 'primary_rate', 'secondary_rate', 'step', 'power', 'AUC', 'p']
        df = pd.DataFrame(data = self.meaningful_results, columns = feature_column)

        ## highest AUC first, so that "keep first" below keeps the best power
        df = df.sort_values(by = 'AUC', ascending = False)

        ## rows equal in everything but power collapse into the best one
        df = df.drop_duplicates(subset = feature_column[:5], keep = 'first', ignore_index = True)

        return df.values.tolist()
        
            
    def create_case_id_data(self, info_path, sequence_folders, situation):
        
        """
        Read the excel file and collect, per case, its case id in every subfolder of the sequence.
        
        A sheet is used when its name contains "situation" and at least one of
        the subfolder names; every used sheet needs the columns 'input_name'
        and 'case_name'.
        
        params:
        
        info_path (type: str (path_like)) : excel file path which includes case and case ids info
        sequence_folders (type: list) : subfolder names which belong to the same sequence
        situation(type: str) : word selecting the group (patient or control) by sheet name
        
        outputs:
        
        info_df (type: pandas.DataFrame) : column 'input_name' plus one case id column per used
                 sheet, named with the part of the sheet name that follows "<situation>_"
        
        """

        ## reading all sheets of the info file
        file_ = pd.read_excel(info_path, sheet_name = None)

        ## a sheet is selected once, even when several subfolder names occur in
        ## its name (selecting it twice would merge the same column twice)
        sheet_names = [sheet for sheet in file_
                       if situation in sheet
                       and any(subfolder in sheet for subfolder in sequence_folders)]
        
        info_df = pd.DataFrame(columns = ['input_name'])

        for sheet in sheet_names :
            
            df = file_[sheet][['input_name', 'case_name']]
            new_column_name = sheet.split(situation + '_')[1]
            df = df.rename(columns = {'case_name': new_column_name })
            ## outer merge keeps cases that are missing in some subfolders
            info_df = pd.merge(info_df, df, how = "outer", on=['input_name'])

        return info_df
        

    def check_if_list(self, list_, list_name):
        
        """
        Make sure the input is a list.
        
        params:
        
        list_(type: -) : object to check
        list_name (type: str) : name of the object, used in the error message

        raises: TypeError when list_ is not a list (an exception, unlike exit(),
                does not shut down the interpreter / notebook kernel)
        
        """
        if not isinstance(list_, list):
            raise TypeError(f'{list_name} input is not a list')
            
       

In [None]:
# 5.2 

def find_p_value (list1, list2):
    
    """
    Give the p value of the Mann-Whitney U test (exact method) between two samples.
    
    params  : 
    list1 (type : list)  : numeric values of the first sample
    list2 (type : list)  : numeric values of the second sample

    output: 
    p (type : float) : p value of the test
    """
    
    # only the p value of the test result is needed, the U statistic is dropped
    return mannwhitneyu(list1, list2, method="exact").pvalue



In [None]:
# 5.3 

def find_auc(controls_values = [], patient_values = []):
    
    """
    Give the area under the ROC curve that separates patients from controls.
    
    Controls are labelled 0 and patients 1; the given values are used as scores.
    
    params  : 
    controls_values (type : list)  : MRC values of the controls
                         
    patient_values (type : list)  : MRC values of the patients

    output: 
    roc_auc (type : float) : area under the ROC curve
    
    """
    labels = [0] * len(controls_values) + [1] * len(patient_values)
    scores = controls_values + patient_values

    # the thresholds returned by roc_curve are not needed for the area
    false_positive_rate, true_positive_rate, _ = metrics.roc_curve(labels, scores)

    return metrics.auc(false_positive_rate, true_positive_rate)

In [None]:
# 5.4

def save_stat_to_csv(data, labels = [], path = '.csv', sort_columns = False,
                     ascending_order = True, sorting_columns = []):
    """
    Create a dataframe from the given data and column names (labels) and save it to a csv file.
    
    data (type: list) : rows holding features and stat results
    labels(type : list) : column names of data; when empty, pandas' default column names are used
    path (type: str (path_like)) : path of the csv file to write
    sort_columns (type: bool) : True: rows are sorted by "sorting_columns" before saving
    ascending_order (type: bool) : sort in ascending (True) or descending (False) order
    sorting_columns (type : list) : column names used for ordering the rows
    
    output: -
    """

    # an empty label list would make pandas reject any non-empty data,
    # so it is treated like "no labels given"
    has_labels = labels is not None and len(labels) > 0
    df = pd.DataFrame(data, columns = labels if has_labels else None)

    if sort_columns:
        ## reorder rows based on better stat results
        df = df.sort_values(by=sorting_columns, ascending=ascending_order)

    df.to_csv(path, index=False)
