# Analyze the raw dcm images.

Now we have the raw data from Dr. AMMARI, and we need to do some data preprocessing and extract the useful information for further research.

- 1) We only have DICOM image data, but all the information for each image is stored in the DICOM files, so we need to analyze them. We define the DcmData class (in DcmData.py) to do this. The extracted information is saved in AllDatas.xlsx.


- 2) The images come from different series, so we need to arrange the information by series and design our experiments based on series. Results are saved in All_SeriesInfo.xlsx.


- 3) There are several image types, but in the first stage of our experiments we will only use T1 images. Results are saved in T1_SeriesInfo.xlsx.


- 4) For each T1 series, we will resave the images as .nii.gz images.



In [None]:
import os
import pandas as pd
import numpy as np
import shutil

from DcmData import DcmData
from analyzeSettings import *
import sys
sys.path.append("../")
from utils.myUtils import get_filenames, dcm2nii, mkdir, show_nii_image_slices

### 1) Extract useful information from the dicom files, and save them in AllDatas.xlsx file.

In [None]:
def extract_infos_from_dicoms(patient, original_image_path, save_excel_path, show_logs=False):
    '''
    Get the information of each dicom file found under a folder, gather it in
    one dataframe (one row per file) and save that dataframe to an excel file.

    Args:
        patient: patient identifier, passed to DcmData for every file.
        original_image_path: folder passed to get_filenames to list the dicom files.
        save_excel_path: path of the excel file to write.
        show_logs: if True, print each file path, its extracted row and all
            its dicom attributes (used for debug).

    Returns:
        None. When no file is found, a message is printed and nothing is
        written (there is no table to save).
    '''

    fileList=get_filenames(original_image_path, filter_list=None)
    print('There are totally {} images in {}.'.format(len(fileList),original_image_path))

    # Nothing to extract: stop here instead of failing on an undefined table.
    if len(fileList)==0:
        print('No image found in {}, nothing is saved.\n'.format(original_image_path))
        return

    # Collect one single-row DataFrame per file and concatenate once at the end;
    # concatenating inside the loop would copy the growing table at every step.
    rows=[]
    for i, file in enumerate(fileList):

        # get image information from dicom image
        dcmInfo=DcmData(patient, file)
        dataInfos=pd.DataFrame(dcmInfo.get_dataInfos(),index = [i])
        rows.append(dataInfos)

        # show the logs, used for debug
        if show_logs:
            print('*************************************************************************')
            print(file)
            print(dataInfos)
            dcmInfo.show_all_attributes()

    #Save the Dataframe to a excel file
    MyData=pd.concat(rows)
    MyData.reset_index(drop=True, inplace=True)
    MyData.to_excel(save_excel_path)
    print('Sucessfully save data information in {}.\n'.format(save_excel_path))
    

def main_extract_allDataInfo():
    '''
    Run extract_infos_from_dicoms for every entry returned by
    get_Original_ImagePathList.

    The patient name is the part of the entry key located before the first
    "_", and the destination excel file is the one registered under the same
    key in get_AllData_ExcelList.
    '''
    image_folders=get_Original_ImagePathList()
    excel_targets=get_AllData_ExcelList()

    for dataset_key, image_folder in image_folders.items():
        patient_name, _, _ = dataset_key.partition("_")
        excel_target=excel_targets[dataset_key]
        extract_infos_from_dicoms(patient_name, image_folder, excel_target)

In [None]:
# Run step 1: extract the per-image information and write the excel files.
main_extract_allDataInfo()

### 2) Extract the information for all series and for T1 series; And resave T1 series images in a .nii.gz file.

In [None]:

def extract_series_infos(original_image_path, all_data_excel_path, save_all_series_excel_path, save_T1_excel_path, save_nii_basepath):
    '''
    Summarise the per-image table by series, resave every T1 series through
    save_T1_images, and save the summaries in two excel files.

    Args:
        original_image_path: folder joined with each 'ImageName' to locate the dicom files.
        all_data_excel_path: excel file holding one row per image (read with pd.read_excel).
        save_all_series_excel_path: excel file to write with one row per series.
        save_T1_excel_path: excel file to write with only the series whose ImageType is 'T1'.
        save_nii_basepath: base folder passed to save_T1_images.

    Returns:
        (AllSeries_Info, T1_Series_Info): the two dataframes that were saved.
    '''
    #read the data information from the excel
    all_data=pd.read_excel(all_data_excel_path)

    # Define a dataframe to save the series information.
    slice_info_name_list=['Patient', 'FOV', 'Matrix', 'MagneticFieldStrength', 'ImageType','MRAcquisitionType','SeriesDescription']
    statistic_list=['SeriesNumber', 'NumberOfImages', 'ReTest']+slice_info_name_list
    AllSeries_Info=pd.DataFrame(columns=statistic_list)

    #add the information of the SeriesNumber and NumberOfImages
    SeriesNumber=all_data['SeriesNumber'].value_counts()
    AllSeries_Info['SeriesNumber']=SeriesNumber.index
    AllSeries_Info['NumberOfImages']=SeriesNumber.values
    AllSeries_Info=AllSeries_Info.sort_values(by="SeriesNumber" , ascending=True)
    AllSeries_Info.reset_index(drop=True, inplace=True)

    #add the other information for each series.
    # After reset_index each series owns exactly one row, so the row label can
    # be used directly instead of rebuilding a boolean mask for every access.
    for row, series in AllSeries_Info['SeriesNumber'].items():
        data_in_series_i=all_data.loc[all_data["SeriesNumber"]==series]

        #extract the info for a series
        for attribute in slice_info_name_list:
            values=data_in_series_i[attribute].unique()
            # One cell can only hold one value: report inconsistent series
            # explicitly rather than failing on the assignment.
            if len(values)>1:
                print('Warning: series {} has several values for {}: {}. The first one is kept.'.format(series, attribute, list(values)))
            AllSeries_Info.at[row, attribute]=values[0]

        #judge the series is a test experiment or a re-test experiment
        # str() keeps the membership test valid when the description is empty (NaN).
        description=str(AllSeries_Info.at[row, 'SeriesDescription'])
        AllSeries_Info.at[row, 'ReTest']=('TARDIF' in description) or ('TARDIVE' in description)

        #resave T1 images
        series_info=AllSeries_Info.loc[[row]]
        if series_info['ImageType'].iloc[0]=='T1':
            series_file_paths=[os.path.join(original_image_path, image_name) for image_name in data_in_series_i['ImageName']]
            save_T1_images(series_file_paths, save_nii_basepath, series_info)

    # filter data with condition: ImageType==T1
    # Re-test series (TARDIF / TARDIVE) are kept here; they are flagged by the ReTest column.
    # reset_index returns a new frame, which avoids modifying a slice in place.
    T1_Series_Info=AllSeries_Info.loc[AllSeries_Info["ImageType"].isin(['T1'])].reset_index(drop=True)

    #Save the statistic information into the excel.
    AllSeries_Info.to_excel(save_all_series_excel_path)
    T1_Series_Info.to_excel(save_T1_excel_path)
    print('\nSucessfully save the statistic information in {} and {}.'.format(save_all_series_excel_path,save_T1_excel_path))

    return AllSeries_Info,T1_Series_Info

def save_T1_images(series_file_paths, save_image_basepath, series_info, show_resaved_nii=False):
    '''
    Resave the dicom files of one series as a single .nii.gz file.

    The files are copied into a temporary folder named after the series
    number, converted with dcm2nii, and the temporary folder is then removed.

    Args:
        series_file_paths: paths of the dicom files belonging to the series.
        save_image_basepath: base folder; a sub-folder named after the series is used inside it.
        series_info: dataframe whose first row gives 'Patient', 'SeriesNumber',
            'FOV', 'Matrix' and 'MagneticFieldStrength' of the series.
        show_resaved_nii: if True, display the resaved image with show_nii_image_slices.
    '''
    # Build the name shared by the output folder and the .nii.gz file.
    patient=str(series_info['Patient'].iloc[0])
    series_number=str(series_info['SeriesNumber'].iloc[0])
    fov=str(int(series_info['FOV'].iloc[0]))
    matrix=str(series_info['Matrix'].iloc[0])
    field_strength=str(series_info['MagneticFieldStrength'].iloc[0])
    dest_nii_basename='{}_Series-{}_FOV-{}_matrix-{}_{}T'.format(patient, series_number, fov, matrix, field_strength)

    # Output folder of the series and the .nii.gz file inside it.
    series_folder=os.path.join(save_image_basepath, dest_nii_basename)
    dest_nii_filepath=os.path.join(series_folder, dest_nii_basename+'.nii.gz')

    # Gather the dicom files of the series in one temporary folder.
    temp_dcm_folder=os.path.join(series_folder, series_number)
    mkdir(temp_dcm_folder)
    for source_path in series_file_paths:
        target_path=os.path.join(temp_dcm_folder, os.path.basename(source_path))
        shutil.copy(source_path, target_path)

    # Convert the folder to one .nii.gz file, then drop the temporary copies.
    dcm2nii(temp_dcm_folder, dest_nii_filepath)
    shutil.rmtree(temp_dcm_folder)

    # Optional visual check of the resaved image.
    if not show_resaved_nii:
        return
    show_nii_image_slices(dest_nii_filepath)
        

In [None]:

def main_extract_series_info():
    '''
    Run extract_series_infos for every entry returned by get_AllData_ExcelList
    and print the resulting T1 series table.

    The image folder, the two output excel files and the nii base folder of an
    entry are the ones registered under the same key in the other settings.
    '''
    image_folders=get_Original_ImagePathList()
    all_data_excels=get_AllData_ExcelList()
    all_series_excels=get_AllSeries_ExcelList()
    t1_excels=get_T1_ExcelList()
    nii_folders=get_nii_basepath()

    for dataset_key, all_data_excel in all_data_excels.items():
        print('\n\n**************************   {}  **********************************'.format(dataset_key))

        _, t1_table=extract_series_infos(
            image_folders[dataset_key],
            all_data_excel,
            all_series_excels[dataset_key],
            t1_excels[dataset_key],
            nii_folders[dataset_key],
        )

        print('\n----------------------  Statistic for T1 images -------------------------\n',t1_table)


In [None]:
# Run step 2: build the series tables and resave the T1 series.
main_extract_series_info()