# Extracting data from DICOM files
### This notebook extracts data from DICOM files using DICOM tags.

#### Installing required packages:

1. Create a new environment (https://docs.python.org/3/library/venv.html)

```bash
python3 -m venv env
```
2. Activate the new environment

```bash
source env/bin/activate
```

3. Install required packages

```bash
pip install -r requirements.txt
```

4. Run the notebook.  :)

In [None]:
import glob
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pydicom import dcmread

`DICOM_file`: the dataset read from the DICOM file with `dcmread`.
`tags_file_osiris`: the table, read from the Excel file, that contains the DICOM tags to be extracted.
For this example, Christophe's original Excel file is used.

In [None]:
# DICOM dataset used for the single-file example.
# (The earlier read of "dicom_file.dcm" was removed: its result was overwritten
# immediately, and it raised FileNotFoundError when that file was absent.)
DICOM_file = dcmread("files/1-01.dcm")

# Excel sheet listing the DICOM tags to extract; the extraction cells read
# the tag references from its "Source" column.
tags_file_osiris = pd.read_excel(
    "modele_osiris_radiomics.xlsx", sheet_name="Feuil1", header=0
)

#### Display tags_file_osiris and the DICOM file

In [None]:
# Show the tag table loaded from the Excel sheet as the cell output.
tags_file_osiris

In [None]:
# Print the text representation of the loaded DICOM dataset.
print(DICOM_file)

### Extracting the data and building a data frame

In [None]:
# Step 1: collect the tag keys listed in the Excel sheet.
# The first parenthesised group of each "Source" cell is taken as the tag
# reference; characters 0-3 and everything after position 4 (the separator)
# are joined behind a "0x" prefix, e.g. "(0010,0020)" -> "0x00100020".
list_tags = []
for source in tags_file_osiris["Source"]:
    try:
        tag_ext = re.search(r"\((.*?)\)", source).group(1)
    except (AttributeError, TypeError):
        # AttributeError: the cell has no "(...)" group (re.search returned None).
        # TypeError: the cell is not text, e.g. an empty Excel cell.
        continue
    list_tags.append("0x" + tag_ext[0:4] + tag_ext[5:])

# Step 2: look each tag up in the DICOM file.
# Rows are gathered in a list and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and growing a frame row by row is quadratic.
records = []
skipped_tags = []
for tag_key in list_tags:
    try:
        element = DICOM_file[tag_key]
        records.append(
            {"tag": str(element.tag), "name": element.name, "value": element.value}
        )
    except Exception:
        # Best effort, as before: a tag that cannot be looked up is skipped,
        # but it is recorded instead of being dropped silently.
        skipped_tags.append(tag_key)

dicom_df = pd.DataFrame(records, columns=["tag", "name", "value"])

if skipped_tags:
    print(f"Skipped {len(skipped_tags)} tag(s) whose lookup failed: {skipped_tags}")

dicom_df

#### Exporting to CSV

In [None]:
# Write the extracted tags to a CSV file relative to the current working directory.
dicom_df.to_csv('dicom_data-osiris.csv')

### Working with file lists and building a transposed dataframe

In [None]:
# glob returns matches in arbitrary order; sort the paths so that the row order
# of the resulting table (and which file is read last) is the same on every run.
dcm_files = sorted(glob.glob("files/*.dcm"))

In [None]:
# Step 1: collect the tag keys listed in the Excel sheet.
# The first parenthesised group of each "Source" cell is taken as the tag
# reference; characters 0-3 and everything after position 4 (the separator)
# are joined behind a "0x" prefix, e.g. "(0010,0020)" -> "0x00100020".
list_tags = []
for source in tags_file_osiris["Source"]:
    try:
        tag_ext = re.search(r"\((.*?)\)", source).group(1)
    except (AttributeError, TypeError):
        # AttributeError: the cell has no "(...)" group (re.search returned None).
        # TypeError: the cell is not text, e.g. an empty Excel cell.
        continue
    list_tags.append("0x" + tag_ext[0:4] + tag_ext[5:])

# Step 2: read every file and collect its values as
# {file path: {tag string: value}}.
# The unused, empty `dicom_df` that used to be created here was removed: it
# overwrote the single-file result held under the same name.
dicom_dict = {}
skipped_by_file = {}

for path in dcm_files:
    # DICOM_file is rebound on purpose: the plotting cell at the end of the
    # notebook displays whichever file was read last.
    DICOM_file = dcmread(path)
    dicom_dict[path] = {}
    for tag_key in list_tags:
        try:
            element = DICOM_file[tag_key]
            dicom_dict[path][str(element.tag)] = element.value
        except Exception:
            # Best effort, as before: a tag that cannot be looked up in this
            # file is skipped, but recorded instead of being dropped silently.
            skipped_by_file.setdefault(path, []).append(tag_key)

if skipped_by_file:
    n_skipped = sum(len(keys) for keys in skipped_by_file.values())
    print(
        f"Skipped {n_skipped} tag lookup(s) across {len(skipped_by_file)} file(s); "
        "see `skipped_by_file` for details."
    )

In [None]:
# Turn {file path: {tag: value}} into a table: orient='index' makes each
# outer key (file path) a row and each inner key (tag) a column.
df_dicom = pd.DataFrame.from_dict(dicom_dict, orient='index')

In [None]:
# Preview the first 10 rows of the per-file table.
df_dicom.head(10)

#### Just for fun!

In [None]:
# Display the pixel data of the dataset currently bound to DICOM_file.
# matplotlib.pyplot is imported as `plt` in the import cell at the top.
fig, ax = plt.subplots()
ax.imshow(DICOM_file.pixel_array, cmap="gray")
ax.set_title("DICOM pixel data")
plt.show()