# Chemotherapy Resistance in Biopsied Samples: Exploration
For exploration of chemotherapy resistance in biopsied samples, this Jupyter Notebook provides a playground for analyzing and visualizing the data.

In [1]:
import os
import numpy as np
import pandas as pd
import ast
import matplotlib.pyplot as plt
import glob

In [2]:
# Dataset layout: data/chemo_res/{Resistant,Sensitive}/*.png
DATA_DIR = os.path.join('data', 'chemo_res')
RES_DATA = os.path.join(DATA_DIR, 'Resistant')
SEN_DATA = os.path.join(DATA_DIR, 'Sensitive')

# glob.glob() makes no ordering guarantee (it depends on the file system), so
# sort the matches to keep row order and printed output stable across runs.
RES_FILES = sorted(glob.glob(os.path.join(RES_DATA, '*.png')))
SEN_FILES = sorted(glob.glob(os.path.join(SEN_DATA, '*.png')))

In [9]:
# Build one DataFrame per class ('res' / 'sen') from the image filenames.
# Each basename is split on '_' into exactly three fields: the first is ignored,
# the second is "<patient>.<still image>", and the third is "<lesion>.<extension>".
# All extracted values are kept as strings.
data = {}

for label, paths in (('res', RES_FILES), ('sen', SEN_FILES)):
    rows = []

    for path in paths:
        _prefix, patient_and_still, lesion_and_ext = os.path.basename(path).split('_')
        patient, still = patient_and_still.split('.')
        lesion, _ext = lesion_and_ext.split('.')
        rows.append((patient, still, lesion))

    data[label] = pd.DataFrame({
        'Patient Number': [row[0] for row in rows],
        'Still Image Number': [row[1] for row in rows],
        'Lesion/Region Number': [row[2] for row in rows],
    })

print(data)

{'res':     Patient Number Still Image Number Lesion/Region Number
0              100                  1                    1
1              100                  1                   10
2              100                  1                    2
3              100                  1                    3
4              100                  1                    4
..             ...                ...                  ...
365             99                  6                    5
366             99                  6                    6
367             99                  6                    7
368             99                  6                    8
369             99                  6                    9

[370 rows x 3 columns], 'sen':     Patient Number Still Image Number Lesion/Region Number
0              108                  1                    1
1              108                  1                   10
2              108                  1                    2
3              1

In [10]:
# Summarise the parsed filename fields for the 'res' (Resistant) files.
# The columns hold strings, so describe() reports count / unique / top / freq.
data['res'].describe()

Unnamed: 0,Patient Number,Still Image Number,Lesion/Region Number
count,370,370,370
unique,12,7,10
top,100,1,4
freq,60,100,37


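In `RES_FILES`:
* 12 unique patients
* 7 unique still image numbers
* 10 unique region numbers

If every patient had every still image and every region, there would be $12 \times 7 \times 10 = 840$ files. Only 370 exist ($840 > 370$), so not every (patient, still image, region) combination is present.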

In [11]:
# Summarise the parsed filename fields for the 'sen' (Sensitive) files.
# The columns hold strings, so describe() reports count / unique / top / freq.
data['sen'].describe()

Unnamed: 0,Patient Number,Still Image Number,Lesion/Region Number
count,430,430,430
unique,15,10,10
top,85,1,4
freq,60,110,43


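In `SEN_FILES`:
* 15 unique patients
* 10 unique still image numbers
* 10 unique region numbers

If every patient had every still image and every region, there would be $15 \times 10 \times 10 = 1500$ files. Only 430 exist ($1500 > 430$), so not every (patient, still image, region) combination is present.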