# Prepare MosMedData

## Download the data

In [None]:
!wget -P /mnt/data/machine-learning http://storage.yandexcloud.net/covid19.1110/prod/COVID19_1110.zip
!unzip /mnt/data/machine-learning/COVID19_1110.zip -d /mnt/LSDF/users/zharov

## Prepare unsupervised dataset

In [1]:
!mkdir /mnt/LSDF/users/zharov/COVID19_1110/studies/prepared

mkdir: cannot create directory ‘/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared’: File exists


In [4]:
import tifffile
from medpy.io import load as medload
import numpy as np
from glob import glob
from skimage.exposure import rescale_intensity
from tqdm.auto import tqdm
import os
import re

def inner_medload(a):
    """Load the volume stored at path ``a`` and swap its first and last axes.

    Only the first element of what ``medload`` returns is used (presumably the
    voxel array, with the header discarded — confirm against the medpy docs).
    """
    volume = medload(a)[0]
    return np.swapaxes(volume, 0, -1)

In [2]:
# Paths of every .nii.gz study volume found in the CT-* sub-folders.
all_addrs = glob(
    pathname='/mnt/LSDF/users/zharov/COVID19_1110/studies/CT-*/*.nii.gz',
    recursive=False,
)

In [3]:
# Number of study volumes matched by the glob above.
len(all_addrs)

1110

In [3]:
# Captures the numeric study id from file names such as ``study_0815.nii.gz``.
# The dots are escaped so they match a literal "." rather than any character.
extract_experiment_id = re.compile(r'study_(\d+)\.nii\.gz')

In [10]:
# Convert every study volume into an 8-bit TIFF named after its study id.
for addr in tqdm(all_addrs):
    img = inner_medload(addr)
    filename = os.path.basename(addr)
    # findall(...)[0] raises IndexError if a file name does not follow the
    # study_<digits>.nii.gz scheme, which surfaces unexpected files early.
    experiment_id = extract_experiment_id.findall(filename)[0]
    # Map the intensity window (-900, 500) onto (0, 255) before the uint8 cast
    # (presumably Hounsfield units — confirm against the dataset description).
    img = rescale_intensity(img, in_range=(-900, 500), out_range=(0, 255)).astype(np.uint8)
    # imwrite is the current name of tifffile's writer; imsave is its legacy alias.
    tifffile.imwrite(f'/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared/{experiment_id}.tiff', img)

  0%|          | 0/1110 [00:00<?, ?it/s]

### Prepare data with at least 32 slices

In [7]:
!mkdir /mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32
!cp -r /mnt/LSDF/users/zharov/COVID19_1110/studies/prepared/* /mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32

In [13]:
# Delete every TIFF in prepared_32 that has fewer than 32 pages, printing the
# path of each removed file.
for addr in glob('/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32/*.tiff'):
    # The context manager closes the file handle before the file is removed.
    with tifffile.TiffFile(addr) as tif:
        n_pages = len(tif.pages)
    if n_pages < 32:
        print(addr)
        os.remove(addr)

/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32/0815.tiff
/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32/0793.tiff
/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32/0691.tiff
/mnt/LSDF/users/zharov/COVID19_1110/studies/prepared_32/0409.tiff


## Prepare supervised dataset

In [2]:
# Captures the numeric study id from mask file names of the form
# ``study_<digits>_mask.nii.gz``. The dots are escaped so they match a literal ".".
extract_mask_id = re.compile(r'study_(\d+)_mask\.nii\.gz')

In [3]:
!mkdir /mnt/LSDF/users/zharov/COVID19_1110/uniformly_named_masks

mkdir: cannot create directory ‘/mnt/LSDF/users/zharov/COVID19_1110/uniformly_named_masks’: File exists


In [3]:
# Convert every mask volume into a uint8 TIFF named after its study id.
for addr in tqdm(glob('/mnt/LSDF/users/zharov/COVID19_1110/masks/*.nii.gz')):
    # Named mask_id rather than id so the builtin id() is not shadowed.
    mask_id = extract_mask_id.findall(os.path.basename(addr))[0]
    msk = inner_medload(addr).astype(np.uint8)
    # imwrite is the current name of tifffile's writer; imsave is its legacy alias.
    tifffile.imwrite(f'/mnt/LSDF/users/zharov/COVID19_1110/uniformly_named_masks/{mask_id}.tiff', msk)

  0%|          | 0/50 [00:00<?, ?it/s]

In [4]:
# Count the TIFF masks present in uniformly_named_masks.
len(glob('/mnt/LSDF/users/zharov/COVID19_1110/uniformly_named_masks/*.tiff'))

50

# Prepare lungs dataset

## Download the data

In [None]:
!wget -P /mnt/data/machine-learning https://zenodo.org/record/3757476/files/Lung_Mask.zip
!wget -P /mnt/data/machine-learning https://zenodo.org/record/3757476/files/COVID-19-CT-Seg_20cases.zip
    
!unzip /mnt/data/machine-learning/Lung_Mask.zip -d /mnt/LSDF/users/zharov/Lung_Masks
!unzip /mnt/data/machine-learning/COVID-19-CT-Seg_20cases.zip -d /mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases

In [12]:
!mkdir /mnt/LSDF/users/zharov/CTLungs
!mkdir /mnt/LSDF/users/zharov/CTLungs/images
!mkdir /mnt/LSDF/users/zharov/CTLungs/masks

## Prepare images

In [5]:
# The position of each path in this list becomes the output file name for both
# the image and its mask, so the list is sorted: glob returns entries in
# arbitrary order, which would make the index -> case mapping differ between runs.
all_image_addrs = sorted(glob('/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/*.nii.gz'))

In [7]:
# Number of CT volumes matched by the glob above.
len(all_image_addrs)

20

In [34]:
# Write every CT volume as <index>.tiff, where index is the volume's position
# in all_image_addrs.
for i, addr in tqdm(enumerate(all_image_addrs), total=len(all_image_addrs)):
    img = inner_medload(addr)
    # Only int16 volumes are windowed to 8 bit; any other dtype is written
    # unchanged (presumably those volumes are already 8-bit — TODO confirm).
    if img.dtype == np.int16:
        img = rescale_intensity(img, in_range=(-900, 500), out_range=(0, 255)).astype(np.uint8)
    # imwrite is the current name of tifffile's writer; imsave is its legacy alias.
    tifffile.imwrite(f'/mnt/LSDF/users/zharov/CTLungs/images/{i}.tiff', img)

  0%|          | 0/20 [00:00<?, ?it/s]

/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_009.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_003.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_008.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_006.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_005.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_007.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_002.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_001.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_010.nii.gz conversion
/mnt/LSDF/users/zharov/COVID-19-CT-Seg_20cases/coronacases_004.nii.gz conversion


## Prepare masks

In [32]:
# Write the lung mask of every CT volume as <index>.tiff, using the same index
# as the image loop so image i and mask i belong to the same case.
for i, image_addr in tqdm(enumerate(all_image_addrs), total=len(all_image_addrs)):
    # The mask is looked up in Lung_Masks under the same file name as the image.
    mask_addr = os.path.join('/mnt/LSDF/users/zharov/Lung_Masks', os.path.basename(image_addr))
    mask = inner_medload(mask_addr).astype(np.uint8)
    # imwrite is the current name of tifffile's writer; imsave is its legacy alias.
    tifffile.imwrite(f'/mnt/LSDF/users/zharov/CTLungs/masks/{i}.tiff', mask)

  0%|          | 0/20 [00:00<?, ?it/s]

In [15]:
all_medakas = glob('/mnt/LSDF/projects/code-vita/Medaka/2018_11/*/slices.tif')

# Display (number of pages, shape of the first page) for the first volume.
# The context manager closes the file once both values have been read.
with tifffile.TiffFile(all_medakas[0]) as tif:
    first_volume_info = (len(tif.pages), tif.pages[0].shape)
first_volume_info

(3633, (2016, 2016))

In [17]:
# Print the page count of the first 30 volumes, closing each file after reading.
for path in all_medakas[:30]:
    with tifffile.TiffFile(path) as tif:
        print(len(tif.pages))

3633
6356
7041
5900
6928
5902
7449
6927
6356
6926
3633
6926
6925
5909
3632
6926
6040
4215
6924
6355
7028
5908
6923
6356
6929
6927
5903
6923
5902
6055


In [27]:
# Paths of the .tif files in the eye_decropped workshop folder.
all_eyes = glob(
    pathname='/mnt/HD-LSDF/Medaka/segmentations/workshop/eye_decropped/*.tif',
    recursive=False,
)

In [26]:
import re

In [42]:
# Volume number of every eye file: the 3-4 digits that follow "<two digits>-"
# and precede ".tif". The pattern is a raw string (so \d is not treated as a
# string escape) and the dot is escaped to match a literal ".".
all_volumes = [re.findall(r'\d{2}-(\d{3,4})\.tif', i)[0] for i in all_eyes]

In [45]:
# Display (number of distinct volumes, number of eye files, (distinct volumes, count per volume)).
unique_volumes, volume_counts = np.unique(all_volumes, return_counts=True)
len(unique_volumes), len(all_eyes), (unique_volumes, volume_counts)

(15, 21, (array(['508', '509', '511', '513', '515', '516', '671', '673', '674',
         '675', '677', '803', '833', '834', '835'], dtype='<U3'),
  array([1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1])))