In [1]:
%matplotlib inline

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pydicom
import os
import scipy.ndimage
import matplotlib.pyplot as plt
from radiomics import featureextractor

from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection


In [2]:
import pylidc as pl
import os

from pylidc.utils import consensus
from pylidc.utils import volume_viewer

import matplotlib.cm as cm
import matplotlib.animation as manim

from skimage.measure import find_contours

from sqlalchemy import func

# 1. Carregar e Explorar Data

In [3]:
# Load the metadata file that holds the directory of each image series.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which usually
# means the CSV was saved together with its index — confirm whether
# index_col=0 is wanted here.
metadata = pd.read_csv('metadata.csv')
metadata.head()

Unnamed: 0,Subject ID,Study UID,Study Description,Study Date,Series ID,Series Description,Number of images,File Size (Bytes),Collection Name,Modality,Manufacturer
0,LIDC-IDRI-1001,1.3.6.1.4.1.14519.5.2.1.6279.6001.281499745765...,,2000-01-01 00:00:00.0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,,194,102096016,LIDC-IDRI,CT,SIEMENS
1,LIDC-IDRI-0778,1.3.6.1.4.1.14519.5.2.1.6279.6001.174809695196...,CHEST,2000-01-01 00:00:00.0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100332161840...,Recon 2:,481,253245158,LIDC-IDRI,CT,GE MEDICAL SYSTEMS
2,LIDC-IDRI-0813,1.3.6.1.4.1.14519.5.2.1.6279.6001.139110171863...,,2000-01-01 00:00:00.0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793...,,157,82630786,LIDC-IDRI,CT,SIEMENS
3,LIDC-IDRI-0710,1.3.6.1.4.1.14519.5.2.1.6279.6001.116146223752...,CHEST,2000-01-01 00:00:00.0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100530488926...,Recon 2: ACRIN LARGE,471,247987448,LIDC-IDRI,CT,GE MEDICAL SYSTEMS
4,LIDC-IDRI-0410,1.3.6.1.4.1.14519.5.2.1.6279.6001.818775038273...,CT LUNG SCREEN,2000-01-01 00:00:00.0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100620385482...,,126,66314722,LIDC-IDRI,CT,GE MEDICAL SYSTEMS


In [4]:
# Number of rows in the metadata table
rows = metadata.shape[0]
rows

1308

In [5]:
# Read the folder of the first series from the metadata table.
# NOTE(review): this lookup raises KeyError — none of the columns shown by
# metadata.head() is named 'File Location'; confirm which CSV/column actually
# holds the path before running the cells that depend on `dir`.
# NOTE(review): the name `dir` shadows the Python builtin of the same name.
dir = metadata.loc[0]['File Location']
dir

KeyError: 'File Location'

In [None]:
# Open the first DICOM slice of the series with pydicom
CT_folder = os.path.join(dir)

# dcmread is the supported reader; pydicom.read_file was a deprecated alias
# of it and no longer exists in pydicom 3.0.
ds = pydicom.dcmread(os.path.join(CT_folder, '1-1.dcm'))
ds

In [None]:
# Pixel data of the slice; the last line shows its array shape
image = ds.pixel_array
image.shape

In [None]:
# The image is a 2D array whose pixel values are stored in a scaled format,
# so convert them back to Hounsfield units (HU).
# The conversion starts from ds.pixel_array rather than from `image`, so
# re-running this cell does not apply the rescale a second time.
image = ds.RescaleSlope * ds.pixel_array + ds.RescaleIntercept

# NOTE(review): pcolormesh draws row 0 at the bottom, so the slice may appear
# vertically flipped compared with plt.imshow — confirm this is intended.
plt.pcolormesh(image, cmap='Greys_r')
plt.colorbar(label='HU')
plt.axis('off')
plt.show()

# 1.1. Pylidc

In [None]:
# Query every Scan through pylidc, then print the first one and the total count
scans = pl.query(pl.Scan).all()
print(scans[0])
print(len(scans))

In [None]:
# Patient id and spacing/thickness attributes of the first scan
print(scans[0].patient_id,
      scans[0].pixel_spacing,
      scans[0].slice_thickness,
      scans[0].slice_spacing)

In [None]:
# Number of annotations attached to the first scan
print(len(scans[0].annotations))

In [None]:
# Display the annotations of the first scan
scans[0].annotations

In [None]:
# Cluster the first scan's annotations; each entry of `nods` is the group of
# annotations belonging to one nodule.
nods = scans[0].cluster_annotations()

first_scan_summary = (scans[0], len(nods))
print("%s has %d nodules." % first_scan_summary)

# Report how many annotations each nodule group contains (1-based numbering)
for i, nod in enumerate(nods):
    print("Nodule %d has %d annotations." % (i+1, len(nod)))

In [None]:
# Take the first Annotation returned by the query and open its in-scan view
ann = pl.query(pl.Annotation).first()
ann.visualize_in_scan()

In [None]:
# 3D view of the same annotation with green edges and the 'autumn' colormap
ann.visualize_in_3d(edgecolor='green', cmap='autumn')

In [None]:
# View the first scan, passing the clustered nodule groups as annotation_groups
scans[0].visualize(annotation_groups=nods)

In [None]:
# Print the annotation's feature table
ann.print_formatted_feature_table()

In [None]:
# Feature values of the annotation, requested with return_str=False
ann.feature_vals(return_str=False)

In [None]:
# How many feature values an annotation carries
len(ann.feature_vals(return_str=False))

In [None]:
# Diameter, surface area and volume of the annotation, two decimals each
print("%.2f, %.2f, %.2f" % (ann.diameter,ann.surface_area,ann.volume))

In [None]:
# Centroid of the annotation, unpacked as (i, j, k)
i,j,k = ann.centroid

# Full image volume of the scan this annotation belongs to
vol = ann.scan.to_volume()

# Show the slice at index int(k) and mark the centroid on it; plt.plot takes
# the horizontal coordinate first, hence (j, i).
plt.imshow(vol[:,:,int(k)], cmap=plt.cm.gray)
plt.plot(j, i, '.r', label="Nodule centroid")
plt.legend()
plt.show()

In [None]:
# uniform_cubic_resample
# resampled volumes will have uniform side length of 70mm and
# uniform voxel spacing of 1mm.
n = 70
vol,mask = ann.uniform_cubic_resample(n)

# Setup the plot: an empty (n+1) x (n+1) frame with the gray levels fixed to
# the range of the resampled volume so every slice uses the same scale.
img = plt.imshow(np.zeros((n+1, n+1)),
                 vmin=vol.min(), vmax=vol.max(),
                 cmap=plt.cm.gray)


# View all the resampled image volume slices.
for i in range(n+1):
    # Voxels inside the mask are scaled by 0.8 and the rest by 0.2, so the
    # nodule stands out from the background.
    img.set_data(vol[:,:,i] * (mask[:,:,i]*0.6+0.2))

    # The loop shows n+1 slices, so the total in the title is n+1
    # (with n it ended at "71 / 70").
    plt.title("%02d / %02d" % (i+1, n+1))
    plt.pause(0.1)


In [None]:
# Plot one contour of the annotation on top of the image slice it belongs to
vol = ann.scan.to_volume()
con = ann.contours[3]

# Slice index of this contour. Indexing the volume with it (instead of a
# hard-coded 46) keeps the image and the outline on the same slice for any
# annotation.
k = con.image_k_position
ii,jj = con.to_matrix(include_k=False).T

plt.imshow(vol[:,:,k], cmap=plt.cm.gray)
plt.plot(jj, ii, '-r', lw=1, label="Nodule Boundary")
plt.legend()
plt.show()

In [None]:
# Full image volume of the annotation's scan
vol = ann.scan.to_volume()

# The same pad value is passed to both boolean_mask and bbox — presumably so
# that `mask` and `vol[bbox]` cover the same region; confirm against pylidc docs.
padding = 70.0

mask = ann.boolean_mask(pad=padding)
bbox = ann.bbox(pad=padding)

# Browse the cropped volume with the mask drawn as a solid red line of width 2
volume_viewer(vol[bbox], mask, ls='-', lw=2, c='r')

# 2. Extrair Features

## 2.1. Testes

In [None]:
# Query over all annotations (not materialised with .all() here)
anns = pl.query(pl.Annotation)

In [None]:
# First annotation of the query, used for the experiments below
a1= anns[0]
a1

In [None]:
# Extract the identifiers attached to the annotation
print(a1.scan.patient_id)
print(a1._nodule_id)
# Extract the id of the annotation:
print(a1.id)
# Extract the id of the annotation's scan
print(a1.scan_id)

In [None]:
# Print the feature table of a1
a1.print_formatted_feature_table()

In [None]:
# Individual feature attributes can be read directly from the annotation
print(a1.subtlety)
print(a1.internalStructure)
print(a1.spiculation)
print(a1.calcification)
# etc.

In [None]:
# Diameter, surface area and volume of a1, rounded to two decimals
print("%.2f, %.2f, %.2f" % (a1.diameter,
                            a1.surface_area,
                            a1.volume))

In [None]:
# The same three measurements at full precision
print(a1.diameter)
print(a1.surface_area)
print(a1.volume)

## 2.2. Criar um Dataframe

## Testes:

In [None]:
# Empty test frame: two id columns followed by nine feature columns
df = pd.DataFrame(columns=['Id_Annotation','Id_Scan','Subtlety', 'Internalstructure', 'Calcification','Sphericity','Margin','Lobulation',
                           'Spiculation','Texture','Malignancy'])

In [None]:
# Check the column labels of the test frame
df.columns

In [None]:
# Display the test frame (no rows have been added yet)
df

In [None]:
# Note: this rebinds the notebook-level name `ann` to a1.
ann = a1
# One row for the test frame, in the same order as its columns
row = [ann.id, ann.scan_id ,ann.subtlety,ann.internalStructure,ann.calcification,ann.sphericity,ann.margin,ann.lobulation,
           ann.spiculation,ann.texture,ann.malignancy]
# Inspect the Python type of the first entry (the annotation id)
type(row[0])

In [None]:
# Store the row under index label 0 and display the result
df.loc[0] = row
df

In [None]:
# Type of the value stored in the 'Id_Scan' column of row 0
type(df.loc[0, 'Id_Scan'])

-------------------------------------------------------

In [None]:
# Display the test frame again
df

## Funções:

##### Função que recebe uma anotação e transforma numa row para o dataframe:

In [None]:
def ann_row(ann):
    """Turn one annotation into the two pieces of a dataframe row.

    Parameters
    ----------
    ann : annotation object
        Must expose ``scan.patient_id``, ``_nodule_id``, ``id``, ``scan_id``
        and a ``feature_vals()`` method.

    Returns
    -------
    tuple
        ``(ids, features)``: ``ids`` is a 1-D NumPy array of strings ordered
        as patient id, nodule id, annotation id, scan id; ``features`` is
        whatever ``ann.feature_vals()`` returns.
    """
    # dtype=str lets NumPy size the string width to the longest identifier.
    # The previous fixed '<U14' silently cut off anything longer than
    # 14 characters.
    ids = np.array([ann.scan.patient_id, ann._nodule_id, ann.id, ann.scan_id], dtype=str)
    features = ann.feature_vals()
    return (ids, features)

##### Função que recebe uma lista de anotações e cria um dataframe

In [None]:
def anns_df(anotacoes):
    """Build a dataframe with one row per annotation.

    Parameters
    ----------
    anotacoes : annotation, or list/tuple of annotations
        A single annotation is wrapped into a one-element list.

    Returns
    -------
    pandas.DataFrame
        Four identifier columns (``Patient_id``, ``Nodule_id``,
        ``Annotation_id``, ``Scan_id``) followed by the nine feature columns,
        indexed 0..n-1. An empty input gives an empty frame that still has
        all the columns.
    """
    # Make sure we are working with a sequence of annotations
    if not isinstance(anotacoes, (list, tuple)):
        anotacoes = [anotacoes]

    id_columns = ["Patient_id", "Nodule_id", "Annotation_id", "Scan_id"]
    feature_columns = ['Subtlety', 'Internalstructure', 'Calcification','Sphericity','Margin','Lobulation',
                       'Spiculation','Texture','Malignancy']

    # Collect plain row lists first and build each frame once at the end;
    # assigning df.loc[i] row by row re-allocates the frame on every insert.
    id_rows = []
    feature_rows = []
    for ann in anotacoes:
        array_id, array_feat = ann_row(ann)
        id_rows.append(list(array_id))
        feature_rows.append(list(array_feat))

    # Two frames, one for the ids and one for the features, joined column-wise
    df_id = pd.DataFrame(id_rows, columns=id_columns)
    df_feat = pd.DataFrame(feature_rows, columns=feature_columns)

    df = pd.concat([df_id, df_feat], axis=1)
    return df

In [None]:
# Load every annotation into a list
annotations = pl.query(pl.Annotation).all()
# NOTE(review): only the last expression of a cell is displayed, so this
# length is computed but never shown.
len(annotations)
# Build the dataframe from the first 200 annotations only
anotacoes = annotations[:200]
data = anns_df(anotacoes)
data