# Segmentação e reconhecimento de células em exames de Papanicolau

### Componentes do grupo: Rafael Lopes Murta

## Importações

In [2]:
import cv2
import pandas as pd
import numpy as np
from PIL import Image
from os import listdir, makedirs
from os.path import isfile, join, exists
import matplotlib.pyplot as plt

In [3]:
# Load the cell annotation table from classifications.csv into a DataFrame.
classifications_csv = pd.read_csv('classifications.csv')

## Visualizações

In [4]:
# Print column names, non-null counts and dtypes of the annotation table.
classifications_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11534 entries, 0 to 11533
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   image_id         11534 non-null  int64 
 1   image_filename   11534 non-null  object
 2   image_doi        11534 non-null  object
 3   cell_id          11534 non-null  int64 
 4   bethesda_system  11534 non-null  object
 5   nucleus_x        11534 non-null  int64 
 6   nucleus_y        11534 non-null  int64 
dtypes: int64(4), object(3)
memory usage: 630.9+ KB


In [5]:
# Preview the first five rows of the annotation table.
classifications_csv.head()

Unnamed: 0,image_id,image_filename,image_doi,cell_id,bethesda_system,nucleus_x,nucleus_y
0,400,9ae8a4edde40219bad6303cebc672ee4.png,10.6084/m9.figshare.12230906,1,SCC,792,462
1,400,9ae8a4edde40219bad6303cebc672ee4.png,10.6084/m9.figshare.12230906,2,SCC,601,678
2,400,9ae8a4edde40219bad6303cebc672ee4.png,10.6084/m9.figshare.12230906,3,SCC,363,467
3,400,9ae8a4edde40219bad6303cebc672ee4.png,10.6084/m9.figshare.12230906,4,SCC,599,437
4,400,9ae8a4edde40219bad6303cebc672ee4.png,10.6084/m9.figshare.12230906,5,Negative for intraepithelial lesion,1186,450


In [6]:
# List each distinct bethesda_system label, keeping the row of its first occurrence.
classifications_csv['bethesda_system'].drop_duplicates()

0                                     SCC
4     Negative for intraepithelial lesion
6                                    LSIL
8                                    HSIL
9                                   ASC-H
98                                 ASC-US
Name: bethesda_system, dtype: object

## Funções

In [7]:
def crop_image(image, x, y):
    """Return the 100x100 pixel window of ``image`` centred on ``(x, y)``.

    The box handed to ``image.crop`` is ``(left, top, right, bottom)`` with
    each edge 50 pixels away from the given point.
    """
    half_side = 50
    box = (x - half_side, y - half_side, x + half_side, y + half_side)
    return image.crop(box)

In [8]:
def get_img_dir(bethesda_system):
    """Map a Bethesda label to its output folder under ``editted_images/``.

    The five abbreviated labels (SCC, LSIL, HSIL, ASC-H, ASC-US) use their
    lower-cased name as the folder; 'Negative for intraepithelial lesion'
    maps to ``nfil``.  Any other value yields ``None``.
    """
    abbreviated_labels = ('SCC', 'LSIL', 'HSIL', 'ASC-H', 'ASC-US')
    if bethesda_system in abbreviated_labels:
        return f'editted_images/{bethesda_system.lower()}/'
    if bethesda_system == 'Negative for intraepithelial lesion':
        return 'editted_images/nfil/'
    return None


In [9]:
def get_new_nucleus_position(image, x, y):
    """Return the ``(new_x, new_y)`` nucleus coordinates for a point in ``image``.

    Each axis is mapped independently, using ``image.size`` as
    ``(width, height)``:

    * coordinate below 50                      -> the coordinate itself
    * coordinate between 50 and ``extent - 50`` -> 50
    * otherwise                                -> ``50 + (extent - coordinate)``

    NOTE(review): in the last case the result jumps from 50 to 99 as soon as
    the coordinate passes ``extent - 50``; confirm this is the intended
    mapping for points near the right/bottom border.
    """
    def _map_axis(coordinate, extent):
        # Same three-way rule for both the horizontal and the vertical axis.
        if coordinate < 50:
            return coordinate
        if coordinate <= extent - 50:
            return 50
        return 50 + (extent - coordinate)

    width, height = image.size
    return _map_axis(x, width), _map_axis(y, height)

In [10]:
def process_image(image):
    """Binarise ``image`` and close small gaps in the resulting mask.

    Steps:
      1. convert to grayscale with ``cv2.COLOR_BGR2GRAY`` (so the input is
         expected in BGR channel order);
      2. apply a fixed binary threshold at 128 with a maximum value of 255;
      3. run a morphological closing with a 10x10 all-ones kernel.

    Returns the closed single-channel binary image.
    """
    closing_kernel = np.ones((10, 10), np.uint8)
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_mask = cv2.threshold(grayscale, 128, 255, cv2.THRESH_BINARY)
    closed_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, closing_kernel)
    return closed_mask

In [11]:
def get_shape_descriptors(image):
    """Compute shape descriptors for the first contour found in ``image``.

    Contours are extracted with ``cv2.RETR_TREE`` / ``cv2.CHAIN_APPROX_SIMPLE``
    and only the first one returned is measured.

    Returns:
        ``[area, perimeter, compactness, hu_moments, eccentricity]`` where
        ``compactness`` is ``perimeter**2 / (4 * pi * area)``, ``hu_moments``
        is the flattened array of Hu moments, and ``eccentricity`` is the
        ratio of the longer to the shorter side of the minimum-area bounding
        rectangle.  ``None`` is returned when no contour is found or the
        first contour does not have a positive area.
    """
    found_contours, _ = cv2.findContours(image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if not found_contours:
        return None

    first_contour = found_contours[0]
    area = cv2.contourArea(first_contour)
    perimeter = cv2.arcLength(first_contour, True)
    if not area > 0:
        return None

    compactness = (perimeter**2) / (4 * np.pi * area)
    hu_moments = cv2.HuMoments(cv2.moments(first_contour)).flatten()

    # Element 1 of the rotated rectangle holds its two side lengths.
    side_a, side_b = cv2.minAreaRect(first_contour)[1]
    longer, shorter = (side_a, side_b) if side_a > side_b else (side_b, side_a)
    eccentricity = longer / shorter

    return [area, perimeter, compactness, hu_moments, eccentricity]


## Salvar as imagens nas respectivas pastas e Segmentar

In [13]:
def _closest_contour_to_point(contours, point_x, point_y):
    """Return ``(contour, distance)`` for the contour whose centroid is nearest the point.

    The centroid is taken from the contour moments and truncated to integer
    pixel coordinates.  Contours with ``m00 == 0`` have no centroid and are
    skipped.  Returns ``(None, inf)`` when no contour qualifies.
    """
    best_contour = None
    best_distance = float('inf')
    for contour in contours:
        moments = cv2.moments(contour)
        if moments["m00"] == 0:
            continue
        centroid_x = int(moments["m10"] / moments["m00"])
        centroid_y = int(moments["m01"] / moments["m00"])
        distance = np.sqrt((point_x - centroid_x) ** 2 + (point_y - centroid_y) ** 2)
        if distance < best_distance:
            best_distance = distance
            best_contour = contour
    return best_contour, best_distance


# Create one output folder per Bethesda class.
dir_list = ['scc', 'lsil', 'hsil', 'asc-h', 'asc-us', 'nfil']
makedirs('editted_images', exist_ok=True)
for class_dir in dir_list:
    makedirs(f'editted_images/{class_dir}', exist_ok=True)

feature_columns = ['original_filename', 'cell_id', 'bethesda_system', 'area', 'perimeter', 'compactness', 'eccentricity', 'circularity', 'distance_from_original_nucleus']

onlyfiles = [f for f in listdir('images') if isfile(join('images', f))]
# Set lookup keeps the per-row availability check O(1).
available_files = set(onlyfiles)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; growing the frame with ``.loc`` on every iteration re-allocates it each
# time, and ``dataframe.size`` (rows * columns) is not a row counter.
feature_rows = []

for _, row in classifications_csv.iterrows():
    if row["image_filename"] not in available_files:
        continue

    original_image_filepath = f'images/{row["image_filename"]}'
    croped_image_filepath = f'{get_img_dir(row["bethesda_system"])}{row["cell_id"]}.jpg'

    # The context manager releases the source file handle after every row.
    # Cropping, saving and the array conversion happen while it is still open.
    with Image.open(original_image_filepath) as image:
        croped_image = crop_image(image, row['nucleus_x'], row['nucleus_y'])
        croped_image.save(croped_image_filepath)
        cropped_rgb = np.asarray(croped_image)

    # PIL yields RGB channel order while process_image converts with
    # COLOR_BGR2GRAY, so reorder the channels before handing the array over.
    cropped_image_np = process_image(cv2.cvtColor(cropped_rgb, cv2.COLOR_RGB2BGR))

    # Thresholding: the cut-off is 1.37 times the blurred value at the crop centre.
    center_y, center_x = cropped_image_np.shape[0] // 2, cropped_image_np.shape[1] // 2
    blurred_image = cv2.GaussianBlur(cropped_image_np, (5, 5), 0)
    threshold_value = blurred_image[center_y, center_x] * 1.37
    _, img_thresholded = cv2.threshold(
        blurred_image, threshold_value, 255, cv2.THRESH_BINARY)

    # Edge map from the Sobel gradient magnitude.
    sobelx = cv2.Sobel(img_thresholded, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(img_thresholded, cv2.CV_64F, 0, 1, ksize=3)
    sobel = cv2.magnitude(sobelx, sobely)

    # Contours: findContours only distinguishes zero from non-zero pixels, so
    # build an explicit 0/1 mask instead of casting magnitudes that can exceed
    # 255 straight to uint8.
    edge_mask = (sobel > 0).astype(np.uint8)
    contours, _ = cv2.findContours(edge_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    # Contour whose centroid is closest to the crop centre.
    central_contour, minimum_distance = _closest_contour_to_point(contours, center_x, center_y)
    if central_contour is None:
        # No usable contour: no feature row is written for this cell.
        continue

    # Annotation drawn on the working mask only; it does not affect the features.
    cv2.drawContours(cropped_image_np, [central_contour], -1, (0, 255, 0), 2)

    area = cv2.contourArea(central_contour)
    perimeter = cv2.arcLength(central_contour, True)
    compactness = (perimeter ** 2) / (4 * np.pi * area) if area != 0 else 0
    circularity = (4 * np.pi * area) / (perimeter ** 2) if perimeter != 0 else 0

    eccentricity = 0
    if len(central_contour) >= 5:  # fitEllipse needs at least five points
        _, ellipse_axes, _ = cv2.fitEllipse(central_contour)
        # Order the axes explicitly so the ratio never exceeds 1, which would
        # make the square root below NaN.
        minor_axis, major_axis = sorted(ellipse_axes)
        if major_axis != 0:
            eccentricity = np.sqrt(1 - (minor_axis / major_axis) ** 2)

    cv2.circle(cropped_image_np, (center_x, center_y), 2, (255, 0, 0), -1)

    feature_rows.append([original_image_filepath, row['cell_id'], row['bethesda_system'], area, perimeter, compactness, eccentricity, circularity, minimum_distance])

dataframe = pd.DataFrame(feature_rows, columns=feature_columns)
dataframe.to_csv('characteristics.csv', index=False)

: 