<a href="https://colab.research.google.com/github/EdwFa/dm3/blob/api/topic_summary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install -U sentence-transformers
!pip install -U lexrank



In [10]:
from sentence_transformers import SentenceTransformer, util
import numpy as np


In [16]:
import nltk

# nltk.sent_tokenize needs the Punkt sentence-tokenizer data. Older NLTK
# releases read it from the 'punkt' package, newer ones from 'punkt_tab',
# so fetch both to keep the notebook runnable on a fresh kernel.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [11]:
import numpy as np
from scipy.sparse.csgraph import connected_components
from scipy.special import softmax
import logging

# Module-level logger; _power_method uses it to warn when the iteration
# limit is reached without convergence.
logger = logging.getLogger(__name__)

def degree_centrality_scores(
    similarity_matrix,
    threshold=None,
    increase_power=True,
):
    """Score every node of a similarity graph by its centrality.

    The similarity matrix is turned into a Markov transition matrix and the
    un-normalized stationary distribution of that chain is returned.

    Args:
        similarity_matrix: Square array of pairwise similarities.
        threshold: ``None`` to use the continuous weights, or a ``float`` in
            ``[0, 1)`` to binarize the matrix first (entries ``>= threshold``
            become 1, the rest 0).
        increase_power: Forwarded to ``stationary_distribution``.

    Returns:
        1-D array with one score per row of ``similarity_matrix``.

    Raises:
        ValueError: If ``threshold`` is neither ``None`` nor a float in
            ``[0, 1)``.
    """
    if threshold is not None:
        # The isinstance test comes first so non-numeric values are never
        # compared against the interval bounds.
        is_valid = isinstance(threshold, float) and 0 <= threshold < 1
        if not is_valid:
            raise ValueError(
                '\'threshold\' should be a floating-point number '
                'from the interval [0, 1) or None',
            )
        transition_probabilities = create_markov_matrix_discrete(
            similarity_matrix,
            threshold,
        )
    else:
        transition_probabilities = create_markov_matrix(similarity_matrix)

    return stationary_distribution(
        transition_probabilities,
        increase_power=increase_power,
        normalized=False,
    )


def _power_method(transition_matrix, increase_power=True, max_iter=10000):
    eigenvector = np.ones(len(transition_matrix))

    if len(eigenvector) == 1:
        return eigenvector

    transition = transition_matrix.transpose()

    for _ in range(max_iter):
        eigenvector_next = np.dot(transition, eigenvector)

        if np.allclose(eigenvector_next, eigenvector):
            return eigenvector_next

        eigenvector = eigenvector_next

        if increase_power:
            transition = np.dot(transition, transition)

    logger.warning("Maximum number of iterations for power method exceeded without convergence!")
    return eigenvector_next


def connected_nodes(matrix):
    """Group node indices by connected component of the graph in ``matrix``.

    Args:
        matrix: Adjacency/weight matrix accepted by
            ``scipy.sparse.csgraph.connected_components``.

    Returns:
        List of 1-D index arrays, one per component, ordered by component
        label.
    """
    _, component_labels = connected_components(matrix)

    return [
        np.where(component_labels == label)[0]
        for label in np.unique(component_labels)
    ]


def create_markov_matrix(weights_matrix):
    """Convert a square weight matrix into a row-stochastic matrix.

    Args:
        weights_matrix: Square 2-D array of edge weights.

    Returns:
        Array of the same shape whose rows each sum to 1.

    Raises:
        ValueError: If ``weights_matrix`` is not square.
    """
    height, width = weights_matrix.shape
    if height != width:
        raise ValueError('\'weights_matrix\' should be square')

    # Plain row normalization is only used for strictly positive weights.
    # Any zero or negative entry switches the whole matrix to a row-wise
    # softmax instead.
    if np.min(weights_matrix) <= 0:
        return softmax(weights_matrix, axis=1)

    return weights_matrix / weights_matrix.sum(axis=1, keepdims=True)


def create_markov_matrix_discrete(weights_matrix, threshold):
    """Binarize a weight matrix at ``threshold`` and normalize the result.

    Args:
        weights_matrix: 2-D array of edge weights.
        threshold: Entries ``>= threshold`` become 1, all others 0.

    Returns:
        The result of ``create_markov_matrix`` applied to the 0/1 matrix.
    """
    adjacency = np.zeros(weights_matrix.shape)
    adjacency[weights_matrix >= threshold] = 1

    return create_markov_matrix(adjacency)


def stationary_distribution(
    transition_matrix,
    increase_power=True,
    normalized=True,
):
    """Compute per-state scores of a Markov chain, component by component.

    The chain is split into connected components, the power method is run on
    each component's sub-matrix, and the results are written back into one
    vector indexed like the original states.

    Args:
        transition_matrix: Square 2-D array of transition probabilities.
        increase_power: Forwarded to ``_power_method``.
        normalized: If true, every score is divided by the number of states.

    Returns:
        1-D array with one score per state.

    Raises:
        ValueError: If ``transition_matrix`` is not square.
    """
    num_rows, num_cols = transition_matrix.shape
    if num_rows != num_cols:
        raise ValueError('\'transition_matrix\' should be square')

    distribution = np.zeros(num_rows)

    for component in connected_nodes(transition_matrix):
        # Restrict the chain to the states of this component only.
        sub_matrix = transition_matrix[np.ix_(component, component)]
        distribution[component] = _power_method(
            sub_matrix,
            increase_power=increase_power,
        )

    if normalized:
        distribution /= num_rows

    return distribution

In [12]:
# Load the pretrained sentence-embedding model used to encode the sentences below.
model = SentenceTransformer('all-MiniLM-L6-v2')

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [23]:
# Text to summarize

document = """
This paper proposes a development of automatic rib sequence labeling systems on chest computed tomography (CT) images with two suggested methods and three-dimensional (3D) region growing. In clinical practice, radiologists usually define anatomical terms of location depending on the rib's number. Thus, with the manual process of labeling 12 pairs of ribs and counting their sequence, it is necessary to refer to the annotations every time the radiologists read chest CT. However, the process is tedious, repetitive, and time-consuming as the demand for chest CT-based medical readings has increased. To handle the task efficiently, we proposed an automatic rib sequence labeling system and implemented comparison analysis on two methods. With 50 collected chest CT images, we implemented intensity-based image processing (IIP) and a convolutional neural network (CNN) for rib segmentation on this system. Additionally, three-dimensional (3D) region growing was used to classify each rib's label and put in a sequence label. The IIP-based method reported a 92.0% and the CNN-based method reported a 98.0% success rate, which is the rate of labeling appropriate rib sequences over whole pairs (1st to 12th) for all slices. We hope for the applicability thereof in clinical diagnostic environments by this method-efficient automatic rib sequence labeling system.

The purpose of this study was to evaluate the performance of a deep learning system for the automatic diagnosis and classification of rib fractures. METHODS: This retrospective study analyzed computed tomography (CT) data of patients diagnosed with a rib fracture between 1 January 2019 and 23 July 2020 in two hospitals, including 591 patients from Suzhou TCM hospital and 75 patients from Jintan TCM hospital. A deep learning system (Dr.Wise@ChestFracture v1.0) based on a convolutional neural network framework was used as a diagnostic tool, and a human-model comparison experiment was designed to compare the diagnostic efficiencies of the deep learning system and radiologists. Furthermore, a secondary classification model was established to distinguish the different types of fracture. First, a classification model to differentiate between fresh and old fractures was developed. Second, a submodel to determine any misalignment in fresh fractures was established. RESULTS: For all fracture types, the detection efficiency (recall) of the system was statistically significantly better than that of radiologists with different levels of experience (all p < 0.0167 except for senior radiologists). The F1-score of the system for diagnosing rib fractures was similar to that of the radiologists. The system was much faster than the radiologists in assessing rib fractures (all p < 0.0167). The two classification models can distinguish between fresh and old fractures (accuracy = 87.63%) and determine whether there is any misalignment in fresh fractures (accuracy = 95.22%) or not. CONCLUSION: The use of a deep learning system can accurately, automatically, and rapidly diagnose and classify rib fractures, helping doctors improve the diagnostic efficiency and reducing their workload. The classification models can distinguish different types of rib fracture well.

Clinical rib fracture diagnosis via computed tomography (CT) screening has attracted much attention in recent years. However, automated and accurate segmentation solutions remain a challenging task due to the large sets of 3D CT data to deal with. Down-sampling is often required to face computer constraints, but the performance of the segmentation may decrease in this case. METHODS: A new multi-angle projection network (MAPNet) method is proposed for accurately segmenting rib fractures by means of a deep learning approach. The proposed method incorporates multi-angle projection images to complementarily and comprehensively extract the rib characteristics using a rib extraction (RE) module and the fracture features using a fracture segmentation (FS) module. A multi-angle projection fusion (MPF) module is designed for fusing multi-angle spatial features.﻿ RESULTS: It is shown that MAPNet can capture more detailed rib fracture features than some commonly used segmentation networks. Our method achieves a better performance in accuracy (88.06 +/- 6.97%), sensitivity (89.26 +/- 5.69%), specificity (87.58% +/- 7.66%) and in terms of classical criteria like dice (85.41 +/- 3.35%), intersection over union (IoU, 80.37 +/- 4.63%), and Hausdorff distance (HD, 4.34 +/- 3.1). CONCLUSION: We propose a rib fracture segmentation technique to deal with the problem of automatic fracture diagnosis. The proposed method avoids the down-sampling of 3D CT data through a projection technique. Experimental results show that it has excellent potential for clinical applications.

"""

In [24]:
# Split the document into sentences
sentences = nltk.sent_tokenize(document)
print("Num sentences:", len(sentences))

Num sentences: 31


In [25]:
# Build an embedding for each sentence
embeddings = model.encode(sentences, convert_to_tensor=True)

In [26]:
# Compute the cosine similarity for every pair of sentences.
# .cpu() is a no-op for CPU tensors and is required before .numpy() when the
# embeddings were produced on a GPU runtime.
cos_scores = util.cos_sim(embeddings, embeddings).cpu().numpy()

In [27]:
# Centrality score of each sentence
centrality_scores = degree_centrality_scores(cos_scores, threshold=None)

In [28]:
# Order sentence indices so the highest-scoring sentences come first, ready for slicing
most_central_sentence_indices = np.argsort(-centrality_scores)

In [30]:
# Summary made of the 7 most central sentences
print("\n\nSummary:")
for idx in most_central_sentence_indices[0:7]:
    print(sentences[idx].strip())



Summary:
CONCLUSION: We propose a rib fracture segmentation technique to deal with the problem of automatic fracture diagnosis.
The purpose of this study was to evaluate the performance of a deep learning system for the automatic diagnosis and classification of rib fractures.
The classification models can distinguish different types of rib fracture well.
CONCLUSION: The use of a deep learning system can accurately, automatically, and rapidly diagnose and classify rib fractures, helping doctors improve the diagnostic efficiency and reducing their workload.
The proposed method incorporates multi-angle projection images to complementarily and comprehensively extract the rib characteristics using a rib extraction (RE) module and the fracture features using a fracture segmentation (FS) module.
The system was much faster than the radiologists in assessing rib fractures (all p < 0.0167).
The F1-score of the system for diagnosing rib fractures was similar to that of the radiologists.
