In [1]:
import numpy as np
import pandas as pd

### Experiments with Sentence Transformers

In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
#model = SentenceTransformer('paraphrase-distilroberta-base-v1') #we can use robert or miniLM
model=SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [4]:
report_file_path = "../reportscoring/"


### QI score Calculations to measure report quality improvement
### Using the SBERT method for encoding the reports captures semantics better

In [5]:
import os
# reads the file, extracts sentences, encodes reports, converts them to a vector
def report_encoder(filepath, model):

    #file = open(filepath)
    file = open(filepath, 'r')
    Lines = file.readlines()
    #extract sentences
    allsentences=[]
    #sort the sentences lexically
    for perline in Lines:
        if (perline.strip()!=""):
            linetokens=perline.split(".")
            for line in linetokens:
                if (line.strip()!=""):
                    line1 = line.strip()+"."
                    line1=line1.lower()
                    allsentences.append(line1)
       
    file.close()
    #not necessary since we are averaging across the sentences
    sortedsentences=sorted(allsentences)
   # print(len(allsentences))
    #print(allsentences)
    #encode the report
    sentence_embeddings = model.encode(sortedsentences)
    #print(sentence_embeddings)
    
    sentence_avg = np.average(sentence_embeddings, axis = 0)
    #print(sentence_avg.shape)
    #convert them into a single vector
    sentence_avg = sentence_avg.reshape(1, sentence_avg.shape[0])
    #print(sentence_avg)
    
    return sentence_avg
    
def compute_QI_score_BERT(reportdir,model):
    origdir=reportdir+"origreports"
    aidir=reportdir+"aireports"
    modifdir=reportdir+"correctedreports"
    filenames=[]
    ntotal=0
    npositive=0
    nnegative=0
    nequal=0
    for root, dirs, files in os.walk(modifdir, topdown=False, onerror=None, followlinks=True):
        for filename in files:
            if filename != '.DS_Store':
                filepath = os.path.join(root, filename)
                filenames.append(filename)
    for filename in filenames:
        origpath=os.path.join(origdir,filename)
        aipath=os.path.join(aidir,filename)
        modifpath=os.path.join(modifdir,filename)
        
        vect_orig=report_encoder(origpath,model)
       # print("Ai report")
        vect_ai=report_encoder(aipath,model)
        #print("Modif report")
        vect_modif=report_encoder(modifpath, model)
        
        D12=cosine_similarity([vect_orig[0],vect_ai[0]])
        D13=cosine_similarity([vect_orig[0],vect_modif[0]])
        orig_ai_score=D12[0,1]
        orig_modif_score=D13[0,1]
        ntotal+=1
       # print(orig_modif_score,orig_ai_score)
        if (orig_modif_score>orig_ai_score):
            npositive+=1
        elif (orig_modif_score<orig_ai_score):
            nnegative+=1
        else:
            nequal+=1
    QI_score=((npositive+nequal-nnegative)/ntotal)*100
    print(npositive,ntotal,(npositive/ntotal),(nnegative/ntotal), (nequal/ntotal))
    print("Net improvement = ",QI_score,"%")
    return QI_score,npositive,nnegative,nequal,ntotal
         

In [67]:
qi_score,npositive,nnegative,nequal,ntotal=compute_QI_score_BERT(report_file_path,model)

1105 3661 0.3018301010652827 0.4228352909041246 0.27533460803059273
Net improvement =  15.432941819175088


In [16]:
Sent1="There is no pleural effusion."
Sent2="There is no evidence of pleural effusion."
e1= model.encode(Sent1)
e2=model.encode(Sent2)
D12=cosine_similarity([e1,e2])
print(D12[0,1])
print(bleu(Sent1,Sent2))


0.93675435
0.0105115714765347



## Quality improvement using BLEU score for capturing the similarity between report sentences
### truncates the report to have the same sentences as BLEU works with equal number of sentences in the report
##### reference: https://towardsdatascience.com/how-to-evaluate-text-generation-models-metrics-for-automatic-evaluation-of-nlp-models-e1c251b04ec1
#### WIth Bleu score we get much higher quality improvement


In [9]:
from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu, sentence_bleu

def bleu_encode(filepath):

    #file = open(filepath)
    file = open(filepath, 'r')
    Lines = file.readlines()
    #extract sentences
    allsentences=[]
    #sort the sentences lexically
    for perline in Lines:
        if (perline.strip()!=""):
            linetokens=perline.split(".")
            for line in linetokens:
                if (line.strip()!=""):
                    line1 = line.strip()+"."
                    line1=line1.lower()
                    allsentences.append(line1)
       
    file.close()
    #not necessary since we are averaging across the sentences
    sortedsentences=sorted(allsentences)
   
    return sortedsentences
    
def bleu(rep1, rep2):
    ''' 
    calculate pair wise bleu score. uses nltk implementation
    Args:
        references : a list of reference sentences 
        candidates : a list of candidate(generated) sentences
    Returns:
        bleu score(float)
    '''
    rep1_bleu = []
    rep2_bleu = []
    for l in rep2:
        rep2_bleu.append(l.split())
        
    for i,l in enumerate(rep1):
        rep1_bleu.append([l.split()])
    cc = SmoothingFunction()
    minlength=min(len(rep1_bleu), len(rep2_bleu))
   # print(minlength,len(rep1_bleu), len(rep2_bleu))
    score_bleu = corpus_bleu(rep1_bleu[0:minlength], rep2_bleu[0:minlength], weights=(0, 1, 0, 0), smoothing_function=cc.method4)
    return score_bleu

def compute_QI_score_BLEU(reportdir):
    origdir=reportdir+"origreports"
    aidir=reportdir+"aireports"
    modifdir=reportdir+"correctedreports"
    filenames=[]
    ntotal=0
    npositive=0
    nnegative=0
    nequal=0
    for root, dirs, files in os.walk(modifdir, topdown=False, onerror=None, followlinks=True):
        for filename in files:
            if filename != '.DS_Store':
                filepath = os.path.join(root, filename)
                filenames.append(filename)
    for filename in filenames:
        origpath=os.path.join(origdir,filename)
        aipath=os.path.join(aidir,filename)
        modifpath=os.path.join(modifdir,filename)
        origsent= bleu_encode(origpath)
        aisent=bleu_encode(aipath)
        modifsent=bleu_encode(modifpath)
       
        orig_ai_score=bleu(origsent,aisent)
        orig_modif_score=bleu(origsent,modifsent)
        ntotal+=1
       # print(orig_modif_score,orig_ai_score)
        if (orig_modif_score>orig_ai_score):
            npositive+=1
        elif (orig_modif_score<orig_ai_score):
            nnegative+=1
        else:
            nequal+=1
    QI_score=((npositive+nequal-nnegative)/ntotal)*100
    print(npositive,ntotal,(npositive/ntotal),(nnegative/ntotal), (nequal/ntotal))
    print("Net improvement = ",QI_score,"%")
    return QI_score,npositive,nnegative,nequal,ntotal

In [105]:
QI_score,npositive,nnegative,nequal,ntotal=compute_QI_score_BLEU(report_file_path)

1312 3661 0.3583720295001366 0.2838022398251844 0.35782573067467904
Net improvement =  43.239552034963125 %
