In [1]:
### Generating synthesised reports with the generator trained for 3 epochs on 50,000 rows ###

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers import pipeline
import pandas as pd
import random

In [5]:
# Load the tokenizer identified by "microsoft/biogpt"; it is paired with the
# separately loaded model when the generation pipeline is built.
tokenizer = AutoTokenizer.from_pretrained("microsoft/biogpt")

In [49]:
# Load causal-LM weights from a local checkpoint directory.
# NOTE(review): presumably the fine-tuned report generator saved at training
# step 38000 — confirm this is the intended checkpoint.
model = AutoModelForCausalLM.from_pretrained("../models/checkpoint-38000/")

In [50]:
# Build a text-generation pipeline around the loaded model and tokenizer.
# The extra keyword arguments are generation settings fixed at construction
# time; the commented-out ones are alternatives that are currently disabled.
# NOTE(review): min_length / max_length presumably count the prompt tokens as
# well as the generated ones — confirm against the transformers docs.
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, 
                     min_length = 30,
                     max_length = 120, 
                     #temperature = 0.9, 
                     #num_beams =2,
                     #no_repeat_ngram_size=2,
                     do_sample=True,  # sample tokens rather than decoding greedily
                     top_k=0  # NOTE(review): presumably disables top-k filtering — confirm
                    )

In [51]:
# Smoke test: run the pipeline on one hand-written report header and display
# the raw pipeline output (a list of dicts with a 'generated_text' key).
generator( " Dr. Loughridge Dr. Stewart  FG5  INDICATIONS FOR PROCEDURE: Abdominal Pain Extent of Exam:  Pylorus  FINDINGS")





[{'generated_text': ' Dr. Loughridge Dr. Stewart  FG5  INDICATIONS FOR PROCEDURE: Abdominal Pain Extent of Exam:  Pylorus  FINDINGS: Columnar lined oesophagus is present. It is a long segment. parture.The ulcer was biopsied x'}]

In [None]:
## Load df_xy and pick some random reports to use as generation prompts ##

In [52]:

# Load the report corpus. Later cells read its "text" column, both to build
# prompts and to check generated findings against the original reports.
df_xy = pd.read_csv('../data/df_xy.csv', index_col=False)

In [53]:
def gen_inputs(df, num_sents):
    """Build generation prompts from randomly chosen reports.

    For each prompt a row of ``df["text"]`` is picked uniformly at random
    (with replacement), everything from the first "FINDINGS:" heading onwards
    is dropped, and the heading is appended again so the prompt ends exactly
    where the model should start writing findings. A report without the
    heading is used in full, with "FINDINGS:" appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column named "text".
    num_sents : int
        Number of prompts to produce.

    Returns
    -------
    list of str
        ``num_sents`` prompts, each ending in "FINDINGS:".

    Raises
    ------
    ValueError
        If prompts are requested from an empty frame.
    """
    texts = df["text"]
    n_rows = len(texts)
    if num_sents > 0 and n_rows == 0:
        raise ValueError("cannot draw prompts from an empty DataFrame")

    input_strings = []
    for _ in range(num_sents):
        # Sample a position from the actual row count. The previous hard-coded
        # randint(0, 50000) was inclusive at both ends, so it could request a
        # row one past the end of a 50000-row frame and ignored the real size.
        pos = random.randrange(n_rows)
        # Positional access, so a non-default index cannot cause a miss.
        report = texts.iloc[pos]
        pre_findings = report.split("FINDINGS:")[0]
        input_strings.append(pre_findings + "FINDINGS:")
    return input_strings

In [54]:
# Draw 100 random prompts from the corpus and display them for inspection.
hundred_string = gen_inputs(df_xy, 100)
hundred_string

[' Dr. George Dr. Lee  FG7  INDICATIONS FOR PROCEDURE: Dysphagia Vomiting known hiatus hernia Extent of Exam:  D2  FINDINGS:',
 ' Dr. Lopez Dr. Hall  FG6  INDICATIONS FOR PROCEDURE: Abdominal Pain Dysphagia/Odynophagia Extent of Exam:  D2  FINDINGS:',
 ' Dr. Maxfield Dr. el-Hasen  FG5  INDICATIONS FOR PROCEDURE: Other - Post caustic soda ingestion - assessment of healing Extent of Exam:  D1  FINDINGS:',
 ' Dr. Lazo Dr. Stewart  FG4  INDICATIONS FOR PROCEDURE: Weight Loss Abdominal Pain Extent of Exam:  GOJ  FINDINGS:',
 ' Dr. Jernigan Dr. Delgado  FG6  INDICATIONS FOR PROCEDURE: Anaemia/Low Iron or Vitamins Abdominal Pain Extent of Exam:  Stomach body  FINDINGS:',
 ' Dr. el-Suleiman Dr. Delgado  FG6  INDICATIONS FOR PROCEDURE: Therapeutic- Food bolus/foreign body removal Extent of Exam:  D1  FINDINGS:',
 ' Dr. el-Rahmani Dr. el-Hasen  FG4  INDICATIONS FOR PROCEDURE: Other- clips positioning to oesophageal lesion Extent of Exam:  D2  FINDINGS:',
 ' Dr. Miller Dr. Nguyen  FG3  INDICATION

In [55]:
def findings_generator(input_list, generator):
    """Generate a findings section for every prompt in ``input_list``.

    ``generator`` is called once per prompt and must return a sequence whose
    first element is a mapping with a "generated_text" entry. From that text,
    the piece that follows the first "FINDINGS:" heading (and stops at the
    next one, if any) is kept.

    Returns a list of findings strings in the same order as ``input_list``.
    """
    heading = "FINDINGS:"
    return [
        generator(prompt)[0]["generated_text"].split(heading)[1]
        for prompt in input_list
    ]
        


In [56]:
# Run the pipeline once per prompt and keep only the generated findings text.
hundred_findings = findings_generator(hundred_string, generator)

In [57]:
# Display the generated findings for a manual read-through.
hundred_findings

[' Normal gastroscopy to the duodenum.',
 ' There is inflammation in the third part of the duodenum which is mild. The duodenal mucosa appears to have an uneven, cobblestone-like appearance due to inflammation.The ulcer was biopsied x8 for further analysis.. NA biable',
 ' The patient has a stricture in the D1 / D2 angle which is contractedIt is easily traversible..',
 ' Hiatus hernia.. at ambient',
 ' The patient has an ulcer in the body which is exudativeIt is not bleeding. Surrounded by erythema and exhibiting a nodular surface, the severe ulceration presents with a distinct loss of mucosal tissue.Following the biopsy of the lesion, there was bleeding, which was promptly managed.. The surrounding mucosa appears thickened and nodular. The gastric stricture is located in the body of the stomach, with a circumferential narrowing of the',
 ' The patient has a stricture in the duodenal bulb which is stiffNormal, no appreciable narrowing of the duodenal lumen is noted on endoscopy to the 

In [58]:
# Wrap the list of generated findings in a one-column DataFrame (the column
# is labelled 0 until it is renamed in a later cell).
hund_gen_df = pd.DataFrame(hundred_findings)

In [59]:
# Give the single column, labelled 0, the descriptive name "findings".
hund_gen_df=hund_gen_df.rename(columns={0:"findings"})

In [60]:
# Spot-check one generated findings string (row label 1).
hund_gen_df.findings[1]

' There is inflammation in the third part of the duodenum which is mild. The duodenal mucosa appears to have an uneven, cobblestone-like appearance due to inflammation.The ulcer was biopsied x8 for further analysis.. NA biable'

In [61]:
# Persist the generated findings.
# NOTE(review): the row index is written as an unnamed first column, which
# presumably explains the "Unnamed: 0" column shown further down once this
# file is read back — consider index=False if no consumer relies on it.
hund_gen_df.to_csv("../data/findings100.csv")

In [None]:
## Analysing the generated findings to see how much they differ from the original corpus ##

In [45]:
def calculate_appearance(string, df=None):
    """Count the corpus reports whose text contains ``string`` verbatim.

    Parameters
    ----------
    string : str
        Text to look for. It is matched as a literal substring, not as a
        regular expression.
    df : pandas.DataFrame, optional
        Frame with a "text" column to search. Defaults to the notebook-level
        ``df_xy`` corpus, so existing one-argument calls (including
        ``Series.apply(calculate_appearance)``) keep working.

    Returns
    -------
    int
        Number of rows whose text contains ``string``. Empty or
        whitespace-only queries return 0, because they would otherwise match
        every report and carry no information.
    """
    if df is None:
        df = df_xy
    if not string.strip():
        return 0
    # Build the match mask without touching ``df``: the earlier version aliased
    # the corpus frame and wrote an "Indexes" column onto it on every call.
    # A containment test also counts matches at position 0, which the earlier
    # ``str.find(...) > 0`` check missed. Missing text never matches.
    matches = df["text"].str.contains(string, regex=False, na=False)
    return int(matches.sum())
    

In [46]:
def clause_appearance(string):
    """Return the corpus count of every clause in ``string``.

    Commas are treated like full stops, the text is split on ".", and each
    resulting piece (including empty ones, e.g. after a trailing full stop)
    is passed to ``calculate_appearance``. The counts are returned as a list
    in clause order.
    """
    clauses = string.replace(",", ".").split(".")
    return [calculate_appearance(clause) for clause in clauses]
        

In [47]:
# Sanity check of the counting helper on a phrase expected to be common.
test = calculate_appearance("Normal gastroscopy")

In [48]:
# "in_corpus?": number of corpus reports containing the whole generated
# findings string verbatim.
# "clause_in_corpus?": the same count for each comma/full-stop separated
# clause of the findings, as a list.
# NOTE(review): this cell's execution count is lower than that of the cells
# above it, so the notebook was run out of order — confirm it reproduces under
# Restart & Run All.
hund_gen_df["in_corpus?"] = hund_gen_df["findings"].apply(calculate_appearance)
hund_gen_df["clause_in_corpus?"] = hund_gen_df["findings"].apply(clause_appearance)

In [33]:
# Show the first 50 generated findings alongside their corpus-overlap counts.
hund_gen_df.head(50)

Unnamed: 0,findings,in_corpus?,clause_in_corpus?
0,A large hiatus hernia is present. atesscaffol...,0,"[132, 0, 0]"
1,Normal gastroscopy to the duodenum. folllow,0,"[12748, 337]"
2,There is a nodule in the third part of the du...,0,"[78, 310, 0]"
3,The patient has a stricture in the stomach wh...,0,"[1, 0, 59, 1297, 0, 0]"
4,There is a nodule in the D1 / D2 angle which ...,0,"[0, 0]"
5,The patient has a 4mm nodule in the D1 / D2 a...,0,"[0, 0]"
6,Normal gastroscopy to the duodenum.,12748,"[12748, 0]"
7,The patient has a 10mm nodule in the second p...,0,"[12, 0]"
8,There is an ulcer in the second part of the d...,0,"[0, 0, 300, 300, 1297, 0, 0, 0, 67, 314]"
9,LA Grade C oesophagitis. There is significant...,0,"[0, 235, 202, 202, 106, 0, 0, 0]"
