In [1]:
import nltk
from nltk.corpus import wordnet
import random

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to get synonyms of a word using NLTK WordNet
def get_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

# Function to perform synonym replacement in a text
def synonym_replacement(text, num_replacements=1):
    words = nltk.word_tokenize(text)
    augmented_text = []
    for word in words:
        if random.random() < 0.5:  # Probability of replacing each word
            synonyms = get_synonyms(word)
            if synonyms:
                replacement = random.choice(synonyms)
                augmented_text.append(replacement)
            else:
                augmented_text.append(word)
        else:
            augmented_text.append(word)
    return ' '.join(augmented_text)

# Augmenting each medical transcript
augmented_transcripts = [synonym_replacement(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
-  Patient presented with symptom of cough and curtness of breath .
-  strong-arm examination unveil elevated temperature and asthmatic .
-  Diagnosis confirmed As bronchitis , decreed antibiotics and inhaler .


In [2]:
import random

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to perform random insertion in a text
def random_insertion(text, num_insertions=1):
    words = text.split()
    for _ in range(num_insertions):
        insertion_point = random.randint(0, len(words))
        insertion_word = "additional_word"
        words.insert(insertion_point, insertion_word)
    return ' '.join(words)

# Augmenting each medical transcript
augmented_transcripts = [random_insertion(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
-  Patient presented additional_word with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing. additional_word
-  Diagnosis confirmed as additional_word bronchitis, prescribed antibiotics and inhaler.


In [3]:
# import nltk
# nltk.download('wordnet')

In [4]:
import random

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to perform random deletion in a text
def random_deletion(text, probability=0.5):
    words = text.split()
    remaining_words = [word for word in words if random.random() > probability]
    return ' '.join(remaining_words)

# Augmenting each medical transcript
augmented_transcripts = [random_deletion(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
-  Patient with of cough and of breath.
-  examination revealed elevated
-  Diagnosis confirmed bronchitis, prescribed antibiotics


In [5]:
import random

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to perform random swap in a text
def random_swap(text, num_swaps=1):
    words = text.split()
    for _ in range(num_swaps):
        idx1, idx2 = random.sample(range(len(words)), 2)
        words[idx1], words[idx2] = words[idx2], words[idx1]
    return ' '.join(words)

# Augmenting each medical transcript
augmented_transcripts = [random_swap(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
-  Patient presented and symptoms of cough with shortness of breath.
-  Physical examination wheezing. elevated temperature and revealed
-  confirmed Diagnosis as bronchitis, prescribed antibiotics and inhaler.


In [6]:
from googletrans import Translator
import random
import time

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Initialize translator
translator = Translator()

# Function to perform back-translation
def back_translate(text, src_lang='en', target_lang='fr'):
    try:
        # Translate to the target language
        translated_text = translator.translate(text, src=src_lang, dest=target_lang).text
        # Introduce artificial delay to avoid rate limiting
        time.sleep(random.uniform(0.5, 1.5))
        # Translate back to the source language
        back_translated_text = translator.translate(translated_text, src=target_lang, dest=src_lang).text
        return back_translated_text
    except Exception as e:
        print(f"Error occurred: {e}")
        return None

# Perform back-translation on each transcript with retry and exponential backoff
augmented_transcripts = []
for transcript in medical_transcripts:
    retries = 3
    for _ in range(retries):
        translated_text = back_translate(transcript)
        if translated_text is not None:
            augmented_transcripts.append(translated_text)
            break
        else:
            # Retry with exponential backoff
            delay = 2 ** _  # Exponential backoff
            time.sleep(delay)
    else:
        # If all retries fail, use original text
        augmented_transcripts.append(transcript)

# Print original and augmented transcripts
print("Original Transcripts:")
for i, transcript in enumerate(medical_transcripts):
    print(f"{i+1}. {transcript}")

print("\nAugmented Transcripts:")
for i, transcript in enumerate(augmented_transcripts):
    print(f"{i+1}. {transcript}")


Original Transcripts:
1. Patient presented with symptoms of cough and shortness of breath.
2. Physical examination revealed elevated temperature and wheezing.
3. Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
1. Patient had coughing and breathless symptoms.
2. The physical examination revealed a high temperature and a whistling breathing.
3. The diagnosis confirmed in the form of bronchitis, prescribed antibiotics and inhaler.


In [7]:
!pip install googletrans==4.0.0-rc1



In [8]:
import random
import nltk
from nltk.corpus import wordnet

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to get synonyms of a word using NLTK WordNet
def get_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

# Function to perform paraphrasing in a text
def paraphrase(text):
    words = nltk.word_tokenize(text)
    paraphrased_text = []
    for word in words:
        synonyms = get_synonyms(word)
        if synonyms:
            paraphrased_word = random.choice(synonyms)
            paraphrased_text.append(paraphrased_word)
        else:
            paraphrased_text.append(word)
    return ' '.join(paraphrased_text)

# Augmenting each medical transcript
augmented_transcripts = [paraphrase(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
-  patient_role demo with symptom of coughing and gruffness of intimation .
-  forcible interrogatory reveal high-flown temperature and wheezy .
-  diagnosis support group_A bronchitis , prescribed antibiotic and inhaler .


In [9]:
import random
import nltk
from nltk.corpus import wordnet

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to get synonyms of a word using NLTK WordNet
def get_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

# Function to perform paraphrasing in a text
def paraphrase(text):
    words = nltk.word_tokenize(text)
    paraphrased_text = []
    for word in words:
        synonyms = get_synonyms(word)
        if synonyms:
            paraphrased_word = random.choice(synonyms)
            paraphrased_text.append(paraphrased_word)
        else:
            paraphrased_text.append(word)
    return ' '.join(paraphrased_text)

# Augmenting each medical transcript
augmented_transcripts = [paraphrase(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)



Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts:
-  patient introduce with symptom of cough and gruffness of intimation .
-  physical exam let_on advance temperature and asthmatic .
-  diagnosing affirm As bronchitis , positive antibiotic_drug and inhalator .


In [10]:
import nltk
import random

# Example medical transcripts data
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to generate new sentences based on existing ones
def generate_text(sentences, num_sentences=5):
    generated_sentences = []

    for _ in range(num_sentences):
        # Randomly select a sentence from the existing transcripts
        random_sentence = random.choice(sentences)

        # Tokenize the sentence into words
        words = nltk.word_tokenize(random_sentence)

        # Shuffle the words to create variations
        random.shuffle(words)

        # Join the shuffled words to form a new sentence
        new_sentence = ' '.join(words)
        generated_sentences.append(new_sentence)

    return generated_sentences

# Generate new medical transcripts
generated_transcripts = generate_text(medical_transcripts)

# Print the generated transcripts
print("Generated Transcripts:")
for i, transcript in enumerate(generated_transcripts, 1):
    print(f"{i}. {transcript}")


Generated Transcripts:
1. revealed elevated and Physical wheezing temperature . examination
2. with of shortness presented and breath . symptoms cough of Patient
3. confirmed antibiotics and Diagnosis as inhaler bronchitis prescribed , .
4. and . Physical examination temperature revealed elevated wheezing
5. Diagnosis , prescribed inhaler confirmed as bronchitis antibiotics and .


In [11]:
import numpy as np
import gensim.downloader as api

# Load pre-trained word embeddings (word2vec)
word_vectors = api.load("word2vec-google-news-300")

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to perform word embedding interpolation
def word_embedding_interpolation(text, alpha=0.5):
    words = text.split()
    interpolated_text = []

    # Iterate through each word in the text
    for word in words:
        # Check if word exists in the word embeddings vocabulary
        if word in word_vectors:
            # Get word embeddings
            word_vector = word_vectors[word]

            # Generate a random vector for interpolation
            random_vector = np.random.rand(len(word_vector))

            # Interpolate between word vector and random vector
            interpolated_vector = (1 - alpha) * word_vector + alpha * random_vector

            # Find the closest word in the embeddings space to the interpolated vector
            closest_word = word_vectors.similar_by_vector(interpolated_vector)[0][0]
            interpolated_text.append(closest_word)
        else:
            interpolated_text.append(word)

    return ' '.join(interpolated_text)

# Augmenting each medical transcript with word embedding interpolation
augmented_transcripts = [word_embedding_interpolation(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts with Word Embedding Interpolation:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts with Word Embedding Interpolation:
-  Patient succinctly_summarized Ambiel symptoms of cough and shortness of breath.
-  Physical examination revealed lawsuit_Patricia_Conradt temperature and wheezing.
-  Diagnosis confirmed ST_HYACINTHE_Que bronchitis, orthopedist_Dr._Xavier_Duralde antibiotics and inhaler.


In [12]:
import random

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to perform text rotation interpolation
def text_rotation_interpolation(text, rotation_factor=0.5):
    words = text.split()
    num_rotations = round(len(words) * rotation_factor)
    rotated_text = words[num_rotations:] + words[:num_rotations]
    return ' '.join(rotated_text)

# Augmenting each medical transcript with text rotation interpolation
augmented_transcripts = [text_rotation_interpolation(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts with Text Rotation Interpolation:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts with Text Rotation Interpolation:
-  cough and shortness of breath. Patient presented with symptoms of
-  temperature and wheezing. Physical examination revealed elevated
-  prescribed antibiotics and inhaler. Diagnosis confirmed as bronchitis,


In [13]:
import random

# Sample medical transcripts
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler."
]

# Function to perform text masking interpolation
def text_masking_interpolation(text, masking_factor=0.5):
    words = text.split()
    num_words_to_mask = round(len(words) * masking_factor)
    masked_indices = random.sample(range(len(words)), num_words_to_mask)
    masked_text = [word if idx not in masked_indices else 'MASK' for idx, word in enumerate(words)]
    return ' '.join(masked_text)

# Augmenting each medical transcript with text masking interpolation
augmented_transcripts = [text_masking_interpolation(transcript) for transcript in medical_transcripts]

# Print original and augmented transcripts
print("Original Transcripts:")
for transcript in medical_transcripts:
    print("- ", transcript)

print("\nAugmented Transcripts with Text Masking Interpolation:")
for augmented_transcript in augmented_transcripts:
    print("- ", augmented_transcript)


Original Transcripts:
-  Patient presented with symptoms of cough and shortness of breath.
-  Physical examination revealed elevated temperature and wheezing.
-  Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.

Augmented Transcripts with Text Masking Interpolation:
-  MASK presented MASK symptoms of cough MASK shortness MASK MASK
-  MASK examination revealed elevated MASK MASK MASK
-  MASK MASK MASK MASK prescribed antibiotics and inhaler.
