In [7]:
import random   # Random number generation
import numpy as np  # Numerical operations
import nltk # Natural Language Toolkit for text processing
from nltk.corpus import wordnet    # WordNet for lexical relations mainly used for synonyms
nltk.download('wordnet')    # Download WordNet data
nltk.download('omw-1.4') # Download Open Multilingual WordNet for translations
nltk.download('punkt_tab') # Download Punkt tokenizer models for sentence splitting

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\manis\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\manis\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\manis\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [8]:
# Sample sentence. The demo cells further down define their own copies of this
# same string (original, text2, text3) instead of reading this variable.
text = "The Movie was absolutely fantastic and enjoyable"

In [9]:
def get_synonyms(word):
    """Return the WordNet synonyms of ``word`` as human-readable strings.

    Every lemma of every synset returned by ``wordnet.synsets(word)`` is
    collected. Underscores in lemma names are replaced with spaces, so a
    name such as ``'motion_picture'`` is returned as ``'motion picture'``.

    Args:
        word: The word to look up.

    Returns:
        A sorted list of unique synonym strings. ``word`` itself is excluded
        (compared case-insensitively). The list is empty when there are no
        synsets or when no lemma differs from ``word``.
    """
    target = word.lower()
    synonyms = set()  # Use a set to avoid duplicates
    # Iterate through all synsets of the word
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonym = lemma.name().replace('_', ' ')  # Replace underscores with spaces for readability
            if synonym.lower() != target:  # Avoid adding the original word as a synonym
                # Store the cleaned form, not the raw lemma name, so callers
                # never receive underscore-joined phrases.
                synonyms.add(synonym)
    # Sorted so the result order does not depend on set iteration order,
    # which keeps seeded random choices over this list repeatable.
    return sorted(synonyms)

In [10]:
def synonym_replacement(text, n=2):
    """Replace up to ``n`` distinct words of ``text`` with random synonyms.

    The text is tokenized with ``nltk.word_tokenize``. Distinct alphabetic
    tokens are visited in random order, and each one that has at least one
    synonym (per ``get_synonyms``) is mapped to a randomly chosen synonym
    until ``n`` words have been mapped. Every occurrence of a mapped word is
    replaced.

    Args:
        text: The input text.
        n: Maximum number of distinct words to replace. Values <= 0 leave
            every token unchanged.

    Returns:
        The tokens joined with single spaces, with the selected words
        replaced. Because the result is rebuilt from tokens, the spacing of
        the original text is not preserved.
    """
    words = nltk.word_tokenize(text)  # Tokenize the text into words

    # Distinct alphabetic tokens in first-seen order. Deduplicating keeps a
    # repeated word from being counted as a second replacement.
    candidates = list(dict.fromkeys(w for w in words if w.isalpha()))
    random.shuffle(candidates)  # Shuffle the list to randomize selection

    # Map original word -> chosen synonym. The mapping is applied to the
    # original tokens in one pass at the end, so a synonym that was inserted
    # earlier can never itself be replaced by a later candidate.
    replacements = {}
    for word in candidates:
        if len(replacements) >= n:  # Checked first so n <= 0 replaces nothing
            break
        synonyms = get_synonyms(word)
        if synonyms:
            replacements[word] = random.choice(synonyms)  # Randomly select a synonym

    return ' '.join(replacements.get(w, w) for w in words)  # Join the words back into a single string

In [12]:
original = "The Movie was absolutely fantastic and enjoyable"
augmented_text = synonym_replacement(original, n=2)  # Replace at most 2 distinct words with synonyms; the picks are random, so the output varies between runs
print("Original Text:", original)
print("Augmented Text:", augmented_text)

Original Text: The Movie was absolutely fantastic and enjoyable
Augmented Text: The film was absolutely marvellous and enjoyable


In [22]:
def bigram_filp(text):
    """Swap one randomly chosen pair of adjacent tokens in ``text``.

    The text is tokenized with ``nltk.word_tokenize``. If it yields fewer
    than two tokens there is nothing to swap and ``text`` is returned as
    given. Otherwise one adjacent pair is picked uniformly at random, its two
    tokens trade places, and the tokens are joined with single spaces.
    """
    tokens = nltk.word_tokenize(text)
    pair_count = len(tokens) - 1  # Number of adjacent (i, i+1) pairs
    if pair_count < 1:
        return text

    left = random.choice(range(pair_count))  # Start position of the pair to swap
    flipped = tokens[:left] + [tokens[left + 1], tokens[left]] + tokens[left + 2:]
    return ' '.join(flipped)

In [23]:
text2 = "The Movie was absolutely fantastic and enjoyable"
bigram_text = bigram_filp(text2)  # Swap one randomly chosen pair of adjacent words
print("Original Text:", text2)
print("Bigram Text:", bigram_text)

Original Text: The Movie was absolutely fantastic and enjoyable
Bigram Text: The Movie was fantastic absolutely and enjoyable


In [26]:
!pip install deep_translator


Collecting deep_translator
  Obtaining dependency information for deep_translator from https://files.pythonhosted.org/packages/38/3f/61a8ef73236dbea83a1a063a8af2f8e1e41a0df64f122233938391d0f175/deep_translator-1.11.4-py3-none-any.whl.metadata
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Collecting beautifulsoup4<5.0.0,>=4.9.1 (from deep_translator)
  Obtaining dependency information for beautifulsoup4<5.0.0,>=4.9.1 from https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl.metadata
  Downloading beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting requests<3.0.0,>=2.23.0 (from deep_translator)
  Obtaining dependency information for requests<3.0.0,>=2.23.0 from https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl.metadata
  Downloading requests-2.32.4-py3-none-any.whl.me


[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [27]:
from deep_translator import GoogleTranslator

In [31]:
def back_translate_verbose(text, intermediate_lang='fr'):
    """Paraphrase ``text`` by translating it to another language and back to English.

    Prints the input text, the intermediate translation and the
    back-translation, in that order.

    Args:
        text: Text to back-translate. Its language is auto-detected.
        intermediate_lang: Target language of the first translation
            (default ``'fr'``).

    Returns:
        The text translated back to English. If either translation raises,
        the error is printed and ``text`` is returned unchanged.
    """
    try:
        translated = GoogleTranslator(source='auto', target=intermediate_lang).translate(text)
        # The target language must be passed as `target`, the same keyword
        # used for the first translation, so English is requested explicitly
        # rather than through a misspelled keyword.
        back_translate = GoogleTranslator(source='auto', target='en').translate(translated)

        print(text)
        print(translated)
        print(back_translate)

        return back_translate
    except Exception as e:
        # Best effort: report the failure and fall back to the original text.
        print("Translation error:", e)
        return text

In [34]:
text3 = "The Movie was absolutely fantastic and enjoyable"
back_translate_verbose(text3,intermediate_lang='fr')  # Back-translate through French; the returned string is shown as the cell result

The Movie was absolutely fantastic and enjoyable
Le film était absolument fantastique et agréable
The film was absolutely fantastic and pleasant


'The film was absolutely fantastic and pleasant'

In [40]:
import random
def add_noise(text, noise_level=0.1):
    """Simulate typos by swapping randomly chosen pairs of adjacent characters.

    Args:
        text: The string to perturb.
        noise_level: Number of swaps as a fraction of the text length;
            ``int(len(text) * noise_level)`` swaps are performed. Values <= 0
            perform no swaps.

    Returns:
        A string of the same length containing the same characters as
        ``text``, possibly reordered. Texts shorter than two characters are
        returned unchanged because they have no adjacent pair to swap.
    """
    text_chars = list(text)
    if len(text_chars) < 2:
        # No adjacent pair exists; without this guard randint(0, -1) raises
        # ValueError for a one-character text when noise_level >= 1.
        return text

    num_noisy = int(len(text_chars) * noise_level)
    last_pair_start = len(text_chars) - 2  # Highest index that still has a right neighbour
    for _ in range(num_noisy):
        idx = random.randint(0, last_pair_start)
        text_chars[idx], text_chars[idx + 1] = text_chars[idx + 1], text_chars[idx]
    return ''.join(text_chars)

In [43]:
# Apply add_noise with its default noise_level to text3 and print the result
print(add_noise(text3))

The Movie was absolutely afntastic ande nojybale
