In [None]:
# Synonym replacement: imports and one-time NLTK resource downloads.
import random  # used to shuffle candidate words and pick random synonyms
import nltk   # the Natural Language Toolkit (NLTK)
from nltk.corpus import wordnet # WordNet corpus reader; queried via wordnet.synsets() in get_synonyms

# Download the NLTK data packages this notebook relies on.
nltk.download('wordnet')
nltk.download('punkt') # used for tokenization
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger') # NOTE(review): no cell visible here calls a tagger -- confirm this download is still needed
nltk.download('punkt_tab') # added to address a LookupError -- presumably raised by nltk.word_tokenize; confirm

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# Sample sentence. The demo cells visible in this notebook define their own
# `orignal` variable with the same literal and do not read `text`.
text = "the movie was absoluetly fantastic and enjoyable"

In [None]:
def get_synonyms(word):
    """Return the WordNet synonyms of ``word`` as a sorted list of unique strings.

    Every lemma name of every synset returned by ``wordnet.synsets(word)`` is
    collected, with underscores replaced by spaces. Entries equal to ``word``
    (compared case-insensitively) are dropped.

    Args:
        word: The word to look up.

    Returns:
        A list of distinct synonym strings in sorted order; empty when WordNet
        yields no synsets or only the word itself.
    """
    target = word.lower()

    # A set comprehension de-duplicates lemma names shared by several synsets.
    candidates = {
        lemma.name().replace('_', ' ')
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }

    # Sort before returning: set iteration order for strings depends on hash
    # randomisation, so an unsorted list would make a seeded random.choice()
    # over the result differ from one interpreter run to the next.
    return sorted(name for name in candidates if name.lower() != target)


def synonym_replacement(text, n):
    """Replace up to ``n`` distinct alphabetic words of ``text`` with a synonym.

    The text is tokenized with ``nltk.word_tokenize``. The distinct alphabetic
    tokens are shuffled and visited in that order; each one for which
    ``get_synonyms`` returns at least one candidate has its first occurrence
    replaced by a randomly chosen synonym, until ``n`` replacements have been
    made or the candidates run out.

    Args:
        text: The sentence to augment.
        n: Maximum number of words to replace. Zero or a negative value
            results in no replacement.

    Returns:
        The tokens joined with single spaces, with the replacements applied.
    """
    tokens = nltk.word_tokenize(text)
    augmented = list(tokens)

    # dict.fromkeys de-duplicates while keeping first-seen order. A set would
    # hand random.shuffle an order that depends on string-hash randomisation,
    # which makes seeded runs irreproducible.
    candidates = list(dict.fromkeys(tok for tok in tokens if tok.isalpha()))
    random.shuffle(candidates)

    replaced = 0
    for candidate in candidates:
        # Check the budget before replacing, so n <= 0 replaces nothing.
        if replaced >= n:
            break

        options = get_synonyms(candidate)
        if not options:
            continue

        # Only the first occurrence of the word is substituted.
        augmented[tokens.index(candidate)] = random.choice(options)
        replaced += 1

    return ' '.join(augmented)

In [None]:
# Demo: replace up to two words of the sample sentence and show before/after.
orignal = "the movie was absoluetly fantastic and enjoyable"
augmented = synonym_replacement(orignal, n=2)
print(orignal, augmented, sep="\n")

the movie was absoluetly fantastic and enjoyable
the movie was absoluetly terrific and gratifying


In [None]:
def diagram_flip(text):
    """Swap one randomly chosen pair of adjacent tokens in ``text``.

    The text is tokenized with ``nltk.word_tokenize``. When it yields fewer
    than two tokens, ``text`` is returned unchanged; otherwise the tokens are
    returned joined by single spaces with one neighbouring pair exchanged.
    """
    tokens = nltk.word_tokenize(text)
    pair_count = len(tokens) - 1

    # Without at least two tokens there is no adjacent pair to exchange.
    if pair_count <= 0:
        return text

    left = random.choice(range(pair_count))
    right = left + 1
    flipped = tokens[:left] + [tokens[right], tokens[left]] + tokens[right + 1:]
    return ' '.join(flipped)


In [None]:
# Demo: swap one adjacent pair of words in the sample sentence and show before/after.
orignal = "the movie was absoluetly fantastic and enjoyable"
augmented = diagram_flip(orignal)
print(orignal, augmented, sep="\n")

the movie was absoluetly fantastic and enjoyable
movie the was absoluetly fantastic and enjoyable


In [None]:
# back Translation : translate text to another another lang  and then back to orignal
!pip install deep_translator

Collecting deep_translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep_translator
Successfully installed deep_translator-1.11.4


In [None]:
from deep_translator import GoogleTranslator
def back_translate_verbose(text , intermediate_lang = 'fr'):
  """Back-translate ``text`` through ``intermediate_lang`` and print each stage.

  The text is translated to ``intermediate_lang`` (source language
  auto-detected) and the result is translated to English, both through
  ``GoogleTranslator``. The input, the intermediate translation and the
  back-translation are printed.

  Args:
    text: The sentence to augment.
    intermediate_lang: Language code of the pivot language (default ``'fr'``).

  Returns:
    The back-translated text. If any step raises, the exception is printed
    and the unmodified ``text`` is returned instead.
  """
  try:
    translated = GoogleTranslator(source = 'auto' , target=intermediate_lang).translate(text)
    back_translated = GoogleTranslator(source = intermediate_lang , target='en').translate(translated)
    print(f"Orignal: {text}")
    print(f"Intermediate: {translated}")
    print(f"Back translated: {back_translated}")
    # Hand the result back to the caller; without this return the success
    # path would fall off the end of the function and yield None.
    return back_translated
  except Exception as e:
    # Best-effort augmentation: report the failure and fall back to the input.
    print(e)
    return text

In [None]:
# Demo: run the verbose back-translation on the sample sentence, then print the
# input followed by the value the helper returned.
orignal = "the movie was absoluetly fantastic and enjoyable"
augmented = back_translate_verbose(orignal)
print(orignal, augmented, sep="\n")

Orignal: the movie was absoluetly fantastic and enjoyable
Intermediate: Le film était absolument fantastique et agréable
Back translated: The film was absolutely fantastic and pleasant
the movie was absoluetly fantastic and enjoyable
None


In [None]:
# 4 Adding Noise
# random character swaps
import random

def add_noise(text,noise_level = 0.17):
  """Inject character-level noise by swapping randomly chosen adjacent characters.

  Args:
    text: The string to perturb.
    noise_level: Number of swaps to perform, as a fraction of the length of
      ``text`` (default ``0.17``). The product is truncated to an integer.

  Returns:
    A string of the same length containing the same characters, with
    ``int(noise_level * len(text))`` adjacent swaps applied. Text shorter
    than two characters is returned unchanged.
  """
  text_chars = list(text)

  # A swap needs an adjacent pair; for shorter text random.randint(0, len - 2)
  # would be called with an empty range and raise ValueError.
  if len(text_chars) < 2:
    return text

  # The swap count scales linearly with noise_level (it was previously
  # multiplied in twice, i.e. squared).
  num_noisy = int(noise_level * len(text_chars))
  last_left = len(text_chars) - 2  # highest index that still has a right neighbour
  for _ in range(num_noisy):
    idx = random.randint(0, last_left)
    text_chars[idx], text_chars[idx+1] = text_chars[idx+1], text_chars[idx]
  return ''.join(text_chars)



In [None]:
# Demo: apply random adjacent-character swaps to the sample sentence and show before/after.
orignal = "the movie was absoluetly fantastic and enjoyable"
augmented = add_noise(orignal)
print(orignal, augmented, sep="\n")

the movie was absoluetly fantastic and enjoyable
the movie was absoluetly fantasti cand enjoyable
