# Synonym Replacement


In [2]:
import nltk
from nltk.corpus import wordnet
import random

In [3]:
# Download the NLTK data packages used by nltk.corpus.wordnet below:
# "wordnet" and "omw-1.4". Per the download log, packages that are already
# present and up to date are not fetched again.
nltk.download("wordnet")
nltk.download("omw-1.4")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\james\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\james\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [4]:
def synonym_replacement(sentence: str, n: int = 1) -> str:
    """Return ``sentence`` with up to ``n`` distinct words swapped for WordNet synonyms.

    The sentence is split on whitespace, so tokens keep any punctuation
    attached to them, and the result is re-joined with single spaces. Every
    occurrence of a chosen word is replaced by the same synonym.

    Args:
        sentence: Text to augment.
        n: Maximum number of distinct words to replace. Values <= 0 leave
            all words unchanged.

    Returns:
        The augmented sentence. Words for which WordNet offers no alternative
        are skipped, so fewer than ``n`` words may end up replaced.
    """
    words = sentence.split()
    new_words = words.copy()

    # Check the limit before doing any work; previously it was only checked
    # after a replacement attempt, so n=0 still replaced one word.
    if n <= 0:
        return " ".join(new_words)

    # De-duplicate with dict.fromkeys instead of set(): iteration order of a
    # set of strings depends on hash randomisation, which made the outcome
    # differ between interpreter sessions even with a fixed random seed.
    candidates = list(dict.fromkeys(words))
    random.shuffle(candidates)
    num_replaced = 0

    for candidate in candidates:
        # Collect every lemma of every synset (in WordNet order, de-duplicated)
        # that is genuinely different from the word itself. Looking only at the
        # first lemma of one random synset often yields the word unchanged.
        alternatives = []
        for synset in wordnet.synsets(candidate):
            for lemma in synset.lemmas():
                # WordNet joins multi-word lemmas with underscores.
                name = lemma.name().replace("_", " ")
                if name.lower() != candidate.lower() and name not in alternatives:
                    alternatives.append(name)

        if not alternatives:
            continue

        synonym = random.choice(alternatives)
        new_words = [synonym if word == candidate else word for word in new_words]
        num_replaced += 1
        if num_replaced >= n:  # Stop once n distinct words were replaced
            break

    return " ".join(new_words)

In [5]:
import pandas as pd

In [6]:
# Load the deceptive-opinion review corpus (the non-augmented CSV); the path is
# relative to the directory the notebook is run from.
data = pd.read_csv("./input/deceptive-opinion-spam-corpus/deceptive-opinion.csv")

In [7]:
# Seed Python's RNG so the random draws made inside synonym_replacement start
# from a known state each time this cell is run.
random.seed(42)

# .iloc[0] takes the first row by position, independent of the index labels.
sample_text = data["text"].iloc[0]
# Replace up to two distinct words of the sample review with synonyms.
synonym_replaced_text = synonym_replacement(sample_text, n=2)

In [8]:
# Show the original review, then its synonym-replaced variant, one per line.
for review_text in (sample_text, synonym_replaced_text):
    print(review_text)

We stayed for a one night getaway with family on a thursday. Triple AAA rate of 173 was a steal. 7th floor room complete with 44in plasma TV bose stereo, voss and evian water, and gorgeous bathroom(no tub but was fine for us) Concierge was very helpful. You cannot beat this location... Only flaw was breakfast was pricey and service was very very slow(2hours for four kids and four adults on a friday morning) even though there were only two other tables in the restaurant. Food was very good so it was worth the wait. I would return in a heartbeat. A gem in chicago... 

We stayed for a one night getaway with family on a thursday. Triple AAA rate of 173 was a steal. 7th floor room complete with 44in plasma TV bose stereo, voss and evian water, and gorgeous bathroom(no tub but was fine for us) Concierge was very helpful. You cannot pulsate this location... Only flaw was breakfast was pricey and service was very very slow(2hours for four child and four adults on a friday morning) even though 

# Back Translation


In [9]:
from deep_translator import GoogleTranslator


def back_translate(text, intermediate_lang="fr", source_lang="en"):
    """Paraphrase ``text`` by translating it to a pivot language and back.

    Args:
        text: Text written in ``source_lang``.
        intermediate_lang: Language code of the pivot translation
            (default ``"fr"``).
        source_lang: Language code of ``text`` and of the returned text
            (default ``"en"``).

    Returns:
        The round-tripped text as returned by ``GoogleTranslator.translate``.
        Empty or whitespace-only strings are returned unchanged.
    """
    # Nothing to translate: skip both translator calls.
    # NOTE(review): some deep_translator versions appear to reject empty
    # payloads with an exception - confirm against the installed version.
    if isinstance(text, str) and not text.strip():
        return text

    # Step 1: source language -> intermediate language
    translated = GoogleTranslator(
        source=source_lang, target=intermediate_lang
    ).translate(text)
    # Step 2: intermediate language -> back to the source language
    back_translated = GoogleTranslator(
        source=intermediate_lang, target=source_lang
    ).translate(translated)
    return back_translated


# Example usage

# Round-trip the sample review through the default pivot language and print
# it next to the original for comparison.
result = back_translate(sample_text)
for label, shown_text in (
    ("Original:       ", sample_text),
    ("Back Translated:", result),
):
    print(label, shown_text)

Original:        We stayed for a one night getaway with family on a thursday. Triple AAA rate of 173 was a steal. 7th floor room complete with 44in plasma TV bose stereo, voss and evian water, and gorgeous bathroom(no tub but was fine for us) Concierge was very helpful. You cannot beat this location... Only flaw was breakfast was pricey and service was very very slow(2hours for four kids and four adults on a friday morning) even though there were only two other tables in the restaurant. Food was very good so it was worth the wait. I would return in a heartbeat. A gem in chicago... 

Back Translated: We stayed here for a one night family getaway on a Thursday. The AAA triple rate of $173 was a steal. Room on the 7th floor with a 44 inch plasma TV, Bose stereo, Voss and Evian water, and a beautiful bathroom (no tub but that was fine for us). The concierge was very helpful. You can't beat this place... The only downside was that the breakfast was expensive and the service was very very sl

In [10]:
# Load the augmented version of the corpus from a CSV on disk.
# NOTE(review): the cells shown here do not write this file - presumably it is
# produced elsewhere by applying the augmentations above; confirm its origin.
enhanced_data = pd.read_csv("./input/deceptive-opinion-spam-corpus/deceptive-opinion-augmented.csv")

In [11]:
# Preview the first rows of the augmented dataset.
enhanced_data.head()

Unnamed: 0,deceptive,hotel,polarity,source,text
0,truthful,conrad,positive,TripAdvisor,We stay for a one night getaway with family on...
1,truthful,conrad,positive,TripAdvisor,We stayed here for a one night family getaway ...
2,truthful,hyatt,positive,TripAdvisor,triple A rate with upgrade to view room was le...
3,truthful,hyatt,positive,TripAdvisor,The Triple A rate with a view room upgrade was...
4,truthful,hyatt,positive,TripAdvisor,This semen a little late as I'm finally catchi...


In [12]:
# Print the index range, per-column non-null counts and dtypes, and memory usage.
enhanced_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3200 entries, 0 to 3199
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   deceptive  3200 non-null   object
 1   hotel      3200 non-null   object
 2   polarity   3200 non-null   object
 3   source     3200 non-null   object
 4   text       3200 non-null   object
dtypes: object(5)
memory usage: 125.1+ KB
