In [5]:
import nltk

In [6]:
# Download every NLTK resource this notebook relies on in one place.
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the resource names the
# tokenization and POS-tagging cells further down request, so fetching them
# here lets those cells run on a fresh kernel without extra setup.
for resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'stopwords',
):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [7]:
# Import NLTK modules
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

In [8]:
# Sample text
text = "This is an example sentence for demonstrating NLTK preprocessing techniques."

In [10]:
# Tokenization: show the sample text split into words, then into sentences.
nltk.download('punkt_tab')
print("Tokenization:")
for tokenizer in (word_tokenize, sent_tokenize):
    print(tokenizer(text))

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Tokenization:
['This', 'is', 'an', 'example', 'sentence', 'for', 'demonstrating', 'NLTK', 'preprocessing', 'techniques', '.']
['This is an example sentence for demonstrating NLTK preprocessing techniques.']


In [11]:
# Stopwords removal: keep only tokens whose lowercase form is not an English stopword.
print("\nStopwords removal:")
stop_words = set(stopwords.words('english'))
tokens = word_tokenize(text)
filtered_tokens = list(filter(lambda tok: tok.lower() not in stop_words, tokens))
print(filtered_tokens)


Stopwords removal:
['example', 'sentence', 'demonstrating', 'NLTK', 'preprocessing', 'techniques', '.']


In [12]:
# Stemming: run every token of the sample text through the Porter stemmer.
print("\nStemming:")
stemmer = PorterStemmer()
tokens = word_tokenize(text)
stemmed_tokens = list(map(stemmer.stem, tokens))
print(stemmed_tokens)


Stemming:
['thi', 'is', 'an', 'exampl', 'sentenc', 'for', 'demonstr', 'nltk', 'preprocess', 'techniqu', '.']


In [13]:
# Lemmatization: run every token through the WordNet lemmatizer
# (called without a POS argument, exactly one token at a time).
print("\nLemmatization:")
lemmatizer = WordNetLemmatizer()
tokens = word_tokenize(text)
lemmatized_tokens = list(map(lemmatizer.lemmatize, tokens))
print(lemmatized_tokens)


Lemmatization:
['This', 'is', 'an', 'example', 'sentence', 'for', 'demonstrating', 'NLTK', 'preprocessing', 'technique', '.']


In [15]:
# Part-of-speech (POS) tagging: tag each token of the sample text.
nltk.download('averaged_perceptron_tagger_eng')
tokens = word_tokenize(text)
pos_tags = nltk.pos_tag(tokens)
# One print call emitting the header and the tags on separate lines.
print("\nPOS tagging:", pos_tags, sep="\n")

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.



POS tagging:
[('This', 'DT'), ('is', 'VBZ'), ('an', 'DT'), ('example', 'NN'), ('sentence', 'NN'), ('for', 'IN'), ('demonstrating', 'VBG'), ('NLTK', 'NNP'), ('preprocessing', 'VBG'), ('techniques', 'NNS'), ('.', '.')]


In [24]:
import nltk
from nltk.corpus import wordnet

In [28]:
'''
# Download WordNet if not already downloaded
nltk.download('wordnet')'''

"\n# Download WordNet if not already downloaded\nnltk.download('wordnet')"

In [25]:
word = "Car"

In [26]:
# Gather the lemma names of every synset WordNet returns for `word`.
synonyms = {
    lemma.name()
    for synset in wordnet.synsets(word)
    for lemma in synset.lemmas()
}

In [27]:
# List each collected synonym, skipping entries that equal the query word
# when compared case-insensitively.
print("synonyms for'{}':".format(word))
for synonym in synonyms:
    if synonym.lower() == word.lower():
        continue
    print(synonym)

synonyms for'Car':
auto
elevator_car
cable_car
machine
railroad_car
gondola
railway_car
railcar
automobile
motorcar


In [29]:
import random

In [30]:
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

### How can I find the synonyms of the word "car" in Google Colab using NLTK's WordNet?

In [34]:
import nltk
from nltk.corpus import wordnet
import random

nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

def replace_with_synonyms(sentence, n):
    """Replace up to ``n`` randomly chosen words of ``sentence`` with synonyms.

    Only tokens whose POS tag starts with NN, VB, JJ or RB are candidates.
    Candidates are visited in random order; a candidate for which
    ``get_synonyms`` returns nothing is skipped and does not count toward
    ``n``, so the budget is spent only on words that can actually change.

    Args:
        sentence: Text to rewrite.
        n: Maximum number of words to replace. Zero or a negative value
            replaces nothing (instead of raising from ``random.sample``).

    Returns:
        The tokens joined with single spaces. Punctuation is a separate
        token, so it ends up preceded by a space (e.g. ``"dog ."``).
    """
    tokens = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(tokens)

    # Nouns, verbs, adjectives and adverbs are the only replaceable classes.
    replaceable_prefixes = ('NN', 'VB', 'JJ', 'RB')
    replaceable_words = [
        (i, word, pos)
        for i, (word, pos) in enumerate(pos_tags)
        if pos.startswith(replaceable_prefixes)
    ]

    # Sampling the full population yields a shuffled copy without mutating
    # the candidate list.
    shuffled = random.sample(replaceable_words, len(replaceable_words))

    replaced = 0
    for i, word, pos in shuffled:
        if replaced >= n:
            break
        synonyms = get_synonyms(word, pos)
        if synonyms:
            # WordNet joins multi-word lemmas with underscores; show spaces.
            tokens[i] = random.choice(synonyms).replace('_', ' ')
            replaced += 1

    return ' '.join(tokens)

def get_synonyms(word, pos):
    """Return WordNet synonyms of ``word`` for the given Penn Treebank POS tag.

    Args:
        word: The word to look up.
        pos: POS tag as produced by ``nltk.pos_tag`` (e.g. ``'NNS'``); only
            its first two characters are used to pick the WordNet category.

    Returns:
        A sorted list of distinct synonym strings, with underscores in
        multi-word lemma names replaced by spaces and with ``word`` itself
        removed regardless of letter case. Empty when the tag is not a
        noun, verb, adjective or adverb tag.
    """
    # Map the two-letter Penn Treebank tag prefix to a WordNet POS code.
    pos_map = {
        'NN': 'n',
        'VB': 'v',
        'JJ': 'a',
        'RB': 'r'
    }

    wordnet_pos = pos_map.get(pos[:2])
    if not wordnet_pos:
        return []

    # Compare case-insensitively so that e.g. "Car" does not keep "car".
    original = word.lower()
    synonyms = set()
    for syn in wordnet.synsets(word, wordnet_pos):
        for lemma in syn.lemmas():
            name = lemma.name().replace('_', ' ')
            if name.lower() != original:
                synonyms.add(name)

    # Sorted so the result order does not depend on set iteration order,
    # which keeps seeded random choices reproducible.
    return sorted(synonyms)

# Demo run: rewrite a pangram, swapping at most `n` of its words.
sentence = "The quick brown fox jumps over the lazy dog."
n = 3
print(replace_with_synonyms(sentence=sentence, n=n))

The quick brown fox rise over the otiose frankfurter .


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


### Write a Python function that takes a sentence and replaces up to n random words with their synonyms using NLTK's WordNet.

In [35]:
from nltk.corpus import wordnet
import random

In [41]:
def rep_syn(sentence, n):
    """Replace up to ``n`` randomly chosen words of ``sentence`` with synonyms.

    Only tokens whose POS tag starts with NN, VB, JJ or RB are candidates.
    Candidates are visited in random order; a candidate for which
    ``get_synonyms`` returns nothing is skipped and does not count toward
    ``n``.

    Args:
        sentence: Text to rewrite.
        n: Maximum number of words to replace. Zero or a negative value
            replaces nothing (instead of raising from ``random.sample``).

    Returns:
        The tokens joined with single spaces; punctuation tokens therefore
        end up preceded by a space.
    """
    tokens = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(tokens)

    # Nouns, verbs, adjectives and adverbs are the only replaceable classes.
    replaceable_prefixes = ('NN', 'VB', 'JJ', 'RB')
    replaceable_words = [
        (i, word, pos)
        for i, (word, pos) in enumerate(pos_tags)
        if pos.startswith(replaceable_prefixes)
    ]

    # Sampling the full population yields a shuffled copy of the candidates.
    shuffled = random.sample(replaceable_words, len(replaceable_words))

    replaced = 0
    for i, word, pos in shuffled:
        if replaced >= n:
            break
        synonyms = get_synonyms(word, pos)
        if synonyms:
            # WordNet joins multi-word lemmas with underscores; show spaces.
            tokens[i] = random.choice(synonyms).replace('_', ' ')
            replaced += 1

    return ' '.join(tokens)

In [42]:
def get_synonyms(word, pos):
    """Return WordNet synonyms of ``word`` for the given Penn Treebank POS tag.

    Args:
        word: The word to look up.
        pos: POS tag as produced by ``nltk.pos_tag`` (e.g. ``'VBZ'``); only
            its first two characters are used to pick the WordNet category.

    Returns:
        A sorted list of distinct synonym strings, with underscores in
        multi-word lemma names replaced by spaces and with ``word`` itself
        removed regardless of letter case. Empty when the tag is not a
        noun, verb, adjective or adverb tag.
    """
    # Map the two-letter Penn Treebank tag prefix to a WordNet POS code.
    pos_map = {
        'NN': 'n',
        'VB': 'v',
        'JJ': 'a',
        'RB': 'r'
    }

    wordnet_pos = pos_map.get(pos[:2])
    if not wordnet_pos:
        return []

    # Compare case-insensitively so that e.g. "Car" does not keep "car".
    original = word.lower()
    synonyms = set()
    for syn in wordnet.synsets(word, wordnet_pos):
        for lemma in syn.lemmas():
            name = lemma.name().replace('_', ' ')
            if name.lower() != original:
                synonyms.add(name)

    # Sorted so the result order does not depend on set iteration order.
    return sorted(synonyms)

In [43]:
def main():
    """Prompt for a sentence and a replacement count, then print the rewrite.

    The count must be a whole number; anything else prints a short message
    and returns instead of raising ``ValueError`` from ``int()``.
    """
    sentence = input("Enter a sentence: ")
    raw_count = input("Enter the number of words to replace: ")
    try:
        n = int(raw_count)
    except ValueError:
        print("The number of words to replace must be a whole number.")
        return
    new_sentence = replace_with_synonyms(sentence, n)
    print("Modified sentence:", new_sentence)

if __name__ == "__main__":
    main()

Enter a sentence: I love cars.
Enter the number of words to replace: 2
Modified sentence: I eff auto .


### Use spaCy's named entity recognition to identify entities in a sentence (such as people or locations) and replace them with randomly chosen alternatives of the same entity type.

In [44]:
import spacy
import random

If the English model is not installed yet, download it first:
python -m spacy download en_core_web_sm

In [45]:
nlp = spacy.load("en_core_web_sm")

In [46]:
# Replacement pools keyed by entity label; replace_entities draws a random
# alternative from the list matching an entity's label.
entities = dict(
    PERSON=["John", "Jane", "Alice", "Bob", "Mike"],
    ORG=["Google", "Amazon", "Microsoft", "Facebook", "Apple"],
    GPE=["New York", "London", "Paris", "Tokyo", "Sydney"],
    LOC=["Beach", "Mountain", "City", "Country", "Island"],
)

In [47]:
def replace_entities(sentence):
    """Swap each recognized entity for a random alternative of the same label.

    Entities are located with the module-level ``nlp`` pipeline and replaced
    by character offset, so only the recognized span changes: identical text
    elsewhere in the sentence (including inside longer words) and text
    inserted by an earlier replacement are never touched.

    Args:
        sentence: Text to process.

    Returns:
        The sentence with every entity whose label has a non-empty list in
        the module-level ``entities`` table replaced. An alternative that
        differs from the original entity text is preferred when one exists.
    """
    doc = nlp(sentence)
    text = doc.text

    pieces = []
    cursor = 0  # end offset of the last span copied into `pieces`
    for ent in doc.ents:
        alternatives = entities.get(ent.label_)
        if not alternatives:
            continue
        # Avoid "replacing" an entity with itself unless nothing else exists.
        candidates = [alt for alt in alternatives if alt != ent.text] or alternatives
        pieces.append(text[cursor:ent.start_char])
        pieces.append(random.choice(candidates))
        cursor = ent.end_char

    pieces.append(text[cursor:])
    return ''.join(pieces)

In [49]:
def main():
    """Read a sentence from the user and print it with its entities replaced."""
    user_sentence = input("Enter a sentence: ")
    print("Modified sentence:", replace_entities(user_sentence))


if __name__ == "__main__":
    main()

Enter a sentence: I loce cars.
Modified sentence: I loce cars.


### Write a Python function that takes a sentence, translates it to French (or any other language), and then translates it back to English using GoogleTranslator from the deep_translator library to create a paraphrased version.

In [57]:
!pip install deep_translator

Collecting deep_translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep_translator
Successfully installed deep_translator-1.11.4


In [58]:
from deep_translator import GoogleTranslator

In [59]:
def paraphrase_sentence(sentence, target_lang='fr', output_lang='en'):
    """Paraphrase ``sentence`` by round-trip (back-) translation.

    The sentence is translated into ``target_lang`` with automatic source
    detection, and that result is translated into ``output_lang``.

    Args:
        sentence: Text to paraphrase.
        target_lang: Language code of the intermediate (pivot) language.
        output_lang: Language code of the returned paraphrase. Defaults to
            ``'en'``, the previously hard-coded value.

    Returns:
        The back-translated sentence. The original ``sentence`` is returned
        unchanged when it is empty or whitespace-only, when translation
        raises, or when the translator yields an empty result.
    """
    # Nothing to translate; skip the translator calls entirely.
    if not sentence or not sentence.strip():
        return sentence

    try:
        # Translate the sentence to the pivot language
        translated_sentence = GoogleTranslator(source='auto', target=target_lang).translate(sentence)

        # Translate the pivot-language text into the output language
        paraphrased_sentence = GoogleTranslator(source=target_lang, target=output_lang).translate(translated_sentence)

    except Exception as e:
        # Best effort: report the failure and fall back to the input.
        print(f"An error occurred: {e}")
        return sentence

    return paraphrased_sentence or sentence

In [61]:
def main():
    """Read a sentence from the user and print its back-translated paraphrase."""
    user_sentence = input("Enter a sentence: ")
    print("Paraphrased sentence:", paraphrase_sentence(user_sentence))


if __name__ == "__main__":
    main()

Enter a sentence: I like no cars but I wanna work hard.
Paraphrased sentence: I don't like cars but I want to work hard.
