# Assignment 2
----

### Word Replacement using Regular Expression


In [1]:
import re
import nltk
from nltk.corpus import wordnet
# Fetch the WordNet corpus that the `wordnet` lookups in later cells rely on.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

Contraction patterns (regular expressions) and the expanded text that replaces each of them:

In [2]:
# (regex, replacement) pairs used to expand English contractions.
# The pairs are applied in order, so the irregular forms ("won't", "can't")
# must stay ahead of the generic "<word>n't" rule.
# Replacements are raw strings so that the `\g<1>` group reference is not
# treated as an (invalid) string escape sequence.
R_patterns = [
    (r"won't", 'will not'),
    (r"can't", 'cannot'),
    (r"([Ii])'m", r'\g<1> am'),     # keeps the original case of "i" / "I"
    (r"(\w+)'ll", r'\g<1> will'),   # e.g. "I'll" -> "I will"
    (r"(\w+)n't", r'\g<1> not'),    # e.g. "isn't" -> "is not"
    (r"(\w+)'ve", r'\g<1> have'),
    (r"(\w+)'s", r'\g<1> is'),      # note: also rewrites possessive 's
    (r"(\w+)'re", r'\g<1> are'),
]

Class for Replacing the patterns in text

In [3]:
class REReplacer(object):
  """Apply an ordered list of regex substitutions to a piece of text."""

  def __init__(self, patterns=R_patterns):
    """Compile every (regex, replacement) pair in `patterns` once, up front."""
    compiled_pairs = []
    for regex, repl in patterns:
      compiled_pairs.append((re.compile(regex), repl))
    self.patterns = compiled_pairs

  def replace(self, text):
    """Return `text` after running each stored substitution in order."""
    result = text
    for matcher, substitute in self.patterns:
      # Each substitution works on the output of the previous one.
      result = matcher.sub(substitute, result)
    return result

In [6]:
# Build a replacer with the default patterns and expand a sample sentence.
rep_word = REReplacer()
rep_word.replace("I won't do it")

'I will not do it'

### Removing Repeating letters

Demonstration

Function for removing repeating letters

In [4]:
def rep_repeating(word):
  """Strip repeated letters from `word` until WordNet recognises it.

  On each pass one doubled character is collapsed in every regex match.
  The loop stops as soon as `wordnet.synsets` returns something for the
  current text, or when there is nothing left to collapse.

  Implemented as a loop rather than recursion: each pass may remove only a
  single character, so a long run of repeats would otherwise exceed
  Python's recursion limit.

  Args:
    word: the text to normalise.

  Returns:
    The first intermediate form known to WordNet, or the fully collapsed
    text if no form is recognised.
  """
  # A doubled character (\2 repeated) with anything before and after it.
  repeat_regexp = re.compile(r'(\w*)(\w)\2(\w*)')
  repl = r'\1\2\3'
  while True:
    if wordnet.synsets(word):
      return word
    collapsed = repeat_regexp.sub(repl, word)
    if collapsed == word:
      # No doubled character left to remove.
      return word
    word = collapsed

Demonstration

In [5]:
# Run rep_repeating on a handful of inputs that contain elongated letters.
print(rep_repeating('Heyyy'))
print(rep_repeating("Hellooooo"))
print(rep_repeating("This is greatt"))
print(rep_repeating("Noooooo"))
print(rep_repeating("Okayyyy"))


Hey
Hello
This is great
No
Okay


### Synonym and Antonym Replacement

  * Synonym

In [9]:
from google.colab import files 
# Relies on google.colab's `files` helper, so this cell only runs inside Colab.
# The output below shows synonym.csv being saved by this call.
uploaded = files.upload()

Saving synonym.csv to synonym.csv


In [8]:
import csv

class syn_replacer(object):
  """Replace words with synonyms read from a two-column CSV word map.

  Every non-empty CSV row is expected to be `word,synonym`.
  """

  def __init__(self, path='synonym.csv'):
    """Load the word map from a CSV file.

    Args:
      path: CSV file to read. Defaults to 'synonym.csv' in the current
        working directory.

    Raises:
      ValueError: if a non-empty row does not contain exactly two fields.
    """
    # Reading Word Map from CSV File
    self.word_map = {}

    # `with` guarantees the handle is closed; newline='' is what the csv
    # module documentation asks for when opening files for csv.reader.
    with open(path, newline='') as csv_file:
      for row in csv.reader(csv_file):
        if not row:
          # csv.reader yields [] for blank lines; nothing to map.
          continue
        word, syn = row
        self.word_map[word] = syn

  def replace(self, word):
    """Return the mapped synonym for `word`, or `word` itself if unmapped."""
    return self.word_map.get(word, word)

  def replace_sent(self, sent):
    """Split `sent` on single spaces and return the list of replaced words."""
    return [self.replace(w) for w in sent.split(" ")]

In [9]:
# Look up a single word, then replace word by word in two sample sentences.
syn_rep = syn_replacer()
print(syn_rep.replace('bday'))
print(syn_rep.replace_sent('Today is my bday'))
print(syn_rep.replace_sent('Are you ok'))

Birthday
['Today', 'is', 'my', 'Birthday']
['Are', 'you', 'Okay']


  * Antonym

In [10]:
class word_antonym_replacer(object):
  """Swap "not <word>" for the antonym of <word> when WordNet has exactly one."""

  def replace(self, word, pos=None):
    """Return the single antonym of `word`, or None.

    Antonym names are gathered from every lemma of every synset returned by
    `wordnet.synsets(word, pos=pos)`. A result is returned only when exactly
    one distinct name is found; zero or several candidates give None.
    """
    candidates = {
        opposite.name()
        for synset in wordnet.synsets(word, pos=pos)
        for lemma in synset.lemmas()
        for opposite in lemma.antonyms()
    }
    if len(candidates) != 1:
      return None
    return candidates.pop()

  def replace_negations(self, sent):
    """Return a new word list with each "not X" pair collapsed to X's antonym.

    `sent` is a sequence of words. A 'not' is kept as is when it is the last
    word or when `replace` finds no antonym for the word that follows it.
    """
    total = len(sent)
    result = []
    skip_next = False
    for position, token in enumerate(sent):
      if skip_next:
        # This word was already consumed as the target of a preceding 'not'.
        skip_next = False
        continue
      if token == 'not' and position + 1 < total:
        opposite = self.replace(sent[position + 1])
        if opposite:
          result.append(opposite)
          skip_next = True
          continue
      result.append(token)
    return result

In [13]:
# Look up one antonym directly, then collapse "not <word>" in two tokenised sentences.
rep_ant = word_antonym_replacer()

print(rep_ant.replace('dangerous'))
print(rep_ant.replace_negations(('This box is not heavy').split(' ')))
print(rep_ant.replace_negations(('Is this route not dangerous').split(' ')))

safe
['This', 'box', 'is', 'light']
['Is', 'this', 'route', 'safe']
