<a href="https://colab.research.google.com/github/SomdeepAcharyya/NLP/blob/main/NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import io
import shutil
import string
import re
import nltk
import nltk.corpus
from nltk.tokenize import word_tokenize, blankline_tokenize  # tokenization
from nltk.probability import FreqDist    # frequency of words 
from nltk.util import bigrams, trigrams, ngrams     # dividing sentence into phrases
from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer   # stemming
from nltk.stem import wordnet, WordNetLemmatizer    # lemmatization
from nltk import ne_chunk   # Named Entity Recognition

In [None]:
# Download the Brown corpus, import its reader, and display its word list
# (the bare last expression is what the cell renders).
nltk.download('brown')
from nltk.corpus import brown
brown.words()

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...]

In [None]:
# The Gutenberg corpus has to be available locally before its files can be
# read; on a fresh kernel the lookups below raise LookupError without this.
nltk.download('gutenberg')

# Word sequences for two Shakespeare plays from the Gutenberg selection.
hamlet = nltk.corpus.gutenberg.words('shakespeare-hamlet.txt')
macbeth = nltk.corpus.gutenberg.words('shakespeare-macbeth.txt')

In [None]:
# Sample sentence fed to the tokenizing, stemming, lemmatizing and tagging
# cells. Adjacent string literals are joined at compile time, so the value
# (including its trailing space) is exactly one single-line string.
ai = (
    'Less than a decade after breaking the Nazi encryption machine Enigma '
    'and helping the Allied Forces win World War II, mathematician Alan Turing '
    'changed history a second time with a simple question: Can machines think? '
)

In [None]:
# Download the Punkt tokenizer data, then split the sample sentence into word
# and punctuation tokens and display them.
# NOTE(review): newer NLTK releases may look for the 'punkt_tab' resource
# instead of 'punkt' — confirm against the installed NLTK version.
nltk.download('punkt')
ai_tokens = word_tokenize(ai)
ai_tokens

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


['Less',
 'than',
 'a',
 'decade',
 'after',
 'breaking',
 'the',
 'Nazi',
 'encryption',
 'machine',
 'Enigma',
 'and',
 'helping',
 'the',
 'Allied',
 'Forces',
 'win',
 'World',
 'War',
 'II',
 ',',
 'mathematician',
 'Alan',
 'Turing',
 'changed',
 'history',
 'a',
 'second',
 'time',
 'with',
 'a',
 'simple',
 'question',
 ':',
 'Can',
 'machines',
 'think',
 '?']

In [None]:
# Start with an empty frequency distribution; the next cell fills it from ai_tokens.
fdist =  FreqDist()

In [None]:
# Build the case-insensitive token counts in one expression. Rebinding `fdist`
# (instead of incrementing an existing distribution) keeps this cell
# idempotent: running it again yields the same counts rather than doubling them.
fdist = FreqDist(token.lower() for token in ai_tokens)
fdist

FreqDist({',': 1,
          ':': 1,
          '?': 1,
          'a': 3,
          'after': 1,
          'alan': 1,
          'allied': 1,
          'and': 1,
          'breaking': 1,
          'can': 1,
          'changed': 1,
          'decade': 1,
          'encryption': 1,
          'enigma': 1,
          'forces': 1,
          'helping': 1,
          'history': 1,
          'ii': 1,
          'less': 1,
          'machine': 1,
          'machines': 1,
          'mathematician': 1,
          'nazi': 1,
          'question': 1,
          'second': 1,
          'simple': 1,
          'than': 1,
          'the': 2,
          'think': 1,
          'time': 1,
          'turing': 1,
          'war': 1,
          'win': 1,
          'with': 1,
          'world': 1})

In [None]:
# Split the text on blank lines; the sample sentence contains none, so the
# recorded result is a single-element list holding the whole string.
ai_blank = blankline_tokenize(ai)
ai_blank

['Less than a decade after breaking the Nazi encryption machine Enigma and helping the Allied Forces win World War II, mathematician Alan Turing changed history a second time with a simple question: Can machines think? ']

In [None]:
# Every run of seven consecutive tokens, materialised as a list of tuples.
ai_ngrams = list(ngrams(ai_tokens, n=7))
ai_ngrams

In [None]:
# Three stemmers are instantiated; only the Snowball (English) one is applied here.
pst = PorterStemmer()
lst = LancasterStemmer()
snb = SnowballStemmer('english')

# Stem every token, then print each original token next to its stem.
stemm_words = [snb.stem(token) for token in ai_tokens]
for token, stem in zip(ai_tokens, stemm_words):
  print(token + ':' + stem)

Less:less
than:than
a:a
decade:decad
after:after
breaking:break
the:the
Nazi:nazi
encryption:encrypt
machine:machin
Enigma:enigma
and:and
helping:help
the:the
Allied:alli
Forces:forc
win:win
World:world
War:war
II:ii
,:,
mathematician:mathematician
Alan:alan
Turing:ture
changed:chang
history:histori
a:a
second:second
time:time
with:with
a:a
simple:simpl
question:question
:::
Can:can
machines:machin
think:think
?:?


In [None]:
word_len = WordNetLemmatizer()
nltk.download('wordnet')

# lemmatize() is called without a part-of-speech argument; in the recorded run
# only the plural 'machines' changes (to 'machine').
lemm_words = [word_len.lemmatize(token) for token in ai_tokens]
for token, lemma in zip(ai_tokens, lemm_words):
  print(token + ':' + lemma)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
Less:Less
than:than
a:a
decade:decade
after:after
breaking:breaking
the:the
Nazi:Nazi
encryption:encryption
machine:machine
Enigma:Enigma
and:and
helping:helping
the:the
Allied:Allied
Forces:Forces
win:win
World:World
War:War
II:II
,:,
mathematician:mathematician
Alan:Alan
Turing:Turing
changed:changed
history:history
a:a
second:second
time:time
with:with
a:a
simple:simple
question:question
:::
Can:Can
machines:machine
think:think
?:?


In [None]:
# Characters removed from every lemma: the listed punctuation marks and all digits.
punctuation = re.compile(r'[-.?!,:;()|0-9]')

# Strip those characters lazily, then keep only the tokens that are still
# non-empty (pure-punctuation tokens such as ',' disappear entirely).
stripped_tokens = (punctuation.sub("", token) for token in lemm_words)
punc_words = [token for token in stripped_tokens if token]
punc_words

['Less',
 'than',
 'a',
 'decade',
 'after',
 'breaking',
 'the',
 'Nazi',
 'encryption',
 'machine',
 'Enigma',
 'and',
 'helping',
 'the',
 'Allied',
 'Forces',
 'win',
 'World',
 'War',
 'II',
 'mathematician',
 'Alan',
 'Turing',
 'changed',
 'history',
 'a',
 'second',
 'time',
 'with',
 'a',
 'simple',
 'question',
 'Can',
 'machine',
 'think']

In [None]:
# Download the perceptron tagger data, then tag the punctuation-free tokens;
# the result is a list of (token, tag) pairs, as shown in the recorded output.
nltk.download('averaged_perceptron_tagger')
ai_pos = nltk.pos_tag(punc_words)
ai_pos

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


[('Less', 'JJR'),
 ('than', 'IN'),
 ('a', 'DT'),
 ('decade', 'NN'),
 ('after', 'IN'),
 ('breaking', 'VBG'),
 ('the', 'DT'),
 ('Nazi', 'JJ'),
 ('encryption', 'NN'),
 ('machine', 'NN'),
 ('Enigma', 'NNP'),
 ('and', 'CC'),
 ('helping', 'VBG'),
 ('the', 'DT'),
 ('Allied', 'NNP'),
 ('Forces', 'NNPS'),
 ('win', 'VBP'),
 ('World', 'NNP'),
 ('War', 'NNP'),
 ('II', 'NNP'),
 ('mathematician', 'NN'),
 ('Alan', 'NNP'),
 ('Turing', 'NNP'),
 ('changed', 'VBD'),
 ('history', 'NN'),
 ('a', 'DT'),
 ('second', 'JJ'),
 ('time', 'NN'),
 ('with', 'IN'),
 ('a', 'DT'),
 ('simple', 'JJ'),
 ('question', 'NN'),
 ('Can', 'MD'),
 ('machine', 'NN'),
 ('think', 'VB')]

In [None]:
nltk.download('maxent_ne_chunker')
nltk.download('words')

# ne_chunk works on POS-tagged input, i.e. (token, tag) pairs. Passing the bare
# strings in punc_words made the chunker index into each string and fail with
# an IndexError, so feed it the tagged tokens from nltk.pos_tag instead.
ai_ner = ne_chunk(ai_pos)
ai_ner

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


IndexError: ignored

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [None]:
def get_user_score():
  """Set up the inputs for a keyword-based score (unfinished).

  Defines the space-separated topic keyword string and loads spaCy's
  'en_core_web_sm' pipeline.

  NOTE(review): nothing is computed from either value yet, so the function
  currently returns None. TODO: implement the scoring itself.
  """
  import spacy
  topic_keywords = 'bengal west election campaign news bjp trinamool chief minister communists india hindu muslim mla mp vidhan sabha bengali bangla tmc cpim cpm mamata modi'
  nlp = spacy.load('en_core_web_sm')
  

In [None]:
# Importing TextBlob
from textblob import TextBlob
# Helpers that create a TextBlob object and return one of its sentiment properties
def get_polarity(a):
  """Return the `polarity` attribute of a TextBlob built from `a`.

  Args:
    a: the text handed to the TextBlob constructor.
  """
  return TextBlob(a).polarity
def get_subjectivity(a):
  """Return the `subjectivity` attribute of a TextBlob built from `a`.

  Args:
    a: the text handed to the TextBlob constructor.
  """
  return TextBlob(a).subjectivity