In [None]:
!pip install textblob



In [None]:
!python -mtextblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


In [None]:
from textblob import TextBlob

In [None]:
wiki=TextBlob("I Love Natural Language processing,not you!")
wiki.tags

[('I', 'PRP'),
 ('Love', 'VBP'),
 ('Natural', 'JJ'),
 ('Language', 'NNP'),
 ('processing', 'NN'),
 ('not', 'RB'),
 ('you', 'PRP')]

Noun phase extraction

In [None]:
wiki.noun_phrases

WordList(['love', 'language'])

sentiment analysis

In [None]:
testimonial=TextBlob("Textblob is amazingly simple to use. What great fun!")
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [None]:
testimonial.sentiment.polarity

0.39166666666666666

Tokenization

In [None]:
zen=TextBlob("Data is a new Fuel."
              "Explicit is better than implicit."
              "Simple is better than complex.")
zen.words

WordList(['Data', 'is', 'a', 'new', 'Fuel.Explicit', 'is', 'better', 'than', 'implicit.Simple', 'is', 'better', 'than', 'complex'])

In [None]:
zen.sentences

[Sentence("Data is a new Fuel.Explicit is better than implicit.Simple is better than complex.")]

In [None]:
for sentence in zen.sentences:
  print(sentence)

Data is a new Fuel.Explicit is better than implicit.Simple is better than complex.


word inflection and lemmatization

In [None]:
sentence=TextBlob("use 4 spaces per indentation level")
sentence.words

WordList(['use', '4', 'spaces', 'per', 'indentation', 'level'])

In [None]:
sentence.words[2].singularize()
sentence.words[0].pluralize()

'uses'

In [None]:
from textblob import Word

In [None]:
a=Word("lions")
a.lemmatize()

'lion'

In [None]:
a=Word("went")
a.lemmatize("v")

'go'

wordnet integration

In [None]:
Word("length").definitions

['the linear extent in space from one end to the other; the longest dimension of something that is fixed in place',
 'continuance in time',
 'the property of being the extent of something from beginning to end',
 'size of the gap between two places',
 'a section of something that is long and narrow']

In [None]:
from textblob.wordnet import Synset

In [None]:
octopus=Synset("Octopus.n.02")
shrimp=Synset("shrimp.n.03")
octopus.path_similarity(shrimp)

0.1111111111111111

wordlists

In [None]:
animals=TextBlob("lions elephants giraffes")
animals.words

WordList(['lions', 'elephants', 'giraffes'])

In [None]:
animals.words.pluralize()

WordList(['lionss', 'elephantss', 'giraffess'])

In [None]:
#spelling correction
g=TextBlob("can you prounance princa")
g.correct()

TextBlob("can you prounance prince")

In [None]:
k=Word("longituod")
k.spellcheck()

[('longitude', 1.0)]

In [None]:
#Get word and Noun phrase frequencies
sent=TextBlob("she sales sea shellsat the Sea shore")
sent.word_counts["sea"]

2

In [None]:
sent.words.count("sea")

2

In [None]:
sent.words.count('Sea',case_sensitive=True)

1

In [None]:

sent.noun_phrases.count("sea")

0

Translation and languagedetection

In [None]:
blob=TextBlob(u"something")
blob.translate(from_language='en',to='hi')


TypeError: BaseBlob.translate() got an unexpected keyword argument 'from_language'

In [None]:
chines_blob=TextBlob(u'年年有余')
chines_blob.translate(from_lang='zh',to='en')


TextBlob("Over the year")

In [None]:
text=TextBlob("hello world")
print(text.detect_language())

HTTPError: HTTP Error 400: Bad Request

In [None]:
d=TextBlob("बन जाना ")
d.detect_language()

HTTPError: HTTP Error 400: Bad Request

In [None]:
a_blob=TextBlob("apple")
b_blob=TextBlob("orange")
a_blob<=b_blob

True

In [None]:
blob=TextBlob("now is better than never")
blob.ngrams(n=2)

[WordList(['now', 'is']),
 WordList(['is', 'better']),
 WordList(['better', 'than']),
 WordList(['than', 'never'])]

In [None]:
for k in zen.sentences:
  print(k)
  print("---starts at index")

Data is a new Fuel.Explicit is better than implicit.Simple is better than complex.
---starts at index


Text classification

In [None]:
training = [
            ('Tom Holland is a terrible spiderman.','pos'),
            ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),
            ('The Dark Knight Rises is the greatest superhero movie ever!','neg'),
            ('Fantastic Four should have never been made.','pos'),
            ('Wes Anderson is my favorite director!','neg'),
            ('Captain America 2 is pretty awesome.','neg'),
            ('Let\s pretend "Batman and Robin" never happened..','pos'),
            ]
testing = [
           ('Superman was never an interesting character.','pos'),
           ('Fantastic Mr Fox is an awesome film!','neg'),
           ('Dragonball Evolution is simply terrible!!','pos')
           ]

In [None]:
from textblob import classifiers

classifier = classifiers.NaiveBayesClassifier(training)

In [None]:
print (classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              neg : pos    =      2.9 : 1.0
             contains(a) = False             neg : pos    =      1.8 : 1.0
         contains(never) = False             neg : pos    =      1.8 : 1.0


In [None]:
blob = TextBlob('the weather is terrible!', classifier=classifier)
print (blob.classify())

neg


In [None]:
# Get probabilities using classifier directly
def get_probabilities(text):
    probabilities = classifier.prob_classify(text)
    return {
        'pos': round(probabilities.prob('pos'), 2),
        'neg': round(probabilities.prob('neg'), 2)
    }

probabilities = get_probabilities('the weather is terrible!')
print(f'Probability of Positive: {probabilities["pos"]}')
print(f'Probability of Negative: {probabilities["neg"]}')


Probability of Positive: 0.39
Probability of Negative: 0.61


**Spacy**

In [None]:
!pip install spacy



In [None]:
import subprocess
import sys


In [None]:
import spacy

# Load the small English language model
nlp = spacy.load("en_core_web_sm")

# Process a sample sentence with the loaded language model
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Loop through each word (token) in the processed sentence
for token in doc:
    # Print the word, its part-of-speech tag, and its syntactic dependency
    print(token.text, token.pos_, token.dep_)

# Explanation of the printed information:
# Text: The original word in the sentence.
# POS: The part-of-speech tag (e.g., noun, verb, adjective).
# Dep: The syntactic dependency, which shows the relationship between words in the sentence.


Apple PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.K. PROPN dobj
startup NOUN dep
for ADP prep
$ SYM quantmod
1 NUM compound
billion NUM pobj


In [None]:
nlp = spacy.load("en_core_web_sm")

# Process the text to create a Doc object
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Iterate over each token in the Doc and print the token's text
for token in doc:
    print(token.text)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


In [None]:
# Load the small English language model
nlp = spacy.load("en_core_web_sm")

# Process the text to create a Doc object
doc = nlp("Coronavirus: Delhi resident tests positive for coronavirus, total 31 people infected in India")

# Iterate over each token in the Doc and print various attributes
for token in doc:
    print(
        token.text,        # The original text of the token
        token.lemma_,      # The base form of the token
        token.pos_,        # The part of speech tag
        token.tag_,        # The detailed part of speech tag
        token.dep_,        # The syntactic dependency
        token.shape_,      # The shape of the token (e.g., "Xxxx" for "Apple")
        token.is_alpha,    # Is the token an alphabetic word?
        token.is_stop      # Is the token a stop word?
    )

Coronavirus coronavirus NOUN NN nsubj Xxxxx True False
: : PUNCT : punct : False False
Delhi Delhi PROPN NNP advmod Xxxxx True False
resident resident NOUN NN compound xxxx True False
tests test NOUN NNS nsubj xxxx True False
positive positive ADJ JJ amod xxxx True False
for for ADP IN prep xxx True True
coronavirus coronavirus NOUN NN pobj xxxx True False
, , PUNCT , punct , False False
total total ADJ JJ ROOT xxxx True False
31 31 NUM CD nummod dd False False
people people NOUN NNS dobj xxxx True False
infected infect VERB VBN acl xxxx True False
in in ADP IN prep xx True True
India India PROPN NNP pobj Xxxxx True False


In [None]:
from spacy import displacy

# Load the spaCy model (en_core_web_sm)
nlp = spacy.load("en_core_web_sm")

# Create a Doc object containing the sentence to be parsed
doc = nlp("Google, Apple crack down on fake coronavirus apps")

# Display the parsed sentence using displacy.serve()
displacy.serve(doc, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



In [None]:
nlp = spacy.load("en_core_web_sm")  # Load the small English language model

# Create a sentence to process
doc = nlp("Coronavirus: Delhi resident tests positive for coronavirus, total 31 people infected in India")

# Identify named entities in the sentence
for ent in doc.ents:
    # Print the text of the entity, its starting and ending character position, and its label
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [None]:
from spacy import displacy

# This line imports the spaCy library, which is a free open-source library for natural language processing (NLP)

text = "Coronavirus: Delhi resident tests positive for coronavirus, total 31 people infected in India"

# This line assigns a string to a variable named text. The string contains a sentence about a coronavirus case in India.

nlp = spacy.load("en_core_web_sm")

# This line loads a pre-trained spaCy model for English named entity recognition (NER). The "en_core_web_sm" model is a small model that is efficient to use for basic tasks.

doc = nlp(text)

# This line applies the loaded model to the text variable and stores the results in a variable named doc. The doc variable now contains a spaCy Doc object, which holds the linguistic analysis of the text.

displacy.serve(doc, style="ent")

# This line displays the named entities found in the text using displacy. The "ent" style argument specifies that we want to see the named entity tags.

# https://spacy.io/api/annotation#named-entities


In [None]:
import spacy.cli
spacy.cli.download("en_core_web_md")

import en_core_web_md
nlp = en_core_web_md.load()

In [None]:

nlp = spacy.load("en_core_web_md")  # Load the English language model
tokens = nlp("lion bear apple banana fadsfdshds")  # Create a Doc object from the text

for token in tokens:
  print(token.text, token.has_vector, token.vector_norm, token.is_oov)

In [None]:
nlp = spacy.load("en_core_web_md")

# Create a sentence to process
sentence = "lion bear cow apple mango spinach"

# Tokenize the sentence - break it down into individual words
tokens = nlp(sentence)

# Iterate over each token and compare it to other tokens in the sentence
# Print the text of the token, another token for comparison, and their similarity score
for token1 in tokens:
    for token2 in tokens:
        print(token1.text, token2.text, token1.similarity(token2))
