In [None]:
# Install TextBlob with the %pip magic so the package lands in the environment
# of the running kernel (a bare `!pip` may resolve to a different interpreter).
# NOTE(review): no version is pinned here — pin one once the target release is confirmed.
%pip install textblob
# Download the NLTK corpora that TextBlob relies on (brown, punkt_tab, wordnet, ...).
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


## Sentiment Analysis

In [None]:
from textblob import TextBlob

# Build a TextBlob from a sample review sentence and print its `sentiment` attribute.
text = "The product is amazing and works like a charm!"
blob = TextBlob(text)
print(blob.sentiment)  # recorded output: Sentiment(polarity=0.7500000000000001, subjectivity=0.9)

Sentiment(polarity=0.7500000000000001, subjectivity=0.9)


## Text Translation

In [None]:
# Install the pinned googletrans release candidate with %pip so it is installed
# into the running kernel's environment.
# NOTE(review): the recorded install log shows this release pulling in exact pins
# such as httpx==0.13.3, chardet==3.* and idna==2.* — check for conflicts with
# packages already present in the environment.
%pip install googletrans==4.0.0-rc1

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... done
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-none-any.whl.metadata (4.6 kB)
Collecting h11<0.10,>=0.8 (from httpcore==0.9.*->httpx==0.13.3->googl

In [None]:
from textblob import TextBlob
from googletrans import Translator

def translate_blob(text, target_lang='en'):
    """Translate ``text`` into ``target_lang`` and return the translated string.

    A new googletrans ``Translator`` is created on every call. The input is
    wrapped in a ``TextBlob`` and converted back with ``str()`` before it is
    passed to the translator.

    Args:
        text: Text to translate.
        target_lang: Language code forwarded to ``Translator.translate`` as
            ``dest``; defaults to ``'en'``.

    Returns:
        The ``.text`` attribute of the object returned by
        ``Translator.translate``.
    """
    client = Translator()
    source_text = str(TextBlob(text))
    translation = client.translate(source_text, dest=target_lang)
    return translation.text

# Translate a short French sentence into English and print the result.
translated = translate_blob("Ceci est un test", target_lang='en')
print("Translated:", translated)  # recorded output: Translated: This is a test

Translated: This is a test


## Spelling Correction

In [None]:
from textblob import TextBlob

# Run TextBlob's `correct()` on a deliberately misspelled sentence and print the result.
blob = TextBlob("I havv goo spelin")
corrected = blob.correct()
# NOTE(review): no output is recorded for this cell, so the expected text in the
# inline comment below is unverified — confirm by running the cell.
print(corrected)  # author's expected result: "I have good spelling"

## Noun Phrase Extraction

In [None]:
from textblob import TextBlob

# Print the noun phrases TextBlob finds in a product-review sentence.
blob = TextBlob("The camera quality and battery life are fantastic.")
print(blob.noun_phrases)  # recorded output: ['camera quality', 'battery life']

['camera quality', 'battery life']


## NER (Proper-Noun Extraction via POS Tags)

In [None]:
from textblob import TextBlob

# This is not a full named-entity recogniser: it walks the (word, tag) pairs
# and prints every token whose part-of-speech tag is exactly 'NNP'.
blob = TextBlob("Barack Obama was born in Hawaii.")
for word, tag in blob.tags:
    if tag != 'NNP':
        continue  # skip everything that is not tagged as a singular proper noun
    print(word)  # recorded output (one per line): Barack, Obama, Hawaii

Barack
Obama
Hawaii


## Text Summarization (Basic Keyword-Based)

In [None]:
from textblob import TextBlob
from collections import Counter

# Keyword-style "summary": lower-case every token whose POS tag starts with
# "NN", count the occurrences, and join the three most common ones.
text = """Python is a great programming language. It is widely used for data analysis, machine learning, web development, and automation."""
blob = TextBlob(text)
nouns = [token.lower() for token, pos in blob.tags if pos.startswith("NN")]
common_nouns = Counter(nouns).most_common(3)
summary = ' '.join(noun for noun, _ in common_nouns)
print("Summary:", summary)  # recorded output: Summary: python programming language

Summary: python programming language


## Language Detection

In [None]:
# Install langdetect into the running kernel's environment via %pip, pinned to
# the version this notebook's recorded install log resolved (1.0.9) so reruns
# use the same release.
%pip install langdetect==1.0.9

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 981.5/981.5 kB 11.9 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... done
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993223 sha256=64b34be08d547d1cce68669ee457920679eaab01cafc6331271c6e236abe77ce
  Stored in directory: /root/.cache/pip/wheels/0a/f2/b2/e5ca405801e05eb7c8ed5b3b4bcf1fca

In [None]:
from textblob import TextBlob
from langdetect import detect

def detect_language(text):
    """Return the language code that langdetect reports for ``text``.

    langdetect's algorithm is randomised, so short or ambiguous inputs can
    produce different answers from run to run. The detector seed is fixed
    before detection so that re-running the notebook reproduces the same
    result.

    Args:
        text: String whose language should be identified.

    Returns:
        The value returned by ``langdetect.detect`` for ``text`` (``'es'`` for
        the Spanish sample used in this notebook).
    """
    # Imported locally so the cell's existing import lines stay untouched.
    from langdetect import DetectorFactory

    # langdetect's documented switch for deterministic detection.
    DetectorFactory.seed = 0
    return detect(text)

# Detect the language of a short Spanish greeting; the TextBlob is converted
# back to a plain string before it is passed to detect_language().
blob = TextBlob("Hola, ¿cómo estás?")
print("Language:", detect_language(str(blob)))  # recorded output: Language: es

Language: es


## POS Tagging

In [None]:
from textblob import TextBlob

# Print the (word, POS tag) pairs TextBlob assigns to the sentence.
# In the recorded output the trailing period does not appear among the tags,
# and 'data' is tagged 'NNS'.
blob = TextBlob("Python makes data science easier.")
print(blob.tags)  # recorded output: [('Python', 'NNP'), ('makes', 'VBZ'), ('data', 'NNS'), ('science', 'NN'), ('easier', 'JJR')]

[('Python', 'NNP'), ('makes', 'VBZ'), ('data', 'NNS'), ('science', 'NN'), ('easier', 'JJR')]


## Keyword Extraction

In [None]:
from textblob import TextBlob

# Use the sentence's noun phrases as its keywords, copied into a plain list.
blob = TextBlob("The camera lens is sharp, and the zoom feature is quite useful.")
keywords = list(blob.noun_phrases)
print("Keywords:", keywords)  # recorded output: Keywords: ['camera lens']


Keywords: ['camera lens']


## Topic Clustering

In [None]:
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Sample corpus: five one-sentence documents.
docs = [
    "I love Python programming and data science.",
    "Machine learning is fascinating.",
    "I enjoy developing websites using Django.",
    "Statistics and probability are key to data science.",
    "Flask and Django are popular Python frameworks."
]

# Normalise each document: take TextBlob's word list, lower-case it, and
# re-join the words with single spaces.
cleaned_docs = []
for doc in docs:
    lowered_words = TextBlob(doc).words.lower()
    cleaned_docs.append(' '.join(lowered_words))

# TF-IDF features with scikit-learn's built-in English stop-word list.
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(cleaned_docs)

# Two clusters; random_state is fixed so the assignment is repeatable.
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(X)

# Report the cluster label assigned to each document, in corpus order.
for i, label in enumerate(kmeans.labels_):
    print(f"Doc {i} → Cluster {label}")

Doc 0 → Cluster 1
Doc 1 → Cluster 0
Doc 2 → Cluster 1
Doc 3 → Cluster 1
Doc 4 → Cluster 1
