In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

In [None]:
# Download NLTK resources (if needed)
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

In [3]:
# Tokenization
text = "Python is a general-purpose language, used to create a range of applications, including data science, software and web development, automation, and improving the ease of everyday tasks."
tokens = word_tokenize(text)
print("Tokenization:", tokens)

Tokenization: ['Python', 'is', 'a', 'general-purpose', 'language', ',', 'used', 'to', 'create', 'a', 'range', 'of', 'applications', ',', 'including', 'data', 'science', ',', 'software', 'and', 'web', 'development', ',', 'automation', ',', 'and', 'improving', 'the', 'ease', 'of', 'everyday', 'tasks', '.']


In [4]:
# Stopword removal
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
print("Stopword removal:", filtered_tokens)

Stopword removal: ['Python', 'general-purpose', 'language', ',', 'used', 'create', 'range', 'applications', ',', 'including', 'data', 'science', ',', 'software', 'web', 'development', ',', 'automation', ',', 'improving', 'ease', 'everyday', 'tasks', '.']


In [5]:
# Stemming
ps = PorterStemmer()
stemmed_tokens = [ps.stem(word) for word in filtered_tokens]
print("Stemming:", stemmed_tokens)

Stemming: ['python', 'general-purpos', 'languag', ',', 'use', 'creat', 'rang', 'applic', ',', 'includ', 'data', 'scienc', ',', 'softwar', 'web', 'develop', ',', 'autom', ',', 'improv', 'eas', 'everyday', 'task', '.']


In [6]:
# Lemmatization
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]
print("Lemmatization:", lemmatized_tokens)

Lemmatization: ['Python', 'general-purpose', 'language', ',', 'used', 'create', 'range', 'application', ',', 'including', 'data', 'science', ',', 'software', 'web', 'development', ',', 'automation', ',', 'improving', 'ease', 'everyday', 'task', '.']


In [7]:
import spacy
# Load spaCy English model
nlp = spacy.load("en_core_web_sm")
# Tokenization, Lemmatization, Part-of-Speech Tagging
doc = nlp("Computer vision is a field of computer science that focuses on enabling computers to identify and understand objects and people in images and videos. Like other types of AI, computer vision seeks to perform and automate tasks that replicate human capabilities.")
tokens = [token.text for token in doc]
lemmatized_tokens = [token.lemma_ for token in doc]
pos_tags = [(token.text, token.pos_) for token in doc]
print("Tokenization:", tokens)
print("Lemmatization:", lemmatized_tokens)
print("Part-of-Speech Tagging:",pos_tags)

Tokenization: ['Computer', 'vision', 'is', 'a', 'field', 'of', 'computer', 'science', 'that', 'focuses', 'on', 'enabling', 'computers', 'to', 'identify', 'and', 'understand', 'objects', 'and', 'people', 'in', 'images', 'and', 'videos', '.', 'Like', 'other', 'types', 'of', 'AI', ',', 'computer', 'vision', 'seeks', 'to', 'perform', 'and', 'automate', 'tasks', 'that', 'replicate', 'human', 'capabilities', '.']
Lemmatization: ['computer', 'vision', 'be', 'a', 'field', 'of', 'computer', 'science', 'that', 'focus', 'on', 'enable', 'computer', 'to', 'identify', 'and', 'understand', 'object', 'and', 'people', 'in', 'image', 'and', 'video', '.', 'like', 'other', 'type', 'of', 'AI', ',', 'computer', 'vision', 'seek', 'to', 'perform', 'and', 'automate', 'task', 'that', 'replicate', 'human', 'capability', '.']
Part-of-Speech Tagging: [('Computer', 'NOUN'), ('vision', 'NOUN'), ('is', 'AUX'), ('a', 'DET'), ('field', 'NOUN'), ('of', 'ADP'), ('computer', 'NOUN'), ('science', 'NOUN'), ('that', 'PRON'),

In [8]:
from collections import Counter
# Word Frequency Counting
word_freq = Counter(tokens)
# Named Entity Recognition
named_entities = [(entity.text, entity.label_) for entity in doc.ents]
# Bag of Words Representation
bag_of_words = {token.text: token.is_alpha for token in doc}
print("Word Frequency Count:", word_freq)
print("Named Entities:", named_entities)
print("Bag of Words Representation:", bag_of_words)

Word Frequency Count: Counter({'and': 4, 'vision': 2, 'of': 2, 'computer': 2, 'that': 2, 'to': 2, '.': 2, 'Computer': 1, 'is': 1, 'a': 1, 'field': 1, 'science': 1, 'focuses': 1, 'on': 1, 'enabling': 1, 'computers': 1, 'identify': 1, 'understand': 1, 'objects': 1, 'people': 1, 'in': 1, 'images': 1, 'videos': 1, 'Like': 1, 'other': 1, 'types': 1, 'AI': 1, ',': 1, 'seeks': 1, 'perform': 1, 'automate': 1, 'tasks': 1, 'replicate': 1, 'human': 1, 'capabilities': 1})
Named Entities: [('AI', 'ORG')]
Bag of Words Representation: {'Computer': True, 'vision': True, 'is': True, 'a': True, 'field': True, 'of': True, 'computer': True, 'science': True, 'that': True, 'focuses': True, 'on': True, 'enabling': True, 'computers': True, 'to': True, 'identify': True, 'and': True, 'understand': True, 'objects': True, 'people': True, 'in': True, 'images': True, 'videos': True, '.': False, 'Like': True, 'other': True, 'types': True, 'AI': True, ',': False, 'seeks': True, 'perform': True, 'automate': True, 'tas

In [None]:
!python -m textblob.download_corpora
!pip install googletrans==4.0.0-rc1

In [10]:
from textblob import TextBlob
text = "I Love AI."
blob = TextBlob(text)
# Sentiment Analysis
print("Sentiment Analysis:", blob.sentiment)

Sentiment Analysis: Sentiment(polarity=0.5, subjectivity=0.6)


In [11]:
from googletrans import Translator
text = "I Love AI."

# Detect language
translator = Translator()
detected_language = translator.detect(text).lang
print("Detected Language:", detected_language)

# Translate to French
if detected_language != 'fr':  # Check if the detected language is not already French
    translated_text = translator.translate(text, dest='fr').text
    print("Translation to French:", translated_text)
else:
    print("Text is already in French.")

Detected Language: en
Translation to French: J'adore ai.


In [12]:
text = "I lov AI."
blob = TextBlob(text)

# Correct the text
corrected_text = blob.correct()

print("Original Text:", text)
print("Corrected Text:", corrected_text)

Original Text: I lov AI.
Corrected Text: I love of.
