# Python Built-in-Function - Count Words

In [2]:
text = "Natural Language Processing (NLP) is a fascinating field!"
words = text.split()
word_count = len(words)
print("Word Count : ",word_count)

Word Count :  8


# Regex - Handle Punctuation and Count Words

In [3]:
from typing_extensions import Text
import re
def count_word(text):
  word = re.findall(r'\b\w+\b',text)
  return len(word)

text = "Hello, world! Welcome to NLP."
print("Word Count:",count_word(text))

Word Count: 5


# NLTk-Python

In [6]:
import nltk
nltk.download('punkt_tab')
from nltk import word_tokenize

def count_word_nltk(text):
  words = word_tokenize(text)
  return len(words)

text = "Let's explore NLP with Python!"
print("Word Count : ",count_word_nltk(text))

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Word Count :  7


# Spacy

In [7]:
import spacy
nlp = spacy.load("en_core_web_sm")

def count_words_spacy(text):
  doc = nlp(text)
  words = [token.text for token in doc if not token.is_punct]
  return len(words)

text = "SpaCy is a powerful NLP library."
print("Word Count : ",count_words_spacy(text))

Word Count :  6


# Counter

In [10]:
from collections import Counter

def word_frequency(text):
  words = text.lower().split()
  return Counter(words)

text = "NLP is fun. NLP is powerful. NLP is the future."
print("Word Frequencies:", word_frequency(text))

Word Frequencies: Counter({'nlp': 3, 'is': 3, 'fun.': 1, 'powerful.': 1, 'the': 1, 'future.': 1})


# NLP Task 1 - Short vs Long sentences

In [13]:
from nltk.tokenize import sent_tokenize

def categorize_sentences(text):
  sentences = sent_tokenize(text)
  categorized = {"short":[],"long":[]}

  for sentence in sentences:
    word_count = len(word_tokenize(sentence))
    if word_count < 5:
      categorized["short"].append(sentence)
    else:
      categorized["long"].append(sentence)

  return categorized

text = "Hello! NLP is fun. Let's learn NLP together. It helps in many fields."
categories = categorize_sentences(text)
print("Short Sentence :",categories["short"])
print("Long Sentence :",categories["long"])

Short Sentence : ['Hello!', 'NLP is fun.']
Long Sentence : ["Let's learn NLP together.", 'It helps in many fields.']


# NLP Task 2 - Rule Based Sentiment Analysis

In [14]:
import time

positive_word = {"happy", "good", "great", "excellent", "love", "amazing"}
negative_word = {"sad", "bad", "terrible", "hate", "awful", "worst"}

def sentiment_analysis(text):
  start_time = time.time()
  words = text.lower().split()
  pos_count = sum(1 for word in words if word in positive_word)
  neg_count = sum(1 for word in words if word in negative_word)

  sentiment = "Positive Sentiment " if pos_count > neg_count else "Negative Sentiment" if neg_count > pos_count else "Neutral Sentiment"

  end_time = time.time()
  execution_time = end_time - start_time
  return sentiment,execution_time

text = """
I'm happy you're the President, and thank you for bringing me home.
I have never been so proud to be an American citizen. Thank you, Mr. President.
"""
sentiment , exec_time = sentiment_analysis(text)
print("Sentiment :",sentiment)
print("Execution time: {:.6f} seconds".format(exec_time))

Sentiment : Positive Sentiment 
Execution time: 0.000012 seconds
