<a href="https://colab.research.google.com/github/Prakhar021-hub/Natural-Language-Processing/blob/main/NLP_word_count.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Python Built-in Function - Count Words

In [None]:
# Baseline count: str.split() with no arguments splits on runs of whitespace,
# so punctuation stays attached to its word ("(NLP)" and "field!" are one
# word each).
text = "Natural Language Processing (NLP) is a fascinating field!"
words = text.split()
word_count = len(words)
print("Word Count:", word_count)

Word Count: 8


## Regex - Handle Punctuation and Count Words

In [None]:
import re

def count_words(text):
    """Return how many word-like runs ``text`` contains.

    A "word" here is a maximal run of ``\\w`` characters (letters, digits,
    underscore), so punctuation is never counted and also acts as a
    separator: an apostrophe splits ``isn't`` into two words.
    """
    word_matches = re.finditer(r'\b\w+\b', text)
    # Only the number of matches is needed, so count them lazily.
    return sum(1 for _ in word_matches)

# Demo: the commas, "!" and "." are not word characters, which leaves the
# five words reported in the output.
text = "Hello, world! Welcome to NLP."
print("Word Count:", count_words(text))

Word Count: 5


## NLTK - Python

In [None]:
# `nltk` itself must be imported here: the later `from nltk import
# word_tokenize` does not bind the name `nltk`, so without this line the cell
# raises NameError on a fresh kernel.
import nltk

# Fetch the "punkt_tab" tokenizer data package into the local NLTK data
# directory. Kept as the cell's last expression so its return value is shown.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
from nltk import word_tokenize

def count_words_nltk(text):
    """Return the number of tokens NLTK's ``word_tokenize`` yields for ``text``.

    Every token the tokenizer returns is counted; nothing is filtered out
    afterwards.
    """
    return len(word_tokenize(text))

# Demo: the recorded result is 7 although the sentence has 5 whitespace-
# separated words. NOTE(review): presumably word_tokenize splits "Let's" into
# two tokens and emits "!" as its own token - confirm against the tokenizer.
text = "Let's explore NLP with Python!"
print("Word Count:", count_words_nltk(text))

Word Count: 7


## spaCy

In [None]:
import spacy

# Load the "en_core_web_sm" pipeline once at cell level; count_words_spacy
# reuses this `nlp` object on every call instead of reloading it.
nlp = spacy.load("en_core_web_sm")

def count_words_spacy(text):
    """Return the number of tokens in ``text`` that are not punctuation.

    The text is run through the cell-level ``nlp`` pipeline, and every token
    whose ``is_punct`` attribute is false is counted.
    """
    return sum(1 for token in nlp(text) if not token.is_punct)

# Demo: the recorded result is 6 for five whitespace-separated words.
# NOTE(review): presumably spaCy splits "isn't" into two tokens while "," and
# "?" are dropped by the is_punct filter - confirm against the pipeline.
text = "NLP is amazing, isn't it?"
print("Word Count:", count_words_spacy(text))

Word Count: 6


## Counter

In [None]:
from collections import Counter

import string


def word_frequency(text, strip_punctuation=False):
    """Count how often each lower-cased word occurs in ``text``.

    Args:
        text: The string to analyse. It is lower-cased and split on runs of
            whitespace.
        strip_punctuation: When false (the default), tokens are counted
            exactly as split, so ``"fun."`` and ``"fun"`` are different keys.
            When true, leading and trailing ASCII punctuation is removed from
            every token first, and tokens that become empty are dropped.

    Returns:
        A ``collections.Counter`` mapping each token to its number of
        occurrences.
    """
    words = text.lower().split()
    if strip_punctuation:
        stripped = (word.strip(string.punctuation) for word in words)
        # A token made only of punctuation (e.g. "...") strips to "" - skip it.
        words = [word for word in stripped if word]
    return Counter(words)

# Demo: this call splits on whitespace only, so in the output 'fun.',
# 'powerful.' and 'future.' keep their trailing period.
text = "NLP is fun. NLP is powerful. NLP is the future."
print("Word Frequencies:", word_frequency(text))

Word Frequencies: Counter({'nlp': 3, 'is': 3, 'fun.': 1, 'powerful.': 1, 'the': 1, 'future.': 1})


## NLP Task 1 - Short vs Long sentences

In [None]:
from nltk.tokenize import sent_tokenize

def categorize_sentences(text, short_threshold=5):
    """Split ``text`` into sentences and bucket each as "short" or "long".

    Args:
        text: The text to segment with ``sent_tokenize``.
        short_threshold: A sentence is "short" when ``word_tokenize`` yields
            fewer than this many tokens for it; otherwise it is "long".
            Defaults to 5.

    Returns:
        A dict with the keys ``"short"`` and ``"long"``, each holding the
        matching sentences in their original order.

    NOTE(review): the length is a count of tokenizer tokens, which appears to
    include punctuation - the recorded demo output files the four-word
    sentence "Let's learn NLP together." under "long". Confirm this is the
    intended measure.
    """
    categorized = {"short": [], "long": []}

    for sentence in sent_tokenize(text):
        token_count = len(word_tokenize(sentence))
        bucket = "short" if token_count < short_threshold else "long"
        categorized[bucket].append(sentence)

    return categorized

# Demo: categorize a four-sentence text and print each bucket separately.
text = "Hello! NLP is fun. Let's learn NLP together. It helps in many fields."
categories = categorize_sentences(text)
print("Short Sentences:", categories["short"])
print("Long Sentences:", categories["long"])

Short Sentences: ['Hello!', 'NLP is fun.']
Long Sentences: ["Let's learn NLP together.", 'It helps in many fields.']


## NLP Task 2 - Rule-based Sentiment Analysis

In [None]:
import time

import string

# Hand-picked sentiment lexicons. Lookups are made with lower-cased tokens,
# so every entry must be lower case.
positive_words = {"happy", "good", "great", "excellent", "love", "amazing"}
negative_words = {"sad", "bad", "terrible", "hate", "awful", "worst"}


def sentiment_analysis(text):
    """Classify ``text`` by counting lexicon hits, and time the classification.

    The text is lower-cased and split on whitespace, and each token has its
    leading and trailing ASCII punctuation removed before it is looked up in
    ``positive_words`` and ``negative_words``. Without that step a token such
    as ``"good."`` or ``"awful,"`` would never match.

    Args:
        text: The string to classify.

    Returns:
        A ``(sentiment, execution_time)`` tuple. ``sentiment`` is
        ``"Positive Sentiment"``, ``"Negative Sentiment"`` or
        ``"Neutral Sentiment"`` (equal counts, including no hits at all);
        ``execution_time`` is the elapsed time in seconds as a float.
    """
    # perf_counter is a monotonic, high-resolution clock, which suits an
    # interval this short better than time.time().
    start_time = time.perf_counter()

    words = [token.strip(string.punctuation) for token in text.lower().split()]
    pos_count = sum(1 for word in words if word in positive_words)
    neg_count = sum(1 for word in words if word in negative_words)

    if pos_count > neg_count:
        sentiment = "Positive Sentiment"
    elif neg_count > pos_count:
        sentiment = "Negative Sentiment"
    else:
        sentiment = "Neutral Sentiment"

    execution_time = time.perf_counter() - start_time

    return sentiment, execution_time

# Demo: "happy" is the only lexicon word in this text, so the result is
# positive. exec_time is an elapsed time in seconds, so the printed value
# differs from run to run.
text = """
I'm happy you're the President, and thank you for bringing me home.
I have never been so proud to be an American citizen. Thank you, Mr. President.
"""
sentiment, exec_time = sentiment_analysis(text)
print("Sentiment:", sentiment)
print("Execution Time: {:.6f} seconds".format(exec_time))

Sentiment: Positive Sentiment
Execution Time: 0.000016 seconds
