<a href="https://colab.research.google.com/github/OmarMousaaaaa/Customer-Churn-Detection-Project-/blob/main/NLP%20Task%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, ne_chunk

# Necessary NLTK resources
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('words')

# Text
text_data = [
    "The movie was fantastic and I loved every part of it about Egypt",
    "I hated the film, it was the worst I have ever seen",
    "The storyline was boring but the acting was brilliant",
    "An amazing movie with a great plot and incredible performances",
    "Egypt movie, I regret wasting my time on it",
    "The actors did a great job but the story lacked depth",
    "One of the best films I have seen in a long time, highly recommend it",
    "This film was just okay, not too bad but not great either",
    "Absolutely loved the movie, fantastic plot and wonderful cast",
    "The movie was disappointing, it did not live up to the hype"
]

# Enabling NLP tools
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()


# Processes
for i in text_data:
    print("\nOriginal Sentence:", i)

    # Tokenization
    tokens = word_tokenize(i)
    print("Tokens:", tokens)

    # Stopword Removal
    filtered_tokens = []
    for word in tokens:
      if word.lower() not in stop_words:
        filtered_tokens.append(word)
    print("Filtered Tokens (without stopwords):", filtered_tokens)

    # Stemming and Lemmatization
    stemmed_words = []
    for word in filtered_tokens:
     stemmed_words.append(stemmer.stem(word))
    print("Stemmed Words:", stemmed_words)
    lemmatized_words = []
    for word in filtered_tokens:
      lemmatized_words.append(lemmatizer.lemmatize(word))
    print("Lemmatized Words:", lemmatized_words)

    # POS Tagging
    pos_tags = pos_tag(filtered_tokens)
    print("POS Tags:", pos_tags)

    # NER
    named_entities = ne_chunk(pos_tags)
    print("Named Entities:", named_entities)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!



Original Sentence: The movie was fantastic and I loved every part of it about Egypt
Tokens: ['The', 'movie', 'was', 'fantastic', 'and', 'I', 'loved', 'every', 'part', 'of', 'it', 'about', 'Egypt']
Filtered Tokens (without stopwords): ['movie', 'fantastic', 'loved', 'every', 'part', 'Egypt']
Stemmed Words: ['movi', 'fantast', 'love', 'everi', 'part', 'egypt']
Lemmatized Words: ['movie', 'fantastic', 'loved', 'every', 'part', 'Egypt']
POS Tags: [('movie', 'NN'), ('fantastic', 'JJ'), ('loved', 'VBN'), ('every', 'DT'), ('part', 'NN'), ('Egypt', 'NNP')]
Named Entities: (S
  movie/NN
  fantastic/JJ
  loved/VBN
  every/DT
  part/NN
  (PERSON Egypt/NNP))

Original Sentence: I hated the film, it was the worst I have ever seen
Tokens: ['I', 'hated', 'the', 'film', ',', 'it', 'was', 'the', 'worst', 'I', 'have', 'ever', 'seen']
Filtered Tokens (without stopwords): ['hated', 'film', ',', 'worst', 'ever', 'seen']
Stemmed Words: ['hate', 'film', ',', 'worst', 'ever', 'seen']
Lemmatized Words: ['hate