<a href="https://colab.research.google.com/github/rimalsaksham07/AI_lab/blob/main/nlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords, movie_reviews
from nltk import pos_tag, ne_chunk
from nltk import FreqDist
import random

# Fetch every NLTK data package the rest of this script relies on.
# Packages are requested one at a time, in the order listed.
for resource_name in (
    'stopwords',
    'punkt',
    'vader_lexicon',
    'maxent_ne_chunker',
    'words',
    'movie_reviews',
    'averaged_perceptron_tagger',  # needed by pos_tag
):
    nltk.download(resource_name)

# Sample sentence shared by the tokenization and stop-word demos.
example_string = "Artificial intelligence and machine learning are revolutionizing industries across the globe."

# Sentence-level split of the sample text.
print("Sentence Tokenization:")
sentences_in_example = sent_tokenize(example_string)
print(sentences_in_example)

# Word-level split; the resulting token list is reused further down.
print("Word Tokenization:")
words_in_example = word_tokenize(example_string)
print(words_in_example)

# Remove English stop words from the tokenized sample.
# Matching is done on the case-folded token; every token that is not in the
# stop-word set is kept with its original casing.
stop_words = set(stopwords.words("english"))
filtered_list = []
for token in words_in_example:
    if token.casefold() in stop_words:
        continue
    filtered_list.append(token)
print("Filtered List (without stopwords):")
print(filtered_list)

# Score a single sentence with the sentiment intensity analyzer and print
# the resulting score dictionary.
sentiment_text = "AI is rapidly becoming an integral part of modern technology."
sia = SentimentIntensityAnalyzer()
print("Sentiment Analysis Scores:")
sentiment_scores = sia.polarity_scores(sentiment_text)
print(sentiment_scores)

# Named Entity Recognition (NER) pipeline:
# tokenize -> part-of-speech tag -> chunk into named entities.
ner_text = "Elon Musk launched SpaceX with the goal of colonizing Mars."
words_ner = word_tokenize(ner_text)
tagged_words = pos_tag(words_ner)
named_entities = ne_chunk(tagged_words)
# Header and chunk tree go on separate lines, as with two print calls.
print("Named Entities:", named_entities, sep="\n")

# Count token occurrences in a sample sentence and show the ten most
# frequent tokens (punctuation tokens are counted as well).
frq_sen = "In the future, self-driving cars will be a common sight on roads, and autonomous systems will manage various aspects of our daily lives."
words_freq = word_tokenize(frq_sen)
frequency_distribution = FreqDist(words_freq)
print("Frequency Distribution Most Common Words:")
top_tokens = frequency_distribution.most_common(10)
print(top_tokens)

# Build the labelled dataset for the sentiment classifier: one
# (word list, category) pair per file in the movie_reviews corpus,
# visiting categories and their files in corpus order.
documents = []
for category in movie_reviews.categories():
    for fileid in movie_reviews.fileids(category):
        review_words = list(movie_reviews.words(fileid))
        documents.append((review_words, category))

# Shuffle in place so the train/test slices taken later mix the categories.
random.shuffle(documents)

def extract_features(words):
    """Return a bag-of-words feature dict for *words*.

    Each distinct item of *words* becomes a key mapped to ``True``;
    duplicates collapse into a single key and keys keep first-seen order.
    """
    return dict.fromkeys(words, True)

# Turn every (word list, label) document into a (feature dict, label) pair.
featuresets = [(extract_features(tokens), label) for tokens, label in documents]

# First 1500 examples are used for training; the remainder is held out.
train_set = featuresets[:1500]
test_set = featuresets[1500:]

# Fit a Naive Bayes model and report its accuracy on the held-out examples.
classifier = nltk.NaiveBayesClassifier.train(train_set)
accuracy = nltk.classify.accuracy(classifier, test_set)
print("Accuracy of Classifier:", accuracy)

# Classification of a new review.
# The classifier was trained on movie_reviews tokens, and that corpus text is
# lower-cased. Tokens are lower-cased here so capitalised words such as "The"
# map onto features seen during training instead of being treated as unseen
# (and therefore ignored) features.
new_review = "The storyline was gripping and the visual effects were stunning."
new_tokens = [token.lower() for token in word_tokenize(new_review)]
new_features = extract_features(new_tokens)
print("Classification of New Review:", classifier.classify(new_features))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Sentence Tokenization:
['Artificial intelligence and machine learning are revolutionizing industries across the globe.']
Word Tokenization:
['Artificial', 'intelligence', 'and', 'machine', 'learning', 'are', 'revolutionizing', 'industries', 'across', 'the', 'globe', '.']
Filtered List (without stopwords):
['Artificial', 'intelligence', 'machine', 'learning', 'revolutionizing', 'industries', 'across', 'globe', '.']
Sentiment Analysis Scores:
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Named Entities:
(S
  (PERSON Elon/NNP)
  (PERSON Musk/NNP)
  launched/VBD
  (ORGANIZATION SpaceX/NNP)
  with/IN
  the/DT
  goal/NN
  of/IN
  colonizing/VBG
  (PERSON Mars/NNP)
  ./.)
Frequency Distribution Most Common Words:
[(',', 2), ('will', 2), ('In', 1), ('the', 1), ('future', 1), ('self-driving', 1), ('cars', 1), ('be', 1), ('a', 1), ('common', 1)]
Accuracy of Classifier: 0.714
Classification of New Review: pos
