In [None]:
# NLP_PRACTICAL_2_FUNDAMENTAL_CONCEPTS_NLP
! pip install nltk

In [None]:
# import nltk
import nltk

### Concept 1. Phonology (Study of Sound Patterns - Concepts Covered: Phonemes, rhyming words.)
(i) Phonology deals with how sounds are structured in a language.
(ii) We use the `pronouncing` library (for English phonetics) to extract phonemes and rhyming words.


In [None]:
!pip install pronouncing

In [None]:
import pronouncing # Get phonemes for a word

In [None]:
# Word to analyse (other words to try: 'Natural', 'language')
word1 = 'College'

# Look up the phonemes recorded for the word
phonemes = pronouncing.phones_for_word(word1)
print(f"Phonemes for '{word1}': {phonemes}")

# Find rhyming words and show at most the first ten of them
rhymes = pronouncing.rhymes(word1)
first_rhymes = rhymes[:10]
print(f"Words that rhyme with '{word1}': {first_rhymes}")

### Concept 2. Morphology (Study of Word Formation - Tokenization, Lemmatization, POS tagging, Morphological analysis.)
(i) Morphology involves breaking words into morphemes (smallest units of meaning).
(ii) We use spacy for tokenization and lemmatization.


In [None]:
!pip install spacy

In [None]:
import spacy

### (i) 'en_core_web_sm' is an English multi-task Convolutional Neural Network (CNN) pipeline trained on OntoNotes.
### (ii) It assigns context-specific token vectors, POS tags, dependency parses, and named entities.

In [None]:
# Load spaCy's 'en_core_web_sm' English pipeline into `nlp`; the cells below call nlp(text) to analyse a sentence.
# NOTE(review): no visible cell installs this model - presumably it must already be available
# (e.g. via `python -m spacy download en_core_web_sm`); confirm for a fresh environment.
nlp = spacy.load("en_core_web_sm")

### To print a structured table containing Tokenization, Lemmatization, Part of Speech (POS) tagging, and Morphological features of each word in a given sentence.

In [None]:
# Example 1: run the pipeline on a sentence and tabulate each token's analysis.
# A lemma is the root (dictionary) form of a word; lemmatization is the process
# of reducing words to their lemmas.
sentence = "The cats are playing happily."
doc = nlp(sentence)

print("Token      | Lemma      | POS    | Morphology")
print("-" * 40)
# One row per token: surface text, lemma, POS tag, morphological features
for token in doc:
    print(f"{token.text:10} | {token.lemma_:10} | {token.pos_:6} | {token.morph}")


In [None]:
# Example 2: the same token table for a longer sentence.
sentence1 = "The brown fox is quick and he is jumping over the lazy dog"
doc = nlp(sentence1)

print("Token      | Lemma      | POS    | Morphology")
print("-" * 40)
for token in doc:
    # Fixed-width columns keep the rows aligned with the header above
    row = f"{token.text:10} | {token.lemma_:10} | {token.pos_:6} | {token.morph}"
    print(row)


#### In Natural Language Processing (NLP), "AUX" stands for "Auxiliary" and refers to a part-of-speech tag used to identify auxiliary verbs, which are "helping verbs" that add grammatical information like tense, mood, or voice to a main verb in a sentence, such as "is" in "She is walking" or "has" in "She has finished walking."

### Concept 3. Syntax (Sentence Structure & Grammar – Sentence structure, dependency parsing.)
### Syntax focuses on how words are arranged to form grammatically correct sentences.
### We use dependency parsing with spacy.


In [None]:
from spacy import displacy

# Parse the sentence with the `nlp` pipeline loaded earlier in this notebook
# (there is no need to load 'en_core_web_sm' a second time).
sentence = "The brown fox is very quick and he is jumping over the lazy dog."
doc = nlp(sentence)

# Visualize the dependency tree inline. displacy.serve() starts a blocking web
# server, so inside a notebook no later cell could run; displacy.render() draws
# the parse directly in the cell output instead.
displacy.render(doc, style="dep", jupyter=True)

### Concept 4. Semantics (Meaning of Words & Sentences – Word embeddings, semantic similarity.)
### Semantics involves understanding word meanings. Here we use pre-trained word embeddings (GloVe vectors loaded through gensim) and calculate word similarity.


In [None]:
! pip install gensim

In [None]:
import gensim.downloader as api # gensim's downloader module, used below to load pre-trained word vectors

In [None]:
# Load the pre-trained "glove-wiki-gigaword-50" word vectors through gensim's downloader.
# NOTE(review): presumably this downloads the vectors on first use and caches them locally - confirm.
model = api.load("glove-wiki-gigaword-50")

In [None]:
# Word pair to compare (another pair to try: "king" / "queen")
word1, word2 = "gain", "bargain"

In [None]:
# Similarity score between the two words' vectors, as computed by the loaded model.
# NOTE(review): presumably cosine similarity, and a word missing from the vocabulary
# would raise KeyError - confirm against the gensim KeyedVectors docs.
similarity = model.similarity(word1, word2)

In [None]:
# Report the similarity score for the word pair, formatted to four decimal places
print(f"Semantic similarity between '{word1}' and '{word2}': {similarity:.4f}")

### Concept 5. Pragmatics (Context & Implicature – Context-based analysis, sentiment analysis.)
### Pragmatics deals with meaning in context.
### The program below classifies sentences based on sentiment to show contextual meaning.


In [None]:
!pip install textblob

In [None]:
# Import TextBlob, used below to compute the sentiment polarity of each sentence
from textblob import TextBlob

In [None]:
# Example_1: sentences to classify by sentiment
sentences = [
    "I love this movie!",
    "The food was terrible.",
    "It's raining outside, take an umbrella.",
]

In [None]:
# Label each sentence by the sign of its TextBlob polarity score
for sentence in sentences:
    sentiment = TextBlob(sentence).sentiment.polarity
    if sentiment > 0:
        context = "Positive"
    elif sentiment < 0:
        context = "Negative"
    else:
        context = "Neutral"
    print(f"Sentence: '{sentence}' | Sentiment: {context}")


In [None]:
# Example_2: a second batch of sentences to classify by sentiment
sentences_2 = [
    "I love the indian food!",
    "The place is bad",
    "Repeat performance!!",
]

In [None]:
# Label each Example_2 sentence by the sign of its TextBlob polarity score
for sentence in sentences_2:
    sentiment_1 = TextBlob(sentence).sentiment.polarity
    # Start from the zero-polarity label and override it for non-zero scores
    context_1 = "Neutral"
    if sentiment_1 > 0:
        context_1 = "Positive"
    elif sentiment_1 < 0:
        context_1 = "Negative"
    print(f"Sentence: '{sentence}' | Sentiment: {context_1}")

In [None]:
# Example_3: a third batch of sentences to classify by sentiment
sentences_3 = [
    "I like Koren Movies!",
    "The Buidling is not in a good shape",
    "Everybody does the same thing",
]

In [None]:
# Label each Example_3 sentence by the sign of its TextBlob polarity score:
# > 0 -> "Positive", < 0 -> "Negative", exactly 0 -> "Neutral".
for sentence in sentences_3:
    sentiment_2 = TextBlob(sentence).sentiment.polarity
    # Classify on this sentence's own score (sentiment_2). The previous code read
    # sentiment_1, a stale value left over from the Example_2 loop, so every
    # sentence here received the same label (or a NameError on a fresh kernel).
    context_2 = "Positive" if sentiment_2 > 0 else "Negative" if sentiment_2 < 0 else "Neutral"
    print(f"Sentence: '{sentence}' | Sentiment: {context_2}")