<a href="https://colab.research.google.com/github/Hemkush/NLP_Practice/blob/main/NLP_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import spacy

In [None]:
!python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


# spaCy Pipeline

In [None]:
# Sample sentence used to demonstrate the individual pipeline stages.
text = "A customer in New York last month joined our loyalty program."
# Run the loaded pipeline on the sentence; the resulting `doc` is what the
# next cell iterates over for tokens, POS tags, entities and lemmas.
doc = nlp(text)

In [None]:
# Walk through each annotation spaCy attached to `doc`, one stage at a time.

# 1) Tokenization: one token per output line.
for token in doc:
    print(token.text)
print("\n")

# 2) Stop-word removal: keep the text of every token not flagged as a stop word.
print("Without stop words:")
filtered_tokens = []
for token in doc:
    if token.is_stop:
        continue
    filtered_tokens.append(token.text)
print(filtered_tokens)
print("\n")

# 3) Part-of-speech tagging: token text followed by its coarse POS label.
print("POS:")
for token in doc:
    print(f"{token.text} {token.pos_}")
print("\n")

# 4) Named entity recognition: entity span text followed by its label.
print("NER:")
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")
print("\n")

# 5) Lemmatization: the lemma string of every token, in document order.
print("Lemmatization:")
lemmatized_tokens = []
for token in doc:
    lemmatized_tokens.append(token.lemma_)
print(lemmatized_tokens)

A
customer
in
New
York
last
month
joined
our
loyalty
program
.


Without stop words:
['customer', 'New', 'York', 'month', 'joined', 'loyalty', 'program', '.']


POS:
A DET
customer NOUN
in ADP
New PROPN
York PROPN
last ADJ
month NOUN
joined VERB
our PRON
loyalty NOUN
program NOUN
. PUNCT


NER:
New York GPE
last month DATE


Lemmatization:
['a', 'customer', 'in', 'New', 'York', 'last', 'month', 'join', 'our', 'loyalty', 'program', '.']


# Building a Processing Pipeline with a File

In [None]:
# Run every example in the sentiment file through the spaCy pipeline and
# collect the per-example annotations into one DataFrame (`results_df`).
file_path = "/content/sample_data/sentiment_examples.txt"
with open(file_path, "r", encoding='utf-8') as file:
    # readlines() keeps each line's trailing "\n", which would end up in
    # "Original Text" and be passed to the tokenizer. Strip surrounding
    # whitespace and skip blank lines so every row is one real example.
    sentiment_texts = [line.strip() for line in file if line.strip()]

# One list entry per example, aligned by position with `sentiment_texts`.
token_lists = []
filtered_tokens_lists = []
pos_lists = []
ner_lists = []
lemmatized_lists = []

# nlp.pipe processes the texts as a stream instead of one call per text.
# The loop variable is deliberately not named `doc`, and no intermediate
# `filtered_tokens` is assigned, so the single-sentence objects created in
# the earlier cell are not overwritten by this one.
for sentiment_doc in nlp.pipe(sentiment_texts):
    token_lists.append([token.text for token in sentiment_doc])

    filtered_tokens_lists.append(
        [token.text for token in sentiment_doc if not token.is_stop]
    )

    pos_lists.append([(token.text, token.pos_) for token in sentiment_doc])

    ner_lists.append([(ent.text, ent.label_) for ent in sentiment_doc.ents])

    # Previously declared but never filled: record the lemmas as well.
    lemmatized_lists.append([token.lemma_ for token in sentiment_doc])


results_df = pd.DataFrame({
    "Original Text": sentiment_texts,
    "Tokens": token_lists,
    "Filtered Tokens": filtered_tokens_lists,
    "POS": pos_lists,
    "NER": ner_lists,
    "Lemmatized Tokens": lemmatized_lists,
})

print(results_df)

                                       Original Text  \
0  "I love the new features of your product! It h...   
1  "The customer support was exceptional in New Y...   
2  "The quality of your service exceeded my expec...   
3  "I'm extremely satisfied with my purchase. The...   
4  "The user interface is intuitive and easy to n...   
5  "I had a positive experience shopping on your ...   
6  "Your company values customer feedback, and it...   
7  "The pricing is fair, and the value I get in r...   
8  "I appreciate the personalized recommendations...   
9  "The delivery was prompt, and the packaging wa...   

                                              Tokens  \
0  [", I, love, the, new, features, of, your, pro...   
1  [", The, customer, support, was, exceptional, ...   
2  [", The, quality, of, your, service, exceeded,...   
3  [", I, 'm, extremely, satisfied, with, my, pur...   
4  [", The, user, interface, is, intuitive, and, ...   
5  [", I, had, a, positive, experience, shoppin

# Export Data to a CSV File


In [None]:
# Save the per-example results to CSV and read them back for a quick check.
output_path = "sentiment_analysis_results.csv"

# Write and read with the same explicit encoding. The file was previously
# written as UTF-8 (the pandas default) but read back as latin-1, which
# garbles any non-ASCII character in the text.
results_df.to_csv(output_path, index=False, encoding="utf-8")
processed_data = pd.read_csv(output_path, encoding="utf-8")

# Note: list-valued columns (tokens, POS pairs, entities) are stored in the
# CSV as their string representation, so they come back as plain strings.
processed_data.head()

Unnamed: 0,Original Text,Tokens,Filtered Tokens,POS,NER
0,"""I love the new features of your product! It h...","['""', 'I', 'love', 'the', 'new', 'features', '...","['""', 'love', 'new', 'features', 'product', '!...","[('""', 'PUNCT'), ('I', 'PRON'), ('love', 'VERB...",[]
1,"""The customer support was exceptional in New Y...","['""', 'The', 'customer', 'support', 'was', 'ex...","['""', 'customer', 'support', 'exceptional', 'N...","[('""', 'PUNCT'), ('The', 'DET'), ('customer', ...","[('New York', 'GPE')]"
2,"""The quality of your service exceeded my expec...","['""', 'The', 'quality', 'of', 'your', 'service...","['""', 'quality', 'service', 'exceeded', 'expec...","[('""', 'PUNCT'), ('The', 'DET'), ('quality', '...","[('Prague', 'GPE')]"
3,"""I'm extremely satisfied with my purchase. The...","['""', 'I', ""'m"", 'extremely', 'satisfied', 'wi...","['""', 'extremely', 'satisfied', 'purchase', '....","[('""', 'PUNCT'), ('I', 'PRON'), (""'m"", 'AUX'),...",[]
4,"""The user interface is intuitive and easy to n...","['""', 'The', 'user', 'interface', 'is', 'intui...","['""', 'user', 'interface', 'intuitive', 'easy'...","[('""', 'PUNCT'), ('The', 'DET'), ('user', 'NOU...",[]
