# Find polarity of unique products

### Install nltk packages

In [8]:
import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

### Import necessary libraries

In [6]:
import pandas as pd
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from textblob import TextBlob
stop = stopwords.words('english')

### Install TextBlob

In [None]:
pip install -U textblob

### Import csv file

In [11]:
df = pd.read_csv('cloths-rating.csv')
df.head(3)

Unnamed: 0,ProductID,UserID,Rating,Text
0,777,AV1YnR7wglJLPUi8IJmi,4,Great taffy at a great price.
1,767,AVpfpK8KLJeJML43BCuD,4,Absolutely wonderful - silky and sexy and comf...
2,1080,AVqkIdntQMlgsOJE6fuB,5,Love this dress! it's sooo pretty.


### Data Cleaning

In [12]:
uni_prod = df['ProductID'].unique()
uni_prod

array([ 777,  767, 1080, 1077, 1049,  847,  858, 1095, 1065,  853, 1120,
        697,  949, 1003,  684,  444, 1060, 1002,  862,  910,   89,  823,
       6969, 9696,  333, 8001,  369], dtype=int64)

In [13]:
def clear_text(text):
    word_tokens = word_tokenize(text)
    filtered_text = [w for w in word_tokens if not w in stop]
    try:
        return filtered_text
    except:
        return None
    
df['Clean_Text'] = df['Text'].apply(clear_text)
df.head()

Unnamed: 0,ProductID,UserID,Rating,Text,Clean_Text
0,777,AV1YnR7wglJLPUi8IJmi,4,Great taffy at a great price.,"[Great, taffy, great, price, .]"
1,767,AVpfpK8KLJeJML43BCuD,4,Absolutely wonderful - silky and sexy and comf...,"[Absolutely, wonderful, -, silky, sexy, comfor..."
2,1080,AVqkIdntQMlgsOJE6fuB,5,Love this dress! it's sooo pretty.,"[Love, dress, !, 's, sooo, pretty, .]"
3,1077,AVpfpK8KLJeJML43BCuD,3,I had such high hopes for this dress and reall...,"[I, high, hopes, dress, really, wanted, work, .]"
4,1049,AVpfpK8KLJeJML43BCuD,5,"I love, love, love this jumpsuit. it's fun, fl...","[I, love, ,, love, ,, love, jumpsuit, ., 's, f..."


### Sentimental Analysis

In [15]:
def sentiment_calc(clean_data):
    try:
        return TextBlob(str(clean_data)).sentiment.polarity
    except:
        return None
    
df['Polarity of Feedback'] = df['Clean_Text'].apply(sentiment_calc)
df.head()
df.to_csv('sentiment_score.csv', index=False)

# Use of Frequency Distribution and pos tag from nltk

### Frequency Distribution

In [18]:
from nltk.probability import FreqDist
a = "Tokenization is the process by which a large quantity of text is divided into smaller parts called tokens. These tokens are very useful for finding patterns and are considered as a base step for stemming and lemmatization. Tokenization also helps to substitute sensitive data elements with non-sensitive data elements. Natural language processing is used for building applications such as Text classification, intelligent chatbot, sentimental analysis, language translation, etc. It becomes vital to understand the pattern in the text to achieve the above-stated purpose. For the time being, don't worry about stemming and lemmatization but treat them as steps for textual data cleaning using NLP (Natural language processing). We will discuss stemming and lemmatization later in the tutorial. Tasks such as Text classification or spam filtering makes use of NLP along with deep learning libraries such as Keras and Tensorflow."
token_word = word_tokenize(a)
token_word = [w for w in token_word if not w in stop]
words_analysis = FreqDist(token_word)
words_analysis

FreqDist({'.': 8, ',': 5, 'stemming': 3, 'lemmatization': 3, 'data': 3, 'language': 3, 'Tokenization': 2, 'text': 2, 'tokens': 2, 'elements': 2, ...})

### pos tag

In [17]:
b = " Hello! Tokenization is the process by which a large quantity of text is divided into smaller parts called tokens. These tokens are very useful for finding patterns and are considered as a base step for stemming and lemmatization. Tokenization also helps to substitute sensitive data elements with non-sensitive data elements. Natural language processing is used for building applications such as Text classification, intelligent chatbot, sentimental analysis, language translation, etc. It becomes vital to understand the pattern in the text to achieve the above-stated purpose. For the time being, don't worry about stemming and lemmatization but treat them as steps for textual data cleaning using NLP (Natural language processing). We will discuss stemming and lemmatization later in the tutorial. Tasks such as Text classification or spam filtering makes use of NLP along with deep learning libraries such as Keras and Tensorflow."
words = word_tokenize(b)
words = [w for w in words if not w in stop]
tagged = nltk.pos_tag(words)
tagged

[('Hello', 'NN'),
 ('!', '.'),
 ('Tokenization', 'NN'),
 ('process', 'NN'),
 ('large', 'JJ'),
 ('quantity', 'NN'),
 ('text', 'NN'),
 ('divided', 'VBD'),
 ('smaller', 'JJR'),
 ('parts', 'NNS'),
 ('called', 'VBD'),
 ('tokens', 'NNS'),
 ('.', '.'),
 ('These', 'DT'),
 ('tokens', 'NNS'),
 ('useful', 'JJ'),
 ('finding', 'VBG'),
 ('patterns', 'NNS'),
 ('considered', 'VBN'),
 ('base', 'JJ'),
 ('step', 'NN'),
 ('stemming', 'VBG'),
 ('lemmatization', 'NN'),
 ('.', '.'),
 ('Tokenization', 'NNP'),
 ('also', 'RB'),
 ('helps', 'VBZ'),
 ('substitute', 'VB'),
 ('sensitive', 'JJ'),
 ('data', 'NNS'),
 ('elements', 'NNS'),
 ('non-sensitive', 'JJ'),
 ('data', 'NNS'),
 ('elements', 'NNS'),
 ('.', '.'),
 ('Natural', 'JJ'),
 ('language', 'NN'),
 ('processing', 'NN'),
 ('used', 'VBN'),
 ('building', 'NN'),
 ('applications', 'NNS'),
 ('Text', 'NNP'),
 ('classification', 'NN'),
 (',', ','),
 ('intelligent', 'JJ'),
 ('chatbot', 'NN'),
 (',', ','),
 ('sentimental', 'JJ'),
 ('analysis', 'NN'),
 (',', ','),
 ('lang

# Email and Phone number Validation

### Email Validation

In [24]:
import re

email = input("Enter your EmailID: ")
pattern1 = '^(\w|\.|\_|\-)+[@](\w|\_|\-|\.)+[.]\w{2,3}$'

matched_pattern = re.match(pattern1,email)

if matched_pattern:
    print("Valid Email")
else:
    print("Invalid Email")

Enter your EmailID: rinkal25@gmail.com
Valid Email


### Phone number Validation

In [32]:
phn_num = input("Enter your Phone number with Country Code: ")
pattern = '^\+91?[7-9][0-9]{9}$'

check_num = re.match(pattern,phn_num)

if check_num:
    print("Valid Phone Number")
else:
    print("Invalid Phone Number")

Enter your Phone number with Country Code: +915989898989
Invalid Phone Number
