# OBJECTIVE :- Apply Hierarchical clustering on Amazon Food Reviews

In [2]:
# Importing libraries
import warnings
warnings.filterwarnings("ignore")

import sqlite3
import pandas as pd
import numpy as np
import nltk
import string
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem.porter import PorterStemmer

import re

import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer

from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import pickle

# Loading Data

In [3]:
# using the SQLite Table to read data.
con1 = sqlite3.connect('database.sqlite')

# Eliminating neutral reviews i.e. those reviews with Score = 3
filtered_data = pd.read_sql_query(" SELECT * FROM Reviews WHERE Score != 3 ", con1)

# Give reviews with Score>3 a positive rating, and reviews with a score<3 a negative rating.
def polarity(x):
    if x < 3:
        return 'negative'
    return 'positive'

# Applying polarity function on Score column of filtered_data
filtered_data['Score'] = filtered_data['Score'].map(polarity)

print(filtered_data.shape)
filtered_data.head()

(525814, 10)


Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,positive,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,negative,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,positive,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,negative,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,positive,1350777600,Great taffy,Great taffy at a great price. There was a wid...


# Data Cleaning: Deduplication

In [4]:
#Sorting data according to ProductId in ascending order
sorted_data=filtered_data.sort_values('ProductId', axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last')

#Deduplication of entries
final=sorted_data.drop_duplicates(subset={"UserId","ProfileName","Time","Text"}, keep='first', inplace=False)
print(final.shape)

#Checking to see how much % of data still remains
((final.shape[0]*1.0)/(filtered_data.shape[0]*1.0)*100)

(364173, 10)


69.25890143662969

In [5]:
# Removing rows where HelpfulnessNumerator is greater than HelpfulnessDenominator
final = final[final.HelpfulnessNumerator <= final.HelpfulnessDenominator]

print(final.shape)
final[30:50]

(364171, 10)


Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
138683,150501,0006641040,AJ46FKXOVC7NR,Nicholas A Mesiano,2,2,positive,940809600,This whole series is great way to spend time w...,I can remember seeing the show when it aired o...
138676,150493,0006641040,AMX0PJKV4PPNJ,"E. R. Bird ""Ramseelbird""",71,72,positive,1096416000,Read it once. Read it twice. Reading Chicken S...,"These days, when a person says, ""chicken soup""..."
138682,150500,0006641040,A1IJKK6Q1GTEAY,A Customer,2,2,positive,1009324800,It Was a favorite!,This was a favorite book of mine when I was a ...
138681,150499,0006641040,A3E7R866M94L0C,"L. Barker ""simienwolf""",2,2,positive,1065830400,Can't explain why,This book has been a favorite of mine since I ...
476617,515426,141278509X,AB1A5EGHHVA9M,CHelmic,1,1,positive,1332547200,The best drink mix,This product by Archer Farms is the best drink...
22621,24751,2734888454,A1C298ITT645B6,Hugh G. Pritchard,0,0,positive,1195948800,Dog Lover Delites,Our dogs just love them. I saw them in a pet ...
22620,24750,2734888454,A13ISQV0U9GZIC,Sandikaye,1,1,negative,1192060800,made in china,My dogs loves this chicken but its a product f...
284375,308077,2841233731,A3QD68O22M2XHQ,LABRNTH,0,0,positive,1345852800,Great recipe book for my babycook,This book is easy to read and the ingredients ...
157850,171161,7310172001,AFXMWPNS1BLU4,H. Sandler,0,0,positive,1229385600,Excellent treats,I have been feeding my greyhounds these treats...
157849,171160,7310172001,A74C7IARQEM1R,stucker,0,0,positive,1230076800,Sophie's Treats,This is one product that my welsh terrier can ...


OBSERVATION :- Here books with ProductId - 0006641040 and 2841233731 are also there so we have to remove all these rows with these ProductIds from the data

In [6]:
final = final[final['ProductId'] != '2841233731']
final = final[final['ProductId'] != '0006641040']
final.shape

(364136, 10)

# Text Preprocessing: Stemming, stop-word removal and Lemmatization.

In [7]:
#set of stopwords in English
from nltk.corpus import stopwords
stop = set(stopwords.words('english'))
words_to_keep = set(('not'))
stop -= words_to_keep
#initialising the snowball stemmer
sno = nltk.stem.SnowballStemmer('english')

 #function to clean the word of any html-tags
def cleanhtml(sentence):
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, ' ', sentence)
    return cleantext

#function to clean the word of any punctuation or special characters
def cleanpunc(sentence): 
    cleaned = re.sub(r'[?|!|\'|"|#]',r'',sentence)
    cleaned = re.sub(r'[.|,|)|(|\|/]',r' ',cleaned)
    return  cleaned

In [8]:
#Code for removing HTML tags , punctuations . Code for removing stopwords . Code for checking if word is not alphanumeric and
# also greater than 2 . Code for stemmimg and also to convert them to lowercase letters 
i=0
str1=' '
final_string=[]
all_positive_words=[] # store words from +ve reviews here
all_negative_words=[] # store words from -ve reviews here.
s=''
for sent in final['Text'].values:
    filtered_sentence=[]
    #print(sent);
    sent=cleanhtml(sent) # remove HTMl tags
    for w in sent.split():
        for cleaned_words in cleanpunc(w).split():
            if((cleaned_words.isalpha()) & (len(cleaned_words)>2)):    
                if(cleaned_words.lower() not in stop):
                    s=(sno.stem(cleaned_words.lower())).encode('utf8')
                    filtered_sentence.append(s)
                    if (final['Score'].values)[i] == 'positive': 
                        all_positive_words.append(s) #list of all words used to describe positive reviews
                    if(final['Score'].values)[i] == 'negative':
                        all_negative_words.append(s) #list of all words used to describe negative reviews reviews
                else:
                    continue
            else:
                continue 
    
    str1 = b" ".join(filtered_sentence) #final string of cleaned words
    
    
    final_string.append(str1)
    i+=1

In [9]:
#adding a column of CleanedText which displays the data after pre-processing of the review
final['CleanedText']=final_string  
final['CleanedText']=final['CleanedText'].str.decode("utf-8")
#below the processed review can be seen in the CleanedText Column 
print('Shape of final',final.shape)
final.head()

Shape of final (364136, 11)


Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text,CleanedText
476617,515426,141278509X,AB1A5EGHHVA9M,CHelmic,1,1,positive,1332547200,The best drink mix,This product by Archer Farms is the best drink...,product archer farm best drink mix ever mix fl...
22621,24751,2734888454,A1C298ITT645B6,Hugh G. Pritchard,0,0,positive,1195948800,Dog Lover Delites,Our dogs just love them. I saw them in a pet ...,dog love saw pet store tag attach regard made ...
22620,24750,2734888454,A13ISQV0U9GZIC,Sandikaye,1,1,negative,1192060800,made in china,My dogs loves this chicken but its a product f...,dog love chicken product china wont buy anymor...
157850,171161,7310172001,AFXMWPNS1BLU4,H. Sandler,0,0,positive,1229385600,Excellent treats,I have been feeding my greyhounds these treats...,feed greyhound treat year hound littl finicki ...
157849,171160,7310172001,A74C7IARQEM1R,stucker,0,0,positive,1230076800,Sophie's Treats,This is one product that my welsh terrier can ...,one product welsh terrier eat sophi food alerg...


RANDOMLY SAMPLING 5K POINTS OUT OF WHOLE DATASET

In [10]:
##Sorting data according to Time in ascending order for Time Based Splitting
time_sorted_data = final.sort_values('Time', axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last')

# We will collect different 40K rows without repetition from time_sorted_data dataframe
my_final = time_sorted_data.take(np.random.permutation(len(final))[:5000])

x = my_final['CleanedText'].values

# (1). Bag of Words (BoW)

In [11]:
#BoW
count_vect = CountVectorizer(min_df = 100) 
data = count_vect.fit_transform(x)
print("the type of count vectorizer :",type(data))
print("the shape of out text BOW vectorizer : ",data.get_shape())
print("the number of unique words :", data.get_shape()[1])

the type of count vectorizer : <class 'scipy.sparse.csr.csr_matrix'>
the shape of out text BOW vectorizer :  (5000, 347)
the number of unique words : 347


# Hierarchical Clustering with 2 clusters

In [12]:
from sklearn.cluster import AgglomerativeClustering

model = AgglomerativeClustering(n_clusters=2).fit(data.toarray())

reviews = my_final['Text'].values
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    else :
        cluster2.append(reviews[i])
 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))

No. of reviews in Cluster-1 :  4824

No. of reviews in Cluster-2 :  176


READING REVIEWS MANUALLY:

In [13]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-3 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"



In [14]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice Organic Tea line) is "Fair Trade".<br /><br />This tea is caffeine free and delicious hot or cold. --> i have added 2 bags when brewing in the sun for iced tea.  Delicious, when sweetened with natural honey!<br /><br />It is important to believe in a product, and this one uses as many Fair Trade ingredients as possible.

Review-2 : 
 When I read comments that this tea was similar to Earl Gray I decided to try it.  It is nothing like Earl Gray and it does have a flavor and odor that is tough to get used to.  I haven't had any trouble with any of the tea bags breaking open like some of the raters.

Review-3 : 
 Twinings English Afternoon Tea is a superb hot tea, delicious with milk and sugar, with a full, somewhat fruity flavor. Do not be afraid that this is a "watered-down" or weak version of the Breakfast Tea! It is 

# Hierarchical Clustering with 5 clusters

In [15]:
model = AgglomerativeClustering(n_clusters=5).fit(data.toarray())

# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    else :
        cluster5.append(reviews[i]) 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 

No. of reviews in Cluster-1 :  1537

No. of reviews in Cluster-2 :  176

No. of reviews in Cluster-3 :  302

No. of reviews in Cluster-4 :  2984

No. of reviews in Cluster-5 :  1


READING REVIEWS MANUALLY:

In [16]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-2 : 
 I should have listened to the other one star reviewer. My antler was NOT like the picture. Unfortunately, I too got half an antler - meaning it was half of a bisected piece. This exposes all of the marrow, gives your dog no challenge or time with the antler, and may cause an upset system / stomach for the dog. I won't buy this brand again.

Review-3 : 
 I ordered this product to make white, strawberry-flavored icing for some cupcakes for a wedding. since it was so expensive, i did not test it to make sure it actually tasted like strawberry before baking day. My Mistake! It sme

In [17]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice Organic Tea line) is "Fair Trade".<br /><br />This tea is caffeine free and delicious hot or cold. --> i have added 2 bags when brewing in the sun for iced tea.  Delicious, when sweetened with natural honey!<br /><br />It is important to believe in a product, and this one uses as many Fair Trade ingredients as possible.

Review-2 : 
 When I read comments that this tea was similar to Earl Gray I decided to try it.  It is nothing like Earl Gray and it does have a flavor and odor that is tough to get used to.  I haven't had any trouble with any of the tea bags breaking open like some of the raters.

Review-3 : 
 Twinings English Afternoon Tea is a superb hot tea, delicious with milk and sugar, with a full, somewhat fruity flavor. Do not be afraid that this is a "watered-down" or weak version of the Breakfast Tea! It is 

In [18]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see anything about the grind in the product description (maybe I just overlooked it), but this coffee is ground finely. If you do use it in the Solofill, spoon it in so it is very loose, then shake, or the water won't flow through well.

Review-2 : 
 If you're looking for a delicious, smooth, medium roast coffee blended with a good dose of chocolate flavor, this Gevalia coffee might be right up your alley! It brews up delicious, smelling strongly of chocolate, and tasting satisfyingly of it, too. There's no need to add anything to this flavored coffee, if you're looking for a taste that enhances the flavor of the coffee without overpowering it and making it taste 

In [19]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-3 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for that matter). This is a refreshing exception.<br /><br />The pros: 70% juice, not overly calorie-ridden, the Izze drinks DO contain a bit of Vitamins C

In [20]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 Fuzzy Wuzzy's Summary:<br />**** Recommended with warm fuzzies.<br /><br />I just received my single 2.25-ounce bar this morning and it was a good thing that I already ate breakfast as I nibbled (instead of chomped) on this with my morning coffee.  I have always liked all of Newman's line of foods, because they taste good and are made of high-quality ingredients, and because his foundation donates hundreds of millions of dollars to charities throughout the world.  (And I love his movies! :-)  Paul Newman walked the philanthropic walk in helping to make the world a better place.<br /><br />This 54% cocoa dark chocolate bar has an all-organic ingredient list that is free of artificial sweetener and extraneous additives and fillers, which is all too common in chocolates, candies, and foods these days.<br /><br />Note that, at the time of this review's writing, the "Ingredients" and "Nutrition Facts" listed on Amazon's site for this dark chocolate bar are actually copied from 

# Hierarchical Clustering with 10 clusters

In [21]:
model = AgglomerativeClustering(n_clusters=10).fit(data.toarray())
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []
cluster6 = []
cluster7 = []
cluster8 = []
cluster9 = []
cluster10 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    elif model.labels_[i] == 4:
        cluster5.append(reviews[i])
    elif model.labels_[i] == 5:
        cluster6.append(reviews[i])
    elif model.labels_[i] == 6:
        cluster7.append(reviews[i])
    elif model.labels_[i] == 7:
        cluster8.append(reviews[i])
    elif model.labels_[i] == 8:
        cluster9.append(reviews[i])       
    else :
        cluster10.append(reviews[i])

In [22]:
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 
print("\nNo. of reviews in Cluster-6 : ",len(cluster6))
print("\nNo. of reviews in Cluster-7 : ",len(cluster7))
print("\nNo. of reviews in Cluster-8 : ",len(cluster8))
print("\nNo. of reviews in Cluster-9 : ",len(cluster9))
print("\nNo. of reviews in Cluster-10 : ",len(cluster10)) 

No. of reviews in Cluster-1 :  1174

No. of reviews in Cluster-2 :  2984

No. of reviews in Cluster-3 :  308

No. of reviews in Cluster-4 :  48

No. of reviews in Cluster-5 :  21

No. of reviews in Cluster-6 :  18

No. of reviews in Cluster-7 :  34

No. of reviews in Cluster-8 :  254

No. of reviews in Cluster-9 :  158

No. of reviews in Cluster-10 :  1


READING REVIEWS MANUALLY:

In [23]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-2 : 
 I ordered this product to make white, strawberry-flavored icing for some cupcakes for a wedding. since it was so expensive, i did not test it to make sure it actually tasted like strawberry before baking day. My Mistake! It smelled like bad bubblegum and tasted like Windex or something - just a terrible, chemical taste. i made the icing, then tasted it and thought "what was on my finger!? nail polish remover or something?" well I hadn't done my nails, it was the icing! i could not serve that. I had to start over and use vanilla at the last minute. Thankfully the bride was very

In [24]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-3 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for that matter). This is a refreshing exception.<br /><br />The pros: 70% juice, not overly calorie-ridden, the Izze drinks DO contain a bit of Vitamins C

In [25]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 I should have listened to the other one star reviewer. My antler was NOT like the picture. Unfortunately, I too got half an antler - meaning it was half of a bisected piece. This exposes all of the marrow, gives your dog no challenge or time with the antler, and may cause an upset system / stomach for the dog. I won't buy this brand again.

Review-2 : 
 Given that our dogs don't get rawhide treats often, we thought they might like to try these. However, both our 70 lb Aussie and 10 lb. Papillon rejected them. The Aussie eventually gave in and tried to bury one in the back yard, but never bothered to cover the hole. This is pretty important, because our boys will eat nearly anything! These were a nice size for small or large dogs, but neither seemed interested, despite that fact.  That said, these two dogs of ours love dehydrated cow lungs and organic carrots for snacks, so perhaps their pallates are just too refined for cardboard filled rawhide chews.

Review-3 : 
 This is

In [26]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 If you're looking for a delicious, smooth, medium roast coffee blended with a good dose of chocolate flavor, this Gevalia coffee might be right up your alley! It brews up delicious, smelling strongly of chocolate, and tasting satisfyingly of it, too. There's no need to add anything to this flavored coffee, if you're looking for a taste that enhances the flavor of the coffee without overpowering it and making it taste like something else.<br /><br />If you're a chocolate-loving coffee drinker that enjoys a hot mug of joe in the morning, this might just be the perfect coffee for you - chocolatey flavor that wakes you up, AND gives you your jump start! What could be better? Enjoy!

Review-2 : 
 I have tried many different coffee's over the past severeal years and I am a hard customer to please. I ran across this coffee at my son's house and loved it. I love a bold coffee and this one tasted better than any of the coffee's I have had to date so I decided to join the coffee clu

In [27]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 What a waste of money. All four cats (three indoor and one feral outdoor cat) won't eat the wet food. They like the juice and will lick that up, but they leave the meaty bits behind. I tried each flavor and not one cat will eat this stuff! I was hoping to save some money with Friskies, but it is what it is. And what it is is food that a hungry wild cat won't eat; she'd rather catch and eat mice. :\ My suggestion is to go to the store and buy a couple different cans and try that out first. If your cat doesn't eat it, it'll save you from having 2 dozen paper weights.

Review-2 : 
 After our father died, my sister inherited Duke, our dad's 4 month old orange tabby kitten.  My sister kept him for over a year, but decided she couldn't keep him anymore, so in 2010, I grudgingly adopted him.  (I'm sentimental and couldn't let a cat that once belonged to our dad end up in the hands of strangers.)  I am a rabid dog fanatic.  I like cats, but don't have the same enthusiasm for them 

In [28]:
# Three Reviews of cluster 6
count=1
for i in range(3):
    if i < len(cluster6):
        print('Review-%d : \n %s\n'%(count,cluster6[i]))
        count +=1

Review-1 : 
 I've been a tea fan for decades; I've written about tea and published stories about tea. I can tell you about second flush Darjeelings and "tippy golden" varieties. Like most self-styled "serious" tea drinkers, I've never been a fan of bagged tea, preferring to brew from "loose" leaves.<br /><br />Given all of that, I have to say that the Higgins & Burke was a very nice surprise. English Breakfast can mean a lot of things, but the leading element of the blend is usually Assam tea, which is known for its supposedly "malty" flavor. The H&B lists Assam as a main ingredient, which was good to see.<br /><br />The bags have a "flow through" design, which is also a good thing; this gives more surface area to the bags and allows for fuller brewing.<br /><br />I brewed up a pot--- not a cup. These tea bags really ought to be put in a pot. If you brew them in a cup, unless you cover it the water will cool too quickly during brewing, and besides, you'll have to reduce the brewing tim

In [29]:
# Three Reviews of cluster 7
count=1
for i in range(3):
    if i < len(cluster7):
        print('Review-%d : \n %s\n'%(count,cluster7[i]))
        count +=1

Review-1 : 
 First of all, I have no ties with Truvia. In fact, I decided to replace my Truvia recently with Stevia in the Raw which is why I'm now typing this. In my grocery store the Stevia in the Raw was cheaper. I'd always felt Truvia expensive, so I was all over that.<br /><br />While I personally do not believe the phrase "You get what you pay for" since sometimes a consumer is only paying for more marketing in a more expensive product...in this case, it's true.<br /><br />I do like sugar in the raw and I do buy it. It's way better than regular sugar being bleached and processed through bone char (for reals).<br /><br />But, first of all, this isn't "in the raw". That would mean it's pure Stevia. It also contains Dextrose. Dextrose is a very cheap additive that's sweet that's made from corn. It's high carb. Great if you just finished a tough workout and need carbs. Not great if you call yourself Stevia in the RAW<br /><br />Having formerly worked in food chemical sales, I know th

In [30]:
# Three Reviews of cluster 8
count=1
for i in range(3):
    if i < len(cluster8):
        print('Review-%d : \n %s\n'%(count,cluster8[i]))
        count +=1

Review-1 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see anything about the grind in the product description (maybe I just overlooked it), but this coffee is ground finely. If you do use it in the Solofill, spoon it in so it is very loose, then shake, or the water won't flow through well.

Review-2 : 
 Great coffee flavor in a decaf blend, and I like my coffee. And the convenience of the K cup.

Review-3 : 
 The coffee arrived quickly. It has a good flavor. I'm not sure what the "(Misc.)" means in the title. I thought it meant a variet pack, but all the K cups are the same. I'm glad they didn't end up being a mild brew or decaf.<br />The k cups are loose in the box, so there were a few grounds on the bottom of the b

In [31]:
# Three Reviews of cluster 9
count=1
for i in range(3):
    if i < len(cluster9):
        print('Review-%d : \n %s\n'%(count,cluster9[i]))
        count +=1

Review-1 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice Organic Tea line) is "Fair Trade".<br /><br />This tea is caffeine free and delicious hot or cold. --> i have added 2 bags when brewing in the sun for iced tea.  Delicious, when sweetened with natural honey!<br /><br />It is important to believe in a product, and this one uses as many Fair Trade ingredients as possible.

Review-2 : 
 When I read comments that this tea was similar to Earl Gray I decided to try it.  It is nothing like Earl Gray and it does have a flavor and odor that is tough to get used to.  I haven't had any trouble with any of the tea bags breaking open like some of the raters.

Review-3 : 
 Twinings English Afternoon Tea is a superb hot tea, delicious with milk and sugar, with a full, somewhat fruity flavor. Do not be afraid that this is a "watered-down" or weak version of the Breakfast Tea! It is 

In [32]:
# Three Reviews of cluster 10
count=1
for i in range(3):
    if i < len(cluster10):
        print('Review-%d : \n %s\n'%(count,cluster10[i]))
        count +=1

Review-1 : 
 Fuzzy Wuzzy's Summary:<br />**** Recommended with warm fuzzies.<br /><br />I just received my single 2.25-ounce bar this morning and it was a good thing that I already ate breakfast as I nibbled (instead of chomped) on this with my morning coffee.  I have always liked all of Newman's line of foods, because they taste good and are made of high-quality ingredients, and because his foundation donates hundreds of millions of dollars to charities throughout the world.  (And I love his movies! :-)  Paul Newman walked the philanthropic walk in helping to make the world a better place.<br /><br />This 54% cocoa dark chocolate bar has an all-organic ingredient list that is free of artificial sweetener and extraneous additives and fillers, which is all too common in chocolates, candies, and foods these days.<br /><br />Note that, at the time of this review's writing, the "Ingredients" and "Nutrition Facts" listed on Amazon's site for this dark chocolate bar are actually copied from 

# (2) TFIDF

In [33]:
tf_idf_vect = TfidfVectorizer(min_df=100)
data = tf_idf_vect.fit_transform(x)
print("the type of count vectorizer :",type(data))
print("the shape of out text TFIDF vectorizer : ",data.get_shape())
print("the number of unique words :", data.get_shape()[1])

the type of count vectorizer : <class 'scipy.sparse.csr.csr_matrix'>
the shape of out text TFIDF vectorizer :  (5000, 347)
the number of unique words : 347


# Hierarchical Clustering with 2 clusters

In [34]:
model = AgglomerativeClustering(n_clusters=2).fit(data.toarray())

reviews = my_final['Text'].values
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    else :
        cluster2.append(reviews[i])
 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))

No. of reviews in Cluster-1 :  4743

No. of reviews in Cluster-2 :  257


READING REVIEWS MANUALLY:

In [35]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    print('Review-%d : \n %s\n'%(count,cluster1[i]))
    count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-3 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"



In [36]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 I have tried literally dozens of teas since being introduced to the Russian custom of preparing and drinking tea, and the Ahmad Imperial blend is probably my favorite of all of these due to the fact that it is equally delicious hot or iced.  I buy the three-packs as needed from Amazon and go through them rapidly. My family loves it and my friends have it in their homes now too, try one box of it and you will love it too.

Review-2 : 
 No idea why some people are saying this is bad... I guess it doesnt compare to the teas when you're actually in Thailand or something. As for me, this tastes just like the tea from every thai restaraunt i've been to... which is a good thing! No complaints from me , I found what I was looking for. Only giving 4 stars cause I reserve 5 for products which truly blow my mind

Review-3 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice O

# Hierarchical Clustering with 5 clusters

In [37]:
model = AgglomerativeClustering(n_clusters=5).fit(data.toarray())

# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    else :
        cluster5.append(reviews[i]) 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 

No. of reviews in Cluster-1 :  4146

No. of reviews in Cluster-2 :  212

No. of reviews in Cluster-3 :  289

No. of reviews in Cluster-4 :  257

No. of reviews in Cluster-5 :  96


READING REVIEWS MANUALLY:

In [38]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-3 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"



In [39]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 Given that our dogs don't get rawhide treats often, we thought they might like to try these. However, both our 70 lb Aussie and 10 lb. Papillon rejected them. The Aussie eventually gave in and tried to bury one in the back yard, but never bothered to cover the hole. This is pretty important, because our boys will eat nearly anything! These were a nice size for small or large dogs, but neither seemed interested, despite that fact.  That said, these two dogs of ours love dehydrated cow lungs and organic carrots for snacks, so perhaps their pallates are just too refined for cardboard filled rawhide chews.

Review-2 : 
 I must say these are the best puffed lamb ears we've tried and my dog who normally has a very sensitive stomach can eat as many as she wants and never gets ill from them. They are crunchy and fun and smell good too; all the things a dog likes in a treat. We buy them all the time.

Review-3 : 
 I have two golden retrievers with hearty appetites.  Feeding them re

In [40]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see anything about the grind in the product description (maybe I just overlooked it), but this coffee is ground finely. If you do use it in the Solofill, spoon it in so it is very loose, then shake, or the water won't flow through well.

Review-2 : 
 I ordered a second case.  Makes great Dark and Stormies or very refreshing and medicinal cold out of the bottle.  My wife won't touch the stuff.  You got to like ginger or ginger ale. You will be disappointed if not.

Review-3 : 
 Usually do not get the breakfast blends but it was on sale and decided at price would try it out. It was not as weak as some breakfast blends tend to be. Not bad.



In [41]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 I have tried literally dozens of teas since being introduced to the Russian custom of preparing and drinking tea, and the Ahmad Imperial blend is probably my favorite of all of these due to the fact that it is equally delicious hot or iced.  I buy the three-packs as needed from Amazon and go through them rapidly. My family loves it and my friends have it in their homes now too, try one box of it and you will love it too.

Review-2 : 
 No idea why some people are saying this is bad... I guess it doesnt compare to the teas when you're actually in Thailand or something. As for me, this tastes just like the tea from every thai restaraunt i've been to... which is a good thing! No complaints from me , I found what I was looking for. Only giving 4 stars cause I reserve 5 for products which truly blow my mind

Review-3 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice O

In [42]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 This is the best cat food for my feral cats! The 40 lb size is a great buy, and the cat food is good for them! you really can't beat the price for decent food for feeding a lot of cats!

Review-2 : 
 What a waste of money. All four cats (three indoor and one feral outdoor cat) won't eat the wet food. They like the juice and will lick that up, but they leave the meaty bits behind. I tried each flavor and not one cat will eat this stuff! I was hoping to save some money with Friskies, but it is what it is. And what it is is food that a hungry wild cat won't eat; she'd rather catch and eat mice. :\ My suggestion is to go to the store and buy a couple different cans and try that out first. If your cat doesn't eat it, it'll save you from having 2 dozen paper weights.

Review-3 : 
 These are like candy for our cat, who goes crazy when she hears the bag rustle (or anything that sounds like it).  If you want to give your cats treats, they will probably love these.  They are also so

# Hierarchical Clustering with 10 clusters

In [43]:
model = AgglomerativeClustering(n_clusters=10).fit(data.toarray())
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []
cluster6 = []
cluster7 = []
cluster8 = []
cluster9 = []
cluster10 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    elif model.labels_[i] == 4:
        cluster5.append(reviews[i])
    elif model.labels_[i] == 5:
        cluster6.append(reviews[i])
    elif model.labels_[i] == 6:
        cluster7.append(reviews[i])
    elif model.labels_[i] == 7:
        cluster8.append(reviews[i])
    elif model.labels_[i] == 8:
        cluster9.append(reviews[i])       
    else :
        cluster10.append(reviews[i]) 

In [44]:
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 
print("\nNo. of reviews in Cluster-6 : ",len(cluster6))
print("\nNo. of reviews in Cluster-7 : ",len(cluster7))
print("\nNo. of reviews in Cluster-8 : ",len(cluster8))
print("\nNo. of reviews in Cluster-9 : ",len(cluster9))
print("\nNo. of reviews in Cluster-10 : ",len(cluster10))

No. of reviews in Cluster-1 :  3745

No. of reviews in Cluster-2 :  212

No. of reviews in Cluster-3 :  147

No. of reviews in Cluster-4 :  257

No. of reviews in Cluster-5 :  71

No. of reviews in Cluster-6 :  289

No. of reviews in Cluster-7 :  64

No. of reviews in Cluster-8 :  62

No. of reviews in Cluster-9 :  57

No. of reviews in Cluster-10 :  96


READING REVIEWS MANUALLY:

In [45]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-3 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anythi

In [46]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 Given that our dogs don't get rawhide treats often, we thought they might like to try these. However, both our 70 lb Aussie and 10 lb. Papillon rejected them. The Aussie eventually gave in and tried to bury one in the back yard, but never bothered to cover the hole. This is pretty important, because our boys will eat nearly anything! These were a nice size for small or large dogs, but neither seemed interested, despite that fact.  That said, these two dogs of ours love dehydrated cow lungs and organic carrots for snacks, so perhaps their pallates are just too refined for cardboard filled rawhide chews.

Review-2 : 
 I must say these are the best puffed lamb ears we've tried and my dog who normally has a very sensitive stomach can eat as many as she wants and never gets ill from them. They are crunchy and fun and smell good too; all the things a dog likes in a treat. We buy them all the time.

Review-3 : 
 I have two golden retrievers with hearty appetites.  Feeding them re

In [47]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 25calories! And chocolate! I am on weight watchers and this is only one point! I love this for night time treat.

Review-2 : 
 Both the oatmeal and double chocolate chunk taste and look like quality products. They are both very rich ingredients and especially for the chocolate one, you can definitely taste the chocolatey chunks (and see them on your hands too)!<br /><br />Unfortunately the other reviewer is correct, these are very dry. Eat them with a coffee, tea or milk and it's a great pairing. If you don't have water or some other drink lying around you will probably find these hard to stomach and swallow.

Review-3 : 
 I received a free sample of this bar from Influenster.com.  I really enjoyed this bar.  It's a great alternative to eating candy bars or unhealthy sweets.  If you are working and need a mid-day snack, these are great.  I can also see how they'd be a great bar to eat in the mornings if you are running late but want to make sure you don't skip breakfast.  

In [48]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 I have tried literally dozens of teas since being introduced to the Russian custom of preparing and drinking tea, and the Ahmad Imperial blend is probably my favorite of all of these due to the fact that it is equally delicious hot or iced.  I buy the three-packs as needed from Amazon and go through them rapidly. My family loves it and my friends have it in their homes now too, try one box of it and you will love it too.

Review-2 : 
 No idea why some people are saying this is bad... I guess it doesnt compare to the teas when you're actually in Thailand or something. As for me, this tastes just like the tea from every thai restaraunt i've been to... which is a good thing! No complaints from me , I found what I was looking for. Only giving 4 stars cause I reserve 5 for products which truly blow my mind

Review-3 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice O

In [49]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 This oil is not liquid at room temperature. Also, it has a relatively low smoke point. Those facts out of they way, this is a very high quality coconut oil. It smells wonderful and everything cooked in it takes on a hint of coconut. I use this in my coffee, fry eggs in it, saute spinach in it, as well as enjoy a tablespoon of it before my workouts. I have read that there are non-food related things you can do with this as well (good for skin, etc) but I have only experienced it through a culinary lens.

Review-2 : 
 I had been wanting to try rice bran oil for several months, but couldn't find it anywhere locally.  I even went to an Asian grocery store and they had never heard of it!  After searching I discovered that Amazon had the Tophe 1/2 gallon rice bran oil for a better price than any other that I could find, so I ordered two of them.  I poured both into my deep fryer, and WOW!  I couldn't believe how easily and perfectly everything browned, and everything came out pe

In [50]:
# Three Reviews of cluster 6
count=1
for i in range(3):
    if i < len(cluster6):
        print('Review-%d : \n %s\n'%(count,cluster6[i]))
        count +=1

Review-1 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see anything about the grind in the product description (maybe I just overlooked it), but this coffee is ground finely. If you do use it in the Solofill, spoon it in so it is very loose, then shake, or the water won't flow through well.

Review-2 : 
 I ordered a second case.  Makes great Dark and Stormies or very refreshing and medicinal cold out of the bottle.  My wife won't touch the stuff.  You got to like ginger or ginger ale. You will be disappointed if not.

Review-3 : 
 Usually do not get the breakfast blends but it was on sale and decided at price would try it out. It was not as weak as some breakfast blends tend to be. Not bad.



In [51]:
# Three Reviews of cluster 7
count=1
for i in range(3):
    if i < len(cluster7):
        print('Review-%d : \n %s\n'%(count,cluster7[i]))
        count +=1

Review-1 : 
 Completely opposite of other reviewer.  I love these cookies.  However, if you are looking for a cookie that is laden with sugar and chemicals, this is not it.  These aren't overly sweet.  And really not too much chocolate taste in these either.  But I like them.  Finally, a cookie that tastes good that I can eat.  No chemicals, very little salt (35mg) and only 5mg cholesterol.  Oh, and the sugar is only 3g.  So, definitely not sweet.

Review-2 : 
 These cookies are tasty.  At times they're a normal part of my daily diet.  I've eaten a bag for breakfast on multiple occasions.  Also try Trader Joe's vegan trail mix cookies for an alternative.  I can't decide which ones I like better.  TJ's are chewier.  Both rank as my favorite cookies.

Review-3 : 
 These are delicious! They taste like little shortbread cookies and are coated with a powdery "frosting" flavored powder. I love that they're made with whole grains and at 140 calories for 35 crackers they make a great snack!



In [52]:
# Three Reviews of cluster 8
count=1
for i in range(3):
    if i < len(cluster8):
        print('Review-%d : \n %s\n'%(count,cluster8[i]))
        count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 Best hot sauce, I've ever tried!!! DO NOT BE SKEPTIC about this! Read all reviews on the I-net, before you order. Just ONE DROP will do it! I mean it! I love spicy food, but this sauce can make you cry and sweat and your a*hole won't be happy, trust me!<br />Tabasco is a sweet tea in comparison to Dave's Insanity!<br />Never ever try it on your friends or as a prank! NEVER! Enjoy!

Review-3 : 
 I decided to try these noodles with pesto sauce. They were great! They are whole grain buckwheat which is supposed to be nutritious and they taste good. I will use them instead of other pastas whenever I can.



In [53]:
# Three Reviews of cluster 9
count=1
for i in range(3):
    if i < len(cluster9):
        print('Review-%d : \n %s\n'%(count,cluster9[i]))
        count +=1

Review-1 : 
 Really pleased with first order back in July. Not so much with the second order. When the second order arrived the packaging hinted at the difference. The first order was packaged better in a clear plastic bag inside the box. Second order came in gray plastic bag hiding the contents and was sloppily taped together. Inside was a baggie tied with a twist tie containing the candy that looked like someone did it in their kitchen almost. The product in the second order looked and tasted old, like something left in a warehouse too long. Candy is not fresh-tasting and it looks almost oxidized or something -- a strange, muddled color of purple. When I have bought Pops that looked like this at the store they always tasted old and were a little chewy. These look and taste the same, so guessing they have been sitting somewhere too long. The outer candy coating is chewy and somewhat soft (the middle is supposed to be chewy). So to sum: first order great tasting and nice package, secon

In [54]:
# Three Reviews of cluster 10
count=1
for i in range(3):
    if i < len(cluster10):
        print('Review-%d : \n %s\n'%(count,cluster10[i]))
        count +=1

Review-1 : 
 This is the best cat food for my feral cats! The 40 lb size is a great buy, and the cat food is good for them! you really can't beat the price for decent food for feeding a lot of cats!

Review-2 : 
 What a waste of money. All four cats (three indoor and one feral outdoor cat) won't eat the wet food. They like the juice and will lick that up, but they leave the meaty bits behind. I tried each flavor and not one cat will eat this stuff! I was hoping to save some money with Friskies, but it is what it is. And what it is is food that a hungry wild cat won't eat; she'd rather catch and eat mice. :\ My suggestion is to go to the store and buy a couple different cans and try that out first. If your cat doesn't eat it, it'll save you from having 2 dozen paper weights.

Review-3 : 
 These are like candy for our cat, who goes crazy when she hears the bag rustle (or anything that sounds like it).  If you want to give your cats treats, they will probably love these.  They are also so

# Word2Vec

In [55]:
# List of sentence in X_train text
sent_x = []
for sent in x :
    sent_x.append(sent.split())
  
    
# Train your own Word2Vec model using your own train text corpus 
# min_count = 5 considers only words that occured atleast 5 times
w2v_model=Word2Vec(sent_x,min_count=5,size=50, workers=4)

w2v_words = list(w2v_model.wv.vocab)
print("number of words that occured minimum 5 times ",len(w2v_words))

number of words that occured minimum 5 times  3149


# (3). Avg Word2Vec

In [56]:
# compute average word2vec for each review for sent_x .
train_vectors = []; 
for sent in sent_x:
    sent_vec = np.zeros(50) 
    cnt_words =0; 
    for word in sent: # 
        if word in w2v_words:
            vec = w2v_model.wv[word]
            sent_vec += vec
            cnt_words += 1
    if cnt_words != 0:
        sent_vec /= cnt_words
    train_vectors.append(sent_vec)
    
data = train_vectors  

# Hierarchical Clustering with 2 clusters

In [58]:
model = AgglomerativeClustering(n_clusters=2).fit(data)

reviews = my_final['Text'].values
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    else :
        cluster2.append(reviews[i])
 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))

No. of reviews in Cluster-1 :  2948

No. of reviews in Cluster-2 :  2052


READING REVIEWS MANUALLY:

In [59]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    print('Review-%d : \n %s\n'%(count,cluster1[i]))
    count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-3 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for th

In [60]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 A friend introduced this tea several years ago, and I have been searching our local grocery stores and kitchen shops for it since then to no avail.  Then I thought about searching online for it and found it at the best price through Amazon.com.  I purchased a large quantity of this tea and have been serving it to family and friends throughout this Holiday Season.  Everyone loves it!!

Review-3 : 
 So grateful for this!! What an amazing mix. It can be used to make some of the best gluten free products out there. What a break through for us gluten free individuals who miss REAL mixes. Some of the other ones on the market just dont do the trick like this does :)



# Hierarchical Clustering with 5 clusters

In [62]:
model = AgglomerativeClustering(n_clusters=5).fit(data)

# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    else :
        cluster5.append(reviews[i]) 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5))

No. of reviews in Cluster-1 :  1688

No. of reviews in Cluster-2 :  1712

No. of reviews in Cluster-3 :  818

No. of reviews in Cluster-4 :  364

No. of reviews in Cluster-5 :  418


READING REVIEWS MANUALLY:

In [63]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 So grateful for this!! What an amazing mix. It can be used to make some of the best gluten free products out there. What a break through for us gluten free individuals who miss REAL mixes. Some of the other ones on the market just dont do the trick like this does :)

Review-2 : 
 I should have listened to the other one star reviewer. My antler was NOT like the picture. Unfortunately, I too got half an antler - meaning it was half of a bisected piece. This exposes all of the marrow, gives your dog no challenge or time with the antler, and may cause an upset system / stomach for the dog. I won't buy this brand again.

Review-3 : 
 25calories! And chocolate! I am on weight watchers and this is only one point! I love this for night time treat.



In [64]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for that matter). This is a refreshing exception.<br /><br />The pros: 70% juice, not overly calorie-ridden, the Izze drinks DO contain a bit of Vitamins C, B6 and Niacin, but do NOT contain any dreaded (and seemingly ubiquitous) high-fructose corn syrup (about which I have heard nothing good. Not one thing), and it tastes really good.<br /><br />The cons: Okay, not so much a con perhaps, but one should be aware that the product contains no more than 10% of the RDA of any of the aforementioned vitamins and minerals.<br /><br />So as long as you don't think of it as a magic health elixir, but rather as a great-tasting, refreshing 

In [65]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 This is a great mix.  Used it in a crock pot, and it's wonderful to come home to a great  meal!

Review-3 : 
 Very tasty.  I was worried that this would be too hot for some of my family but everyone liked it.  Best with shrimp but it also works well with chicken.



In [66]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 A friend introduced this tea several years ago, and I have been searching our local grocery stores and kitchen shops for it since then to no avail.  Then I thought about searching online for it and found it at the best price through Amazon.com.  I purchased a large quantity of this tea and have been serving it to family and friends throughout this Holiday Season.  Everyone loves it!!

Review-3 : 
 My sister loves Good Earth Original Caffeine Free tea.  We used to be able to get it locally, but it has become less available over the last year.  I searched several stores before checking amazon.  I was so happy to have found it here and will continue to purchase it here for as long as possible.



In [67]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-2 : 
 I was misled by the name and thought that the cotechino was imported from Italy. Wrong! It was made in the US but was so salty that it was inedible. You learn from your mistakes.

Review-3 : 
 Imagine farmers on earth lose the ability to grow peanuts. And, 10000 years down the road, an alien species who never tasted peanut butter before tries to whip up a batch. I am sure the result would be something like Walden Farms Peanut Spread.<br /><br />It's not very peanuty, not very buttery...it's just the vaguest notion of peanut taste in a very foul paste-like consistency. The afte

# Hierarchical Clustering with 10 clusters

In [69]:
model = AgglomerativeClustering(n_clusters=10).fit(data)
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []
cluster6 = []
cluster7 = []
cluster8 = []
cluster9 = []
cluster10 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    elif model.labels_[i] == 4:
        cluster5.append(reviews[i])
    elif model.labels_[i] == 5:
        cluster6.append(reviews[i])
    elif model.labels_[i] == 6:
        cluster7.append(reviews[i])
    elif model.labels_[i] == 7:
        cluster8.append(reviews[i])
    elif model.labels_[i] == 8:
        cluster9.append(reviews[i])       
    else :
        cluster10.append(reviews[i]) 

In [70]:
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 
print("\nNo. of reviews in Cluster-6 : ",len(cluster6))
print("\nNo. of reviews in Cluster-7 : ",len(cluster7))
print("\nNo. of reviews in Cluster-8 : ",len(cluster8))
print("\nNo. of reviews in Cluster-9 : ",len(cluster9))
print("\nNo. of reviews in Cluster-10 : ",len(cluster10)) 

No. of reviews in Cluster-1 :  601

No. of reviews in Cluster-2 :  1260

No. of reviews in Cluster-3 :  452

No. of reviews in Cluster-4 :  317

No. of reviews in Cluster-5 :  418

No. of reviews in Cluster-6 :  413

No. of reviews in Cluster-7 :  501

No. of reviews in Cluster-8 :  298

No. of reviews in Cluster-9 :  674

No. of reviews in Cluster-10 :  66


READING REVIEWS MANUALLY:

In [71]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I got a popcorn maker for Christmas and after doing some research, I found this product. The key is the oil. Since than, I have ordered several boxes. It seems I always have popcorn on hand now. Once you try it, you will be sold.

Review-2 : 
 Tried for a few years to obtain gooseberries but Amazon was the go-to place!!  Now I can bake pies just like my mothers and remember her every time I take a bite.  Thank you!!!!

Review-3 : 
 The product itself was inedible and mostly crumbs. It even smelled bad. The star is there because of the packing material. My cats LOVED the packing material. Played in it for days. I'd almost buy the product again for that packing material.



In [72]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for that matter). This is a refreshing exception.<br /><br />The pros: 70% juice, not overly calorie-ridden, the Izze drinks DO contain a bit of Vitamins C, B6 and Niacin, but do NOT contain any dreaded (and seemingly ubiquitous) high-fructose corn syrup (about which I have heard nothing good. Not one thing), and it tastes really good.<br /><br />The cons: Okay, not so much a con perhaps, but one should be aware that the product contains no more than 10% of the RDA of any of the aforementioned vitamins and minerals.<br /><br />So as long as you don't think of it as a magic health elixir, but rather as a great-tasting, refreshing 

In [73]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 This tea has a smooth and flavorful taste.  No bitterness. Cheaper buying it this way,than at store. 40 bags $3.50 great way to save. and always have them on hand

Review-2 : 
 I have tried literally dozens of teas since being introduced to the Russian custom of preparing and drinking tea, and the Ahmad Imperial blend is probably my favorite of all of these due to the fact that it is equally delicious hot or iced.  I buy the three-packs as needed from Amazon and go through them rapidly. My family loves it and my friends have it in their homes now too, try one box of it and you will love it too.

Review-3 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see anything about the grind in the product description 

In [74]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 Completely opposite of other reviewer.  I love these cookies.  However, if you are looking for a cookie that is laden with sugar and chemicals, this is not it.  These aren't overly sweet.  And really not too much chocolate taste in these either.  But I like them.  Finally, a cookie that tastes good that I can eat.  No chemicals, very little salt (35mg) and only 5mg cholesterol.  Oh, and the sugar is only 3g.  So, definitely not sweet.

Review-2 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice Organic Tea line) is "Fair Trade".<br /><br />This tea is caffeine free and delicious hot or cold. --> i have added 2 bags when brewing in the sun for iced tea.  Delicious, when sweetened with natural honey!<br /><br />It is important to believe in a product, and this one uses as many Fair Trade ingredients as possible.

Review-3 : 
 U can taste the lemon in this blend.  I

In [75]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-2 : 
 I was misled by the name and thought that the cotechino was imported from Italy. Wrong! It was made in the US but was so salty that it was inedible. You learn from your mistakes.

Review-3 : 
 Imagine farmers on earth lose the ability to grow peanuts. And, 10000 years down the road, an alien species who never tasted peanut butter before tries to whip up a batch. I am sure the result would be something like Walden Farms Peanut Spread.<br /><br />It's not very peanuty, not very buttery...it's just the vaguest notion of peanut taste in a very foul paste-like consistency. The afte

In [76]:
# Three Reviews of cluster 6
count=1
for i in range(3):
    if i < len(cluster6):
        print('Review-%d : \n %s\n'%(count,cluster6[i]))
        count +=1

Review-1 : 
 I should have listened to the other one star reviewer. My antler was NOT like the picture. Unfortunately, I too got half an antler - meaning it was half of a bisected piece. This exposes all of the marrow, gives your dog no challenge or time with the antler, and may cause an upset system / stomach for the dog. I won't buy this brand again.

Review-2 : 
 25calories! And chocolate! I am on weight watchers and this is only one point! I love this for night time treat.

Review-3 : 
 Excellent cocktails I am preparing and having fun with my girlfriend and friends.<br />Great price and quality! I highly recommend this syrup for your lychee martinis.



In [77]:
# Three Reviews of cluster 7
count=1
for i in range(3):
    if i < len(cluster7):
        print('Review-%d : \n %s\n'%(count,cluster7[i]))
        count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 This is a great mix.  Used it in a crock pot, and it's wonderful to come home to a great  meal!

Review-3 : 
 Very tasty.  I was worried that this would be too hot for some of my family but everyone liked it.  Best with shrimp but it also works well with chicken.



In [78]:
# Three Reviews of cluster 8
count=1
for i in range(3):
    if i < len(cluster8):
        print('Review-%d : \n %s\n'%(count,cluster8[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 A friend introduced this tea several years ago, and I have been searching our local grocery stores and kitchen shops for it since then to no avail.  Then I thought about searching online for it and found it at the best price through Amazon.com.  I purchased a large quantity of this tea and have been serving it to family and friends throughout this Holiday Season.  Everyone loves it!!

Review-3 : 
 My sister loves Good Earth Original Caffeine Free tea.  We used to be able to get it locally, but it has become less available over the last year.  I searched several stores before checking amazon.  I was so happy to have found it here and will continue to purchase it here for as long as possible.



In [79]:
# Three Reviews of cluster 9
count=1
for i in range(3):
    if i < len(cluster9):
        print('Review-%d : \n %s\n'%(count,cluster9[i]))
        count +=1

Review-1 : 
 So grateful for this!! What an amazing mix. It can be used to make some of the best gluten free products out there. What a break through for us gluten free individuals who miss REAL mixes. Some of the other ones on the market just dont do the trick like this does :)

Review-2 : 
 I love licorice these were dry and the flavor was nasty! I think they were many years old no date on the bag. Had to throw them away! Don't waste your money sorry I did.

Review-3 : 
 I have to say this is the best canned soup I have ever eaten. The fact that it is organic with no yucky processed stuff makes me even happier. You don't have to season this soup, at least I don't, it tastes great just heated out of the can. It flies off the organic foods shelf at my local grocery and this 12 pack is the only way to keep it in the pantry. I seldom find anything I am this happy about so try it you will love it!



In [80]:
# Three Reviews of cluster 10
count=1
for i in range(3):
    if i < len(cluster10):
        print('Review-%d : \n %s\n'%(count,cluster10[i]))
        count +=1

Review-1 : 
 This is not a good deal.  I can go to target or other grocery stores and buy four boxes for at least $6.00 cheaper...

Review-2 : 
 The item arrived on-time and in the advertised condition. Would order this product from Amazon again.

Review-3 : 
 The price was considerably better a while back at $19.44. How can they figure the original price was $6.30 per bag, when the company who manufacture's this product sells it for $3.99 per bag plus shipping. For the price they are charging now, I can get it at my local healthfood store for much less. The product is excellent and have used it for many years, but Amazon's price is not a deal anymore!



# (4). TFIDF-Word2Vec

In [81]:
# TF-IDF weighted Word2Vec
tf_idf_vect = TfidfVectorizer()

# final_tf_idf1 is the sparse matrix with row= sentence, col=word and cell_val = tfidf
final_tf_idf1 = tf_idf_vect.fit_transform(x)

# tfidf words/col-names
tfidf_feat = tf_idf_vect.get_feature_names()

# compute TFIDF Weighted Word2Vec for each review for sent_x .
tfidf_vectors = []; 
row=0;
for sent in sent_x: 
    sent_vec = np.zeros(50) 
    weight_sum =0; 
    for word in sent: 
        if word in w2v_words:
            vec = w2v_model.wv[word]
            # obtain the tf_idfidf of a word in a sentence/review
            tf_idf = final_tf_idf1[row, tfidf_feat.index(word)]
            sent_vec += (vec * tf_idf)
            weight_sum += tf_idf
    if weight_sum != 0:
        sent_vec /= weight_sum
    tfidf_vectors.append(sent_vec)
    row += 1 
    
data = tfidf_vectors  

# Hierarchical Clustering with 2 clusters

In [82]:
model = AgglomerativeClustering(n_clusters=2).fit(data)

reviews = my_final['Text'].values
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    else :
        cluster2.append(reviews[i])
 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))

No. of reviews in Cluster-1 :  3254

No. of reviews in Cluster-2 :  1746


READING REVIEWS MANUALLY:

In [84]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-3 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for th

In [85]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 A friend introduced this tea several years ago, and I have been searching our local grocery stores and kitchen shops for it since then to no avail.  Then I thought about searching online for it and found it at the best price through Amazon.com.  I purchased a large quantity of this tea and have been serving it to family and friends throughout this Holiday Season.  Everyone loves it!!

Review-3 : 
 So grateful for this!! What an amazing mix. It can be used to make some of the best gluten free products out there. What a break through for us gluten free individuals who miss REAL mixes. Some of the other ones on the market just dont do the trick like this does :)



# Hierarchical Clustering with 5 clusters

In [86]:
model = AgglomerativeClustering(n_clusters=5).fit(data)

# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    else :
        cluster5.append(reviews[i]) 
        
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 

No. of reviews in Cluster-1 :  1425

No. of reviews in Cluster-2 :  1452

No. of reviews in Cluster-3 :  967

No. of reviews in Cluster-4 :  835

No. of reviews in Cluster-5 :  321


READING REVIEWS MANUALLY:

In [87]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 So grateful for this!! What an amazing mix. It can be used to make some of the best gluten free products out there. What a break through for us gluten free individuals who miss REAL mixes. Some of the other ones on the market just dont do the trick like this does :)

Review-3 : 
 This tea has a smooth and flavorful taste.  No bitterness. Cheaper buying it this way,than at store. 40 bags $3.50 great way to save. and always have them on hand



In [88]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for that matter). This is a refreshing exception.<br /><br />The pros: 70% juice, not overly calorie-ridden, the Izze drinks DO contain a bit of Vitamins C, B6 and Niacin, but do NOT contain any dreaded (and seemingly ubiquitous) high-fructose corn syrup (about which I have heard nothing good. Not one thing), and it tastes really good.<br /><br />The cons: Okay, not so much a con perhaps, but one should be aware that the product contains no more than 10% of the RDA of any of the aforementioned vitamins and minerals.<br /><br />So as long as you don't think of it as a magic health elixir, but rather as a great-tasting, refreshing 

In [89]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 I have tried literally dozens of teas since being introduced to the Russian custom of preparing and drinking tea, and the Ahmad Imperial blend is probably my favorite of all of these due to the fact that it is equally delicious hot or iced.  I buy the three-packs as needed from Amazon and go through them rapidly. My family loves it and my friends have it in their homes now too, try one box of it and you will love it too.

Review-3 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see an

In [90]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-2 : 
 I was misled by the name and thought that the cotechino was imported from Italy. Wrong! It was made in the US but was so salty that it was inedible. You learn from your mistakes.

Review-3 : 
 I should have listened to the other one star reviewer. My antler was NOT like the picture. Unfortunately, I too got half an antler - meaning it was half of a bisected piece. This exposes all of the marrow, gives your dog no challenge or time with the antler, and may cause an upset system / stomach for the dog. I won't buy this brand again.



In [91]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 A friend introduced this tea several years ago, and I have been searching our local grocery stores and kitchen shops for it since then to no avail.  Then I thought about searching online for it and found it at the best price through Amazon.com.  I purchased a large quantity of this tea and have been serving it to family and friends throughout this Holiday Season.  Everyone loves it!!

Review-2 : 
 My sister loves Good Earth Original Caffeine Free tea.  We used to be able to get it locally, but it has become less available over the last year.  I searched several stores before checking amazon.  I was so happy to have found it here and will continue to purchase it here for as long as possible.

Review-3 : 
 This is not a good deal.  I can go to target or other grocery stores and buy four boxes for at least $6.00 cheaper...



# Hierarchical Clustering with 10 clusters

In [93]:
model = AgglomerativeClustering(n_clusters=10).fit(data)
# Getting all the reviews in different clusters
cluster1 = []
cluster2 = []
cluster3 = []
cluster4 = []
cluster5 = []
cluster6 = []
cluster7 = []
cluster8 = []
cluster9 = []
cluster10 = []

for i in range(model.labels_.shape[0]):
    if model.labels_[i] == 0:
        cluster1.append(reviews[i])
    elif model.labels_[i] == 1:
        cluster2.append(reviews[i])
    elif model.labels_[i] == 2:
        cluster3.append(reviews[i])
    elif model.labels_[i] == 3:
        cluster4.append(reviews[i])
    elif model.labels_[i] == 4:
        cluster5.append(reviews[i])
    elif model.labels_[i] == 5:
        cluster6.append(reviews[i])
    elif model.labels_[i] == 6:
        cluster7.append(reviews[i])
    elif model.labels_[i] == 7:
        cluster8.append(reviews[i])
    elif model.labels_[i] == 8:
        cluster9.append(reviews[i])       
    else :
        cluster10.append(reviews[i]) 

In [94]:
# Number of reviews in different clusters
print("No. of reviews in Cluster-1 : ",len(cluster1))
print("\nNo. of reviews in Cluster-2 : ",len(cluster2))
print("\nNo. of reviews in Cluster-3 : ",len(cluster3))
print("\nNo. of reviews in Cluster-4 : ",len(cluster4))
print("\nNo. of reviews in Cluster-5 : ",len(cluster5)) 
print("\nNo. of reviews in Cluster-6 : ",len(cluster6))
print("\nNo. of reviews in Cluster-7 : ",len(cluster7))
print("\nNo. of reviews in Cluster-8 : ",len(cluster8))
print("\nNo. of reviews in Cluster-9 : ",len(cluster9))
print("\nNo. of reviews in Cluster-10 : ",len(cluster10)) 

No. of reviews in Cluster-1 :  1078

No. of reviews in Cluster-2 :  835

No. of reviews in Cluster-3 :  264

No. of reviews in Cluster-4 :  731

No. of reviews in Cluster-5 :  321

No. of reviews in Cluster-6 :  366

No. of reviews in Cluster-7 :  288

No. of reviews in Cluster-8 :  721

No. of reviews in Cluster-9 :  337

No. of reviews in Cluster-10 :  59


READING REVIEWS MANUALLY:

In [95]:
# Three Reviews of cluster 1
count=1
for i in range(3):
    if i < len(cluster1):
        print('Review-%d : \n %s\n'%(count,cluster1[i]))
        count +=1

Review-1 : 
 I first tried this product on Princess cruise and since bought it online from Amazon.  I like how there are 25 bags which retains its freshness.  I will definitely be purchasing this product on a continuous basis for my coffee each morning!

Review-2 : 
 So grateful for this!! What an amazing mix. It can be used to make some of the best gluten free products out there. What a break through for us gluten free individuals who miss REAL mixes. Some of the other ones on the market just dont do the trick like this does :)

Review-3 : 
 This tea has a smooth and flavorful taste.  No bitterness. Cheaper buying it this way,than at store. 40 bags $3.50 great way to save. and always have them on hand



In [96]:
# Three Reviews of cluster 2
count=1
for i in range(3):
    if i < len(cluster2):
        print('Review-%d : \n %s\n'%(count,cluster2[i]))
        count +=1

Review-1 : 
 Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br />Awful Awful taste...and phosphoric sick color...<br /><br />I was totally schemed, this is not a drink. it is an acid poison.<br />Who for god sake manufacture and sale this kind of awful product.<br /><br />Don't believe it? try for your self.<br />I only show "The king is naked"

Review-2 : 
 I was misled by the name and thought that the cotechino was imported from Italy. Wrong! It was made in the US but was so salty that it was inedible. You learn from your mistakes.

Review-3 : 
 I should have listened to the other one star reviewer. My antler was NOT like the picture. Unfortunately, I too got half an antler - meaning it was half of a bisected piece. This exposes all of the marrow, gives your dog no challenge or time with the antler, and may cause an upset system / stomach for the dog. I won't buy this brand again.



In [97]:
# Three Reviews of cluster 3
count=1
for i in range(3):
    if i < len(cluster3):
        print('Review-%d : \n %s\n'%(count,cluster3[i]))
        count +=1

Review-1 : 
 Some of the finest tea I've had.  It is a pleasure on the palette, as well as to the nose.  In addition to the wonderful taste and aroma, this tea (The Choice Organic Tea line) is "Fair Trade".<br /><br />This tea is caffeine free and delicious hot or cold. --> i have added 2 bags when brewing in the sun for iced tea.  Delicious, when sweetened with natural honey!<br /><br />It is important to believe in a product, and this one uses as many Fair Trade ingredients as possible.

Review-2 : 
 Great tasting tea I have been looking for a good  Sassafras tea forever.Finally. I will be ordering this product again .<br />Thanks Kim:D

Review-3 : 
 Twinings English Afternoon Tea is a superb hot tea, delicious with milk and sugar, with a full, somewhat fruity flavor. Do not be afraid that this is a "watered-down" or weak version of the Breakfast Tea! It is every bit as full-bodied and hearty, just different, and perfect for a bit of variety. I love the English Breakfast Tea too. Try

In [98]:
# Three Reviews of cluster 4
count=1
for i in range(3):
    if i < len(cluster4):
        print('Review-%d : \n %s\n'%(count,cluster4[i]))
        count +=1

Review-1 : 
 This oil is not liquid at room temperature. Also, it has a relatively low smoke point. Those facts out of they way, this is a very high quality coconut oil. It smells wonderful and everything cooked in it takes on a hint of coconut. I use this in my coffee, fry eggs in it, saute spinach in it, as well as enjoy a tablespoon of it before my workouts. I have read that there are non-food related things you can do with this as well (good for skin, etc) but I have only experienced it through a culinary lens.

Review-2 : 
 I ordered this product to make white, strawberry-flavored icing for some cupcakes for a wedding. since it was so expensive, i did not test it to make sure it actually tasted like strawberry before baking day. My Mistake! It smelled like bad bubblegum and tasted like Windex or something - just a terrible, chemical taste. i made the icing, then tasted it and thought "what was on my finger!? nail polish remover or something?" well I hadn't done my nails, it was th

In [99]:
# Three Reviews of cluster 5
count=1
for i in range(3):
    if i < len(cluster5):
        print('Review-%d : \n %s\n'%(count,cluster5[i]))
        count +=1

Review-1 : 
 A friend introduced this tea several years ago, and I have been searching our local grocery stores and kitchen shops for it since then to no avail.  Then I thought about searching online for it and found it at the best price through Amazon.com.  I purchased a large quantity of this tea and have been serving it to family and friends throughout this Holiday Season.  Everyone loves it!!

Review-2 : 
 My sister loves Good Earth Original Caffeine Free tea.  We used to be able to get it locally, but it has become less available over the last year.  I searched several stores before checking amazon.  I was so happy to have found it here and will continue to purchase it here for as long as possible.

Review-3 : 
 This is not a good deal.  I can go to target or other grocery stores and buy four boxes for at least $6.00 cheaper...



In [100]:
# Three Reviews of cluster 6
count=1
for i in range(3):
    if i < len(cluster6):
        print('Review-%d : \n %s\n'%(count,cluster6[i]))
        count +=1

Review-1 : 
 I have tried literally dozens of teas since being introduced to the Russian custom of preparing and drinking tea, and the Ahmad Imperial blend is probably my favorite of all of these due to the fact that it is equally delicious hot or iced.  I buy the three-packs as needed from Amazon and go through them rapidly. My family loves it and my friends have it in their homes now too, try one box of it and you will love it too.

Review-2 : 
 I thought this coffee was too weak, and had a slightly sour aftertaste. I do prefer a bolder coffee, so perhaps someone who likes a lighter roast would enjoy it; I did not.<br /><br />For those with a Solofill, I tried this coffee using the refillable cup in my Keurig brewer, but it didn't work too well. I didn't see anything about the grind in the product description (maybe I just overlooked it), but this coffee is ground finely. If you do use it in the Solofill, spoon it in so it is very loose, then shake, or the water won't flow through we

In [101]:
# Three Reviews of cluster 7
count=1
for i in range(3):
    if i < len(cluster7):
        print('Review-%d : \n %s\n'%(count,cluster7[i]))
        count +=1

Review-1 : 
 Given that our dogs don't get rawhide treats often, we thought they might like to try these. However, both our 70 lb Aussie and 10 lb. Papillon rejected them. The Aussie eventually gave in and tried to bury one in the back yard, but never bothered to cover the hole. This is pretty important, because our boys will eat nearly anything! These were a nice size for small or large dogs, but neither seemed interested, despite that fact.  That said, these two dogs of ours love dehydrated cow lungs and organic carrots for snacks, so perhaps their pallates are just too refined for cardboard filled rawhide chews.

Review-2 : 
 I must say these are the best puffed lamb ears we've tried and my dog who normally has a very sensitive stomach can eat as many as she wants and never gets ill from them. They are crunchy and fun and smell good too; all the things a dog likes in a treat. We buy them all the time.

Review-3 : 
 These are not too small nor too big, an excellent snack sized treat.

In [102]:
# Three Reviews of cluster 8
count=1
for i in range(3):
    if i < len(cluster8):
        print('Review-%d : \n %s\n'%(count,cluster8[i]))
        count +=1

Review-1 : 
 This is a really nice product for those who want to consume healthful things. Refreshing, tasty and guiltless beverages are too few and far between these days, but this qualifies.<br /><br />Far too many "vitamin drinks" on the shelf are packed with empty calories, high-fructose corn syrup, and no actual fruit juice (or anything else good for you, for that matter). This is a refreshing exception.<br /><br />The pros: 70% juice, not overly calorie-ridden, the Izze drinks DO contain a bit of Vitamins C, B6 and Niacin, but do NOT contain any dreaded (and seemingly ubiquitous) high-fructose corn syrup (about which I have heard nothing good. Not one thing), and it tastes really good.<br /><br />The cons: Okay, not so much a con perhaps, but one should be aware that the product contains no more than 10% of the RDA of any of the aforementioned vitamins and minerals.<br /><br />So as long as you don't think of it as a magic health elixir, but rather as a great-tasting, refreshing 

In [103]:
# Three Reviews of cluster 9
count=1
for i in range(3):
    if i < len(cluster9):
        print('Review-%d : \n %s\n'%(count,cluster9[i]))
        count +=1

Review-1 : 
 I made crab rangoon and used this sauce as a dipping sauce.  It was great.  I love the fact that it doesn't have MSG in it.  It has the right amount of sweetness.  It is a little spicy, but just the right amount.

Review-2 : 
 Completely opposite of other reviewer.  I love these cookies.  However, if you are looking for a cookie that is laden with sugar and chemicals, this is not it.  These aren't overly sweet.  And really not too much chocolate taste in these either.  But I like them.  Finally, a cookie that tastes good that I can eat.  No chemicals, very little salt (35mg) and only 5mg cholesterol.  Oh, and the sugar is only 3g.  So, definitely not sweet.

Review-3 : 
 The label says it's Watermelon and Strawberry but I did not taste either one of these flavors.  I did taste adulterated grape and apple and something "musty like".  There wasn't really any carbonation either. With the lack of a great flavor and at 140 Calories for this tiny little can of 8.3 ounces with 36

In [104]:
# Three Reviews of cluster 10
count=1
for i in range(3):
    if i < len(cluster10):
        print('Review-%d : \n %s\n'%(count,cluster10[i]))
        count +=1

Review-1 : 
 This is the best cat food for my feral cats! The 40 lb size is a great buy, and the cat food is good for them! you really can't beat the price for decent food for feeding a lot of cats!

Review-2 : 
 I have two golden retrievers with hearty appetites.  Feeding them regular dog food makes them overweight.  I switched to Core Reduced Fat when I began to worry about the quality of the food I was feeding my dogs.  I had fed my dogs expensive dog foods which I got at the grocery store.  When I read the ingredients, I decied to look for something better quality.  From my research I learned that high quality dog foods prevent illness and disease and increase life-span.  Core is made with ingredients like real chicken--not chicken by-products or lots of grain.  Afterall, dogs are carnivores.  Since my dogs have been on Core they have not had the usual ear infections, skin allergies and irritations; their coats are beautiful; they have lots of energy and are overall very healthy.  

# CONCLUSION :-

# Procedure Followed :

STEP 1 :- Text Preprocessing

STEP 2 :- Taking all text data and ignoring class variable .

STEP 3:- Training the vectorizer on text_data and later applying same vectorizer on text_data to transform it into vectors

STEP 4:- Implementing Hierarchical Clustering using multiple values of clusters .

STEP 5:- Reading reviews manually for each cluster

Repeat from STEP 3 to STEP 5 for each of these four vectorizers : Bag Of Words(BoW), TFIDF, Avg Word2Vec and TFIDF Word2Vec