#Imports

In [0]:
# !pip install scattertext
# !pip install flask_cors
# !pip install python-decouple
# !pip install -U spacy
# !python -m spacy download en_core_web_sm


In [0]:
from flask import Flask, render_template, request, jsonify
import json
import warnings
import pandas as pd
import spacy
import scattertext as st
from lxml import html
from requests import Session
from concurrent.futures import ThreadPoolExecutor as Executor
import requests
# from flask_cors import CORS
# from decouple import config
import re
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option('display.max_colwidth', 1000)

In [0]:
nlp = spacy.load("en_core_web_sm")#if you run into problems here, 'Restart Runtime' and run all, it might fix things.

# Code


In [0]:
base_url = "https://www.yelp.com/biz/" 
api_url = "/review_feed?sort_by=date_desc&start="
bid = 'Rc1lxc5lSKJYd162JHNMfQ'

class Scraper():
    def __init__(self):
        self.data = pd.DataFrame()

    def get_data(self, n, bid=bid):
        with Session() as s:
            with s.get(base_url+bid+api_url+str(n*20)) as resp: #makes an http get request to given url and returns response as json
                r = json.loads(resp.content) #converts json response into a dictionary
                _html = html.fromstring(r['review_list']) #loads from dictionary

                dates = _html.xpath("//div[@class='review-content']/descendant::span[@class='rating-qualifier']/text()")
                reviews = [el.text for el in _html.xpath("//div[@class='review-content']/p")]
                ratings = _html.xpath("//div[@class='review-content']/descendant::div[@class='biz-rating__stars']/div/@title")

                df = pd.DataFrame([dates, reviews, ratings]).T

                self.data = pd.concat([self.data,df])

    def scrape(self): #makes it faster
        # multithreaded looping
        with Executor(max_workers=40) as e:
            list(e.map(self.get_data, range(10)))

s = Scraper()
s.scrape()
df = s.data
# df = df.sample(100)

df.head(2)

Unnamed: 0,0,1,2
0,\n 9/6/2019\n,"I must say, we were excited to try this restaurant tonight. It looked like a lot of great reviews. We did make reservations and I would strongly recommend. My daughter (age 11) and I were seating in the Atrium, which was very nice, and light (even though it was raining and gloomy). The servers took a little bit to come over, but once they did, service was impeccable. It was more of a service by committee and everyone of the team was pleasant and attentive. Let's go to the food. We started with tomato soup and a kale caesar salad. Both were excellent. We had the gnocchi and lamb for entrees. The gnocchi were very pleasant taste and the texture was nice. The lamb was cooked beautifully, a little fatty, but delicious! Potatoes and veg needed a little more seasoning, but that is being very picky. I ordered the Anything Goes and it was superb. Ranks up there on one of my favorite finds in NYC.",5.0 star rating
1,\n 9/2/2019\n,I went to Bea with a friend and we had a great meal! The food is good and so is the staff. The lighting is very dim and it can get a little loud (especially as you are seating very closely to your neighbors) but that's the vibe.,5.0 star rating


In [0]:
def customtokensize(text):
    return re.findall("[\w']+", str(text))

df['tokenized_text'] = df[1].apply(customtokensize)
df.head(2)

Unnamed: 0,0,1,2,tokenized_text
0,\n 9/6/2019\n,"I must say, we were excited to try this restaurant tonight. It looked like a lot of great reviews. We did make reservations and I would strongly recommend. My daughter (age 11) and I were seating in the Atrium, which was very nice, and light (even though it was raining and gloomy). The servers took a little bit to come over, but once they did, service was impeccable. It was more of a service by committee and everyone of the team was pleasant and attentive. Let's go to the food. We started with tomato soup and a kale caesar salad. Both were excellent. We had the gnocchi and lamb for entrees. The gnocchi were very pleasant taste and the texture was nice. The lamb was cooked beautifully, a little fatty, but delicious! Potatoes and veg needed a little more seasoning, but that is being very picky. I ordered the Anything Goes and it was superb. Ranks up there on one of my favorite finds in NYC.",5.0 star rating,"[I, must, say, we, were, excited, to, try, this, restaurant, tonight, It, looked, like, a, lot, of, great, reviews, We, did, make, reservations, and, I, would, strongly, recommend, My, daughter, age, 11, and, I, were, seating, in, the, Atrium, which, was, very, nice, and, light, even, though, it, was, raining, and, gloomy, The, servers, took, a, little, bit, to, come, over, but, once, they, did, service, was, impeccable, It, was, more, of, a, service, by, committee, and, everyone, of, the, team, was, pleasant, and, attentive, Let's, go, to, the, food, We, started, with, tomato, soup, and, a, kale, caesar, salad, ...]"
1,\n 9/2/2019\n,I went to Bea with a friend and we had a great meal! The food is good and so is the staff. The lighting is very dim and it can get a little loud (especially as you are seating very closely to your neighbors) but that's the vibe.,5.0 star rating,"[I, went, to, Bea, with, a, friend, and, we, had, a, great, meal, The, food, is, good, and, so, is, the, staff, The, lighting, is, very, dim, and, it, can, get, a, little, loud, especially, as, you, are, seating, very, closely, to, your, neighbors, but, that's, the, vibe]"


In [0]:
stopwords = ['and','was','were','had','check-in','=','= =','u','want', 'u want', 'cuz','him',"i've",'on', 'her','told','ins', '1 check','I', 'i"m', 'i', ' ', 'it', "it's", 'it.','they', 'the', 'this','its', 'l','they','this',"don't",'the ', ' the', 'it', 'i"ve', 'i"m', '!', '1','2','3','4', '5','6','7','8','9','0','/','.',',']

def filter_stopwords(text):
  nonstopwords = []
  for i in text:
    if i not in stopwords:
      nonstopwords.append(i)
  return nonstopwords
df['tokenized_text'] = df['tokenized_text'].apply(filter_stopwords)
df['parts_of_speech_reference'] = df['tokenized_text'].apply(filter_stopwords)
df['parts_of_speech_reference'] = df['parts_of_speech_reference'].str.join(' ')
df.head(2)

Unnamed: 0,0,1,2,tokenized_text,parts_of_speech_reference
0,\n 9/6/2019\n,"I must say, we were excited to try this restaurant tonight. It looked like a lot of great reviews. We did make reservations and I would strongly recommend. My daughter (age 11) and I were seating in the Atrium, which was very nice, and light (even though it was raining and gloomy). The servers took a little bit to come over, but once they did, service was impeccable. It was more of a service by committee and everyone of the team was pleasant and attentive. Let's go to the food. We started with tomato soup and a kale caesar salad. Both were excellent. We had the gnocchi and lamb for entrees. The gnocchi were very pleasant taste and the texture was nice. The lamb was cooked beautifully, a little fatty, but delicious! Potatoes and veg needed a little more seasoning, but that is being very picky. I ordered the Anything Goes and it was superb. Ranks up there on one of my favorite finds in NYC.",5.0 star rating,"[must, say, we, excited, to, try, restaurant, tonight, It, looked, like, a, lot, of, great, reviews, We, did, make, reservations, would, strongly, recommend, My, daughter, age, 11, seating, in, Atrium, which, very, nice, light, even, though, raining, gloomy, The, servers, took, a, little, bit, to, come, over, but, once, did, service, impeccable, It, more, of, a, service, by, committee, everyone, of, team, pleasant, attentive, Let's, go, to, food, We, started, with, tomato, soup, a, kale, caesar, salad, Both, excellent, We, gnocchi, lamb, for, entrees, The, gnocchi, very, pleasant, taste, texture, nice, The, lamb, cooked, beautifully, a, little, fatty, but, delicious, ...]",must say we excited to try restaurant tonight It looked like a lot of great reviews We did make reservations would strongly recommend My daughter age 11 seating in Atrium which very nice light even though raining gloomy The servers took a little bit to come over but once did service impeccable It more of a service by committee everyone of team pleasant attentive Let's go to food We started with tomato soup a kale caesar salad Both excellent We gnocchi lamb for entrees The gnocchi very pleasant taste texture nice The lamb cooked beautifully a little fatty but delicious Potatoes veg needed a little more seasoning but that is being very picky ordered Anything Goes superb Ranks up there one of my favorite finds in NYC
1,\n 9/2/2019\n,I went to Bea with a friend and we had a great meal! The food is good and so is the staff. The lighting is very dim and it can get a little loud (especially as you are seating very closely to your neighbors) but that's the vibe.,5.0 star rating,"[went, to, Bea, with, a, friend, we, a, great, meal, The, food, is, good, so, is, staff, The, lighting, is, very, dim, can, get, a, little, loud, especially, as, you, are, seating, very, closely, to, your, neighbors, but, that's, vibe]",went to Bea with a friend we a great meal The food is good so is staff The lighting is very dim can get a little loud especially as you are seating very closely to your neighbors but that's vibe


In [0]:
def find_part_of_speech(x):
  """Use spacy's entity recognition to recogize if word is noun, verb, adjective, etc."""
  part_of_speech = []
  doc = nlp(str(x))
  for token in doc:
    part_of_speech.append(token.pos_)
  return part_of_speech

df['parts_of_speech'] = df['parts_of_speech_reference'].apply(find_part_of_speech)
df.head(2)

Unnamed: 0,0,1,2,tokenized_text,parts_of_speech_reference,parts_of_speech
0,\n 9/6/2019\n,"I must say, we were excited to try this restaurant tonight. It looked like a lot of great reviews. We did make reservations and I would strongly recommend. My daughter (age 11) and I were seating in the Atrium, which was very nice, and light (even though it was raining and gloomy). The servers took a little bit to come over, but once they did, service was impeccable. It was more of a service by committee and everyone of the team was pleasant and attentive. Let's go to the food. We started with tomato soup and a kale caesar salad. Both were excellent. We had the gnocchi and lamb for entrees. The gnocchi were very pleasant taste and the texture was nice. The lamb was cooked beautifully, a little fatty, but delicious! Potatoes and veg needed a little more seasoning, but that is being very picky. I ordered the Anything Goes and it was superb. Ranks up there on one of my favorite finds in NYC.",5.0 star rating,"[must, say, we, excited, to, try, restaurant, tonight, It, looked, like, a, lot, of, great, reviews, We, did, make, reservations, would, strongly, recommend, My, daughter, age, 11, seating, in, Atrium, which, very, nice, light, even, though, raining, gloomy, The, servers, took, a, little, bit, to, come, over, but, once, did, service, impeccable, It, more, of, a, service, by, committee, everyone, of, team, pleasant, attentive, Let's, go, to, food, We, started, with, tomato, soup, a, kale, caesar, salad, Both, excellent, We, gnocchi, lamb, for, entrees, The, gnocchi, very, pleasant, taste, texture, nice, The, lamb, cooked, beautifully, a, little, fatty, but, delicious, ...]",must say we excited to try restaurant tonight It looked like a lot of great reviews We did make reservations would strongly recommend My daughter age 11 seating in Atrium which very nice light even though raining gloomy The servers took a little bit to come over but once did service impeccable It more of a service by committee everyone of team pleasant attentive Let's go to food We started with tomato soup a kale caesar salad Both excellent We gnocchi lamb for entrees The gnocchi very pleasant taste texture nice The lamb cooked beautifully a little fatty but delicious Potatoes veg needed a little more seasoning but that is being very picky ordered Anything Goes superb Ranks up there one of my favorite finds in NYC,"[VERB, VERB, PRON, ADJ, PART, VERB, NOUN, NOUN, PRON, VERB, SCONJ, DET, NOUN, ADP, ADJ, NOUN, PRON, AUX, VERB, NOUN, VERB, ADV, VERB, DET, NOUN, NOUN, NUM, NOUN, ADP, PROPN, DET, ADV, ADJ, NOUN, ADV, SCONJ, VERB, NOUN, DET, NOUN, VERB, DET, ADJ, NOUN, PART, VERB, ADP, CCONJ, ADV, AUX, NOUN, VERB, PRON, ADJ, ADP, DET, NOUN, ADP, NOUN, PRON, ADP, NOUN, ADJ, NOUN, VERB, PRON, VERB, ADP, NOUN, PRON, VERB, ADP, NOUN, NOUN, DET, PROPN, NOUN, NOUN, DET, NOUN, PRON, VERB, NOUN, ADP, NOUN, DET, NOUN, ADV, ADJ, NOUN, NOUN, ADJ, DET, NOUN, VERB, ADV, DET, ADJ, ADJ, CCONJ, ...]"
1,\n 9/2/2019\n,I went to Bea with a friend and we had a great meal! The food is good and so is the staff. The lighting is very dim and it can get a little loud (especially as you are seating very closely to your neighbors) but that's the vibe.,5.0 star rating,"[went, to, Bea, with, a, friend, we, a, great, meal, The, food, is, good, so, is, staff, The, lighting, is, very, dim, can, get, a, little, loud, especially, as, you, are, seating, very, closely, to, your, neighbors, but, that's, vibe]",went to Bea with a friend we a great meal The food is good so is staff The lighting is very dim can get a little loud especially as you are seating very closely to your neighbors but that's vibe,"[VERB, ADP, PROPN, ADP, DET, NOUN, PRON, DET, ADJ, NOUN, DET, NOUN, AUX, ADJ, ADV, AUX, NOUN, DET, NOUN, AUX, ADV, ADJ, VERB, AUX, DET, ADJ, ADJ, ADV, SCONJ, PRON, AUX, VERB, ADV, ADV, ADP, DET, NOUN, CCONJ, DET, AUX, NOUN]"


In [0]:
def find_adj(x):
  """Get Just the Adjectives"""
  adj_list = []
  doc = nlp(str(x))
  for token in doc:
    if token.pos_ == 'ADJ':
      adj_list.append(token)
  return adj_list

df['adj_list'] = df['parts_of_speech_reference'].apply(find_adj)
df.head(2)

Unnamed: 0,0,1,2,tokenized_text,parts_of_speech_reference,parts_of_speech,adj_list
0,\n 9/6/2019\n,"I must say, we were excited to try this restaurant tonight. It looked like a lot of great reviews. We did make reservations and I would strongly recommend. My daughter (age 11) and I were seating in the Atrium, which was very nice, and light (even though it was raining and gloomy). The servers took a little bit to come over, but once they did, service was impeccable. It was more of a service by committee and everyone of the team was pleasant and attentive. Let's go to the food. We started with tomato soup and a kale caesar salad. Both were excellent. We had the gnocchi and lamb for entrees. The gnocchi were very pleasant taste and the texture was nice. The lamb was cooked beautifully, a little fatty, but delicious! Potatoes and veg needed a little more seasoning, but that is being very picky. I ordered the Anything Goes and it was superb. Ranks up there on one of my favorite finds in NYC.",5.0 star rating,"[must, say, we, excited, to, try, restaurant, tonight, It, looked, like, a, lot, of, great, reviews, We, did, make, reservations, would, strongly, recommend, My, daughter, age, 11, seating, in, Atrium, which, very, nice, light, even, though, raining, gloomy, The, servers, took, a, little, bit, to, come, over, but, once, did, service, impeccable, It, more, of, a, service, by, committee, everyone, of, team, pleasant, attentive, Let's, go, to, food, We, started, with, tomato, soup, a, kale, caesar, salad, Both, excellent, We, gnocchi, lamb, for, entrees, The, gnocchi, very, pleasant, taste, texture, nice, The, lamb, cooked, beautifully, a, little, fatty, but, delicious, ...]",must say we excited to try restaurant tonight It looked like a lot of great reviews We did make reservations would strongly recommend My daughter age 11 seating in Atrium which very nice light even though raining gloomy The servers took a little bit to come over but once did service impeccable It more of a service by committee everyone of team pleasant attentive Let's go to food We started with tomato soup a kale caesar salad Both excellent We gnocchi lamb for entrees The gnocchi very pleasant taste texture nice The lamb cooked beautifully a little fatty but delicious Potatoes veg needed a little more seasoning but that is being very picky ordered Anything Goes superb Ranks up there one of my favorite finds in NYC,"[VERB, VERB, PRON, ADJ, PART, VERB, NOUN, NOUN, PRON, VERB, SCONJ, DET, NOUN, ADP, ADJ, NOUN, PRON, AUX, VERB, NOUN, VERB, ADV, VERB, DET, NOUN, NOUN, NUM, NOUN, ADP, PROPN, DET, ADV, ADJ, NOUN, ADV, SCONJ, VERB, NOUN, DET, NOUN, VERB, DET, ADJ, NOUN, PART, VERB, ADP, CCONJ, ADV, AUX, NOUN, VERB, PRON, ADJ, ADP, DET, NOUN, ADP, NOUN, PRON, ADP, NOUN, ADJ, NOUN, VERB, PRON, VERB, ADP, NOUN, PRON, VERB, ADP, NOUN, NOUN, DET, PROPN, NOUN, NOUN, DET, NOUN, PRON, VERB, NOUN, ADP, NOUN, DET, NOUN, ADV, ADJ, NOUN, NOUN, ADJ, DET, NOUN, VERB, ADV, DET, ADJ, ADJ, CCONJ, ...]","[excited, great, nice, little, more, pleasant, pleasant, nice, little, fatty, seasoning, picky, favorite]"
1,\n 9/2/2019\n,I went to Bea with a friend and we had a great meal! The food is good and so is the staff. The lighting is very dim and it can get a little loud (especially as you are seating very closely to your neighbors) but that's the vibe.,5.0 star rating,"[went, to, Bea, with, a, friend, we, a, great, meal, The, food, is, good, so, is, staff, The, lighting, is, very, dim, can, get, a, little, loud, especially, as, you, are, seating, very, closely, to, your, neighbors, but, that's, vibe]",went to Bea with a friend we a great meal The food is good so is staff The lighting is very dim can get a little loud especially as you are seating very closely to your neighbors but that's vibe,"[VERB, ADP, PROPN, ADP, DET, NOUN, PRON, DET, ADJ, NOUN, DET, NOUN, AUX, ADJ, ADV, AUX, NOUN, DET, NOUN, AUX, ADV, ADJ, VERB, AUX, DET, ADJ, ADJ, ADV, SCONJ, PRON, AUX, VERB, ADV, ADV, ADP, DET, NOUN, CCONJ, DET, AUX, NOUN]","[great, good, dim, little, loud]"


In [0]:
def find_phrases(x):
  """Create a list where adjectives come immediately before nouns for each review"""
  adj_list = []
  doc = nlp(str(x))
  try:
    for token in range(len(doc)):
      sub_list = []
      if (doc[token].pos_ == 'ADJ'and doc[token+1].pos_ =='NOUN') or (doc[token].pos_ == 'VERB'and doc[token+1].pos_ =='NOUN'):
        sub_list.append(doc[token])
        sub_list.append(doc[token+1])
      elif (doc[token].pos_ == 'ADJ'and doc[token+1].pos_ == 'ADJ'and doc[token+2].pos_ =='NOUN')or (doc[token].pos_ == 'ADJ'and doc[token+1].pos_ =='VERB'and doc[token+2].pos_ =='NOUN')or (doc[token].pos_ == 'ADJ'and doc[token+1].pos_ == 'NOUN'and doc[token+2].pos_ =='NOUN'):
        sub_list.append(doc[token])
        sub_list.append(doc[token+1])        
        sub_list.append(doc[token+2])
      if (doc[token].lemma_ == 'wait'):
        sub_list.append(doc[token-2]) 
        sub_list.append(doc[token-1])
        sub_list.append(doc[token])
        sub_list.append(doc[token+1])
        sub_list.append(doc[token+2])
        sub_list.append(doc[token+3])
      if (doc[token].lemma_ == 'service'):
        sub_list.append(doc[token-2]) 
        sub_list.append(doc[token-1])
        sub_list.append(doc[token])
        sub_list.append(doc[token+1])
        sub_list.append(doc[token+2])
        sub_list.append(doc[token+3])
      if len(sub_list) != 0:
        adj_list.append(sub_list)
    return adj_list
  except IndexError as e:
    pass

df['adj_noun_phrases'] = df['parts_of_speech_reference'].apply(find_phrases)
df['adj_noun_phrases'].sample(200)

6                                                                                                                                                                                                                                                                                    [[nice, surprise]]
16                                                                                                                                                                                                                                                                                                   []
17                                                                               [[worst, restaurants], [theater, tickets], [entered, restaurant], [minimal, taste], [minimal, alcohol], [pomegranate, juice], [got, seltzer], [eating, leftovers], [better, run, establishment], [run, establishment]]
18                                                                                                              

In [0]:
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Apple 0 5 ORG
U.K. 27 31 GPE
$1 billion 44 54 MONEY


In [0]:
def find_money(x):
  """Create a list where adjectives come immediately before nouns for each review"""
  money_list = []
  doc = nlp(str(x))
  for ent in doc.ents:
    if ent.label_ == 'MONEY':
      money_list.append(ent)
  return money_list

df['money_list'] = df['parts_of_speech_reference'].apply(find_money)
df['money_list'].sample(200)

9                                 []
8                                 []
6                                 []
11                                []
16                                []
19                                []
6                                 []
19                                []
20                                []
10                                []
15                                []
1                                 []
14                                []
7                                 []
7                                 []
6                                 []
2                                 []
19                                []
9                                 []
12                                []
5                                 []
7                                 []
3                                 []
16                                []
7                                 []
8                                 []
10                                []
5

In [0]:
def find_names(x):
  """If Spacy identifies name, organization, or geographic location. The resulting are Generic Locations Mentioned, not very useful"""
  name_list = []
  doc = nlp(str(x))
  for ent in doc.ents:
    if ent.label_ == 'ORG' or ent.label_ == 'GPE':
      name_list.append(ent)
  return name_list

df['name_list'] = df['parts_of_speech_reference'].apply(find_names)
df[['parts_of_speech_reference','name_list']]

Unnamed: 0,parts_of_speech_reference,name_list
0,must say we excited to try restaurant tonight It looked like a lot of great reviews We did make reservations would strongly recommend My daughter age 11 seating in Atrium which very nice light even though raining gloomy The servers took a little bit to come over but once did service impeccable It more of a service by committee everyone of team pleasant attentive Let's go to food We started with tomato soup a kale caesar salad Both excellent We gnocchi lamb for entrees The gnocchi very pleasant taste texture nice The lamb cooked beautifully a little fatty but delicious Potatoes veg needed a little more seasoning but that is being very picky ordered Anything Goes superb Ranks up there one of my favorite finds in NYC,"[(Atrium), (tomato), (NYC)]"
1,went to Bea with a friend we a great meal The food is good so is staff The lighting is very dim can get a little loud especially as you are seating very closely to your neighbors but that's vibe,[]
2,Phenomenal food drinks used to go to BEA for just cocktails until dinner there one night Then recently went for Sunday brunch honestly food never disappoints,[(BEA)]
3,Saw reviews which pretty positive Made reservations for for brunch won't return Service slow never refilled coffee or checked to see how our meal Eggs Benedict eggs over cooked one a broken yoke meal tasted like water from poached eggs put dish Very bland,[]
4,Great unique vibe very chill yet lively Great place for before show dinner drinks Great service Highly recommend,[]
5,Best place for a romantic date Waiter super nice food delicious We made reservations two hours before getting there,[]
6,Awesome brunch great service fun atmosphere This place gets slammed We got here early ish snagged a table The food is worth,[]
7,Simply Amazing A crafty bar restaurant located in Midtown West loved everything about The waiter very nice he gave us perfect recommendations,[]
8,Went in with no reservation a Sunday prefixe brunch for 26 is a good deal Plus Food good tables are a bit small though is only downside,[(prefixe)]
9,Nice place for drinks dinner best to make reservations,[]


In [0]:
def find_noun_chunks(x):
  """Create a list where adjectives come immediately before nouns for each review"""
  noun_list = []
  doc = nlp(str(x))
  for chunk in doc.noun_chunks:
      noun_list.append(chunk)
  return noun_list

df['noun_chunks'] = df['parts_of_speech_reference'].apply(find_noun_chunks)
df['noun_chunks'].sample(200)

7                                                                                                                                                                                                                                                                                                                                                                                                                        [(Delicious, food, cool, hipster, vibe, plenty), (things), (you), (something), (The, courtyard), (a, great, place), (a, first, date), (anniversary), (few, places), (I), (a, reasonably, priced, meat, cheese, board), (city)]
9                                                                                                                                                                                                                                                                                                                                                                               

In [0]:
def manual_noun_chunks(x):
  """Create a list where adjectives come immediately before nouns for each review"""
  noun_list = []
  doc = nlp(str(x))
  for chunk in doc.noun_chunks:
      noun_list.append(chunk)
  return noun_list

df['manual_noun_chunks'] = df['parts_of_speech_reference'].apply(manual_noun_chunks)
df['noun_chunks'].sample(200)

In [0]:
doc = nlp("Autonomous cars shift insurance liability toward manufacturers")
for token in doc:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
            [child for child in token.children])

Autonomous amod cars NOUN []
cars nsubj shift VERB [Autonomous]
shift ROOT shift VERB [cars, liability]
insurance compound liability NOUN []
liability dobj shift VERB [insurance, toward]
toward prep liability NOUN [manufacturers]
manufacturers pobj toward ADP []


In [0]:
def find_noun_chunks(x):
  """Using Spacy's Parse Tree, extracting longer phrases of nouns and children words that relate"""
  noun_list = []
  doc = nlp(str(x))
  try:
    for token in range(len(doc)):
      sub_list = []
      if (doc[token].pos_ == 'NOUN' and doc[token+1].pos_ == 'NOUN'):
        sub_list.append(doc[token-1])
        sub_list.append(doc[token])
        sub_list.append(doc[token+1])
        sub_list.append(doc[token+2])
        sub_list.append(doc[token+3])
        sub_list.append(doc[token+4])
        sub_list.append(doc[token+5])
        sub_list.append(doc[token+6])
      if len(sub_list) != 0:
        noun_list.append(sub_list)
    return noun_list
  except IndexError as e:
    pass

df['noun_chunks'] = df['parts_of_speech_reference'].apply(find_noun_chunks)
# df['noun_chunks'].sample(200)
df[['parts_of_speech_reference','noun_chunks']]

Unnamed: 0,parts_of_speech_reference,noun_chunks
0,must say we excited to try restaurant tonight It looked like a lot of great reviews We did make reservations would strongly recommend My daughter age 11 seating in Atrium which very nice light even though raining gloomy The servers took a little bit to come over but once did service impeccable It more of a service by committee everyone of team pleasant attentive Let's go to food We started with tomato soup a kale caesar salad Both excellent We gnocchi lamb for entrees The gnocchi very pleasant taste texture nice The lamb cooked beautifully a little fatty but delicious Potatoes veg needed a little more seasoning but that is being very picky ordered Anything Goes superb Ranks up there one of my favorite finds in NYC,"[[try, restaurant, tonight, It, looked, like, a, lot], [My, daughter, age, 11, seating, in, Atrium, which], [with, tomato, soup, a, kale, caesar, salad, Both], [kale, caesar, salad, Both, excellent, We, gnocchi, lamb], [pleasant, taste, texture, nice, The, lamb, cooked, beautifully]]"
1,went to Bea with a friend we a great meal The food is good so is staff The lighting is very dim can get a little loud especially as you are seating very closely to your neighbors but that's vibe,
2,Phenomenal food drinks used to go to BEA for just cocktails until dinner there one night Then recently went for Sunday brunch honestly food never disappoints,
3,Saw reviews which pretty positive Made reservations for for brunch won't return Service slow never refilled coffee or checked to see how our meal Eggs Benedict eggs over cooked one a broken yoke meal tasted like water from poached eggs put dish Very bland,[]
4,Great unique vibe very chill yet lively Great place for before show dinner drinks Great service Highly recommend,"[[before, show, dinner, drinks, Great, service, Highly, recommend]]"
5,Best place for a romantic date Waiter super nice food delicious We made reservations two hours before getting there,[]
6,Awesome brunch great service fun atmosphere This place gets slammed We got here early ish snagged a table The food is worth,"[[great, service, fun, atmosphere, This, place, gets, slammed], [service, fun, atmosphere, This, place, gets, slammed, We]]"
7,Simply Amazing A crafty bar restaurant located in Midtown West loved everything about The waiter very nice he gave us perfect recommendations,
8,Went in with no reservation a Sunday prefixe brunch for 26 is a good deal Plus Food good tables are a bit small though is only downside,"[[Sunday, prefixe, brunch, for, 26, is, a, good]]"
9,Nice place for drinks dinner best to make reservations,


In [0]:
def find_verb(x):
  """Get Just the Adjectives"""
  adj_list = []
  doc = nlp(str(x))
  try:
    for token in range(len(doc)):
      sub_list = []
      if (doc[token].pos_ == 'VERB' and doc[token+1].pos_ == 'NOUN') or (doc[token-1].pos_ == 'NOUN' and doc[token].pos_ == 'VERB'):
        sub_list.append(doc[token-1])
        sub_list.append(doc[token])
        sub_list.append(doc[token+1])
        sub_list.append(doc[token+2])
        sub_list.append(doc[token+3])
      if len(sub_list) != 0:
        adj_list.append(sub_list)
    return adj_list
  except IndexError as e:
    pass

df['verb_list'] = df['parts_of_speech_reference'].apply(find_verb)
df[['parts_of_speech_reference','parts_of_speech','verb_list']]

Unnamed: 0,parts_of_speech_reference,parts_of_speech,verb_list
0,must say we excited to try restaurant tonight It looked like a lot of great reviews We did make reservations would strongly recommend My daughter age 11 seating in Atrium which very nice light even though raining gloomy The servers took a little bit to come over but once did service impeccable It more of a service by committee everyone of team pleasant attentive Let's go to food We started with tomato soup a kale caesar salad Both excellent We gnocchi lamb for entrees The gnocchi very pleasant taste texture nice The lamb cooked beautifully a little fatty but delicious Potatoes veg needed a little more seasoning but that is being very picky ordered Anything Goes superb Ranks up there one of my favorite finds in NYC,"[VERB, VERB, PRON, ADJ, PART, VERB, NOUN, NOUN, PRON, VERB, SCONJ, DET, NOUN, ADP, ADJ, NOUN, PRON, AUX, VERB, NOUN, VERB, ADV, VERB, DET, NOUN, NOUN, NUM, NOUN, ADP, PROPN, DET, ADV, ADJ, NOUN, ADV, SCONJ, VERB, NOUN, DET, NOUN, VERB, DET, ADJ, NOUN, PART, VERB, ADP, CCONJ, ADV, AUX, NOUN, VERB, PRON, ADJ, ADP, DET, NOUN, ADP, NOUN, PRON, ADP, NOUN, ADJ, NOUN, VERB, PRON, VERB, ADP, NOUN, PRON, VERB, ADP, NOUN, NOUN, DET, PROPN, NOUN, NOUN, DET, NOUN, PRON, VERB, NOUN, ADP, NOUN, DET, NOUN, ADV, ADJ, NOUN, NOUN, ADJ, DET, NOUN, VERB, ADV, DET, ADJ, ADJ, CCONJ, ...]","[[to, try, restaurant, tonight, It], [did, make, reservations, would, strongly], [reservations, would, strongly, recommend, My], [though, raining, gloomy, The, servers], [servers, took, a, little, bit], [service, impeccable, It, more, of], [attentive, Let, 's, go, to], [We, gnocchi, lamb, for, entrees], [lamb, cooked, beautifully, a, little], [Anything, Goes, superb, Ranks, up], [superb, Ranks, up, there, one]]"
1,went to Bea with a friend we a great meal The food is good so is staff The lighting is very dim can get a little loud especially as you are seating very closely to your neighbors but that's vibe,"[VERB, ADP, PROPN, ADP, DET, NOUN, PRON, DET, ADJ, NOUN, DET, NOUN, AUX, ADJ, ADV, AUX, NOUN, DET, NOUN, AUX, ADV, ADJ, VERB, AUX, DET, ADJ, ADJ, ADV, SCONJ, PRON, AUX, VERB, ADV, ADV, ADP, DET, NOUN, CCONJ, DET, AUX, NOUN]","[[vibe, went, to, Bea, with]]"
2,Phenomenal food drinks used to go to BEA for just cocktails until dinner there one night Then recently went for Sunday brunch honestly food never disappoints,"[ADJ, NOUN, NOUN, VERB, PART, VERB, ADP, PROPN, ADP, ADJ, NOUN, ADP, NOUN, ADV, NUM, NOUN, ADV, ADV, VERB, ADP, PROPN, NOUN, ADV, NOUN, ADV, NOUN]","[[drinks, used, to, go, to]]"
3,Saw reviews which pretty positive Made reservations for for brunch won't return Service slow never refilled coffee or checked to see how our meal Eggs Benedict eggs over cooked one a broken yoke meal tasted like water from poached eggs put dish Very bland,"[VERB, NOUN, DET, ADV, ADJ, VERB, NOUN, ADP, ADP, NOUN, VERB, PART, VERB, PROPN, ADJ, ADV, VERB, NOUN, CCONJ, VERB, PART, VERB, ADV, DET, NOUN, PROPN, PROPN, NOUN, ADP, VERB, NUM, DET, VERB, ADJ, NOUN, VERB, SCONJ, NOUN, ADP, ADJ, NOUN, VERB, NOUN, ADV, ADJ]","[[bland, Saw, reviews, which, pretty], [positive, Made, reservations, for, for], [brunch, wo, n't, return, Service], [never, refilled, coffee, or, checked], [meal, tasted, like, water, from], [eggs, put, dish, Very, bland]]"
4,Great unique vibe very chill yet lively Great place for before show dinner drinks Great service Highly recommend,"[ADJ, ADJ, NOUN, ADV, NOUN, ADV, ADJ, ADJ, NOUN, ADP, ADP, NOUN, NOUN, VERB, ADJ, NOUN, ADV, VERB]",
5,Best place for a romantic date Waiter super nice food delicious We made reservations two hours before getting there,"[ADJ, NOUN, ADP, DET, ADJ, NOUN, PROPN, ADV, ADJ, NOUN, ADJ, PRON, VERB, NOUN, NUM, NOUN, ADP, VERB, ADV]","[[We, made, reservations, two, hours]]"
6,Awesome brunch great service fun atmosphere This place gets slammed We got here early ish snagged a table The food is worth,"[ADJ, ADJ, ADJ, NOUN, NOUN, NOUN, DET, NOUN, VERB, VERB, PRON, VERB, ADV, ADJ, PROPN, VERB, DET, NOUN, DET, NOUN, AUX, ADJ]","[[place, gets, slammed, We, got]]"
7,Simply Amazing A crafty bar restaurant located in Midtown West loved everything about The waiter very nice he gave us perfect recommendations,"[ADV, ADJ, DET, ADJ, NOUN, NOUN, VERB, ADP, PROPN, PROPN, VERB, PRON, ADP, DET, NOUN, ADV, ADJ, PRON, VERB, PRON, ADJ, NOUN]","[[restaurant, located, in, Midtown, West]]"
8,Went in with no reservation a Sunday prefixe brunch for 26 is a good deal Plus Food good tables are a bit small though is only downside,"[VERB, ADP, ADP, DET, NOUN, DET, PROPN, NOUN, NOUN, ADP, NUM, AUX, DET, ADJ, NOUN, PROPN, PROPN, ADJ, NOUN, AUX, DET, NOUN, ADJ, SCONJ, AUX, ADV, ADJ]",[]
9,Nice place for drinks dinner best to make reservations,"[ADJ, NOUN, ADP, VERB, NOUN, ADV, PART, VERB, NOUN]",


###anything after in preparation to feed back into top 10 and bottom 10 endpoint

In [0]:
corpus = st.CorpusFromPandas(df, 
                          category_col=2, 
                          text_col=1,
                          nlp=nlp).build()

term_freq_df = corpus.get_term_freq_df()
term_freq_df['highratingscore'] = corpus.get_scaled_f_scores('5.0 star rating')

term_freq_df['poorratingscore'] = corpus.get_scaled_f_scores('1.0 star rating')
dh = term_freq_df.sort_values(by= 'highratingscore', ascending = False)
dh = dh[['highratingscore', 'poorratingscore']]
dh = dh.reset_index(drop=False)

In [0]:
dh = dh.rename(columns={'highratingscore':'score'})
dh = dh.drop(columns='poorratingscore')
positive_df = dh.head(10)
negative_df = dh.tail(10)
# word_df = pd.concat([positive_df, negative_df])
# word_df

In [0]:
results = {'positive': [{'term': pos_term, 'score': pos_score} for pos_term, pos_score in zip(positive_df['term'], positive_df['score'])], 'negative': [{'term': neg_term, 'score': neg_score} for neg_term, neg_score in zip(negative_df['term'], negative_df['score'])]}

In [0]:
results

{'negative': [{'score': 0.039700804423687064, 'term': 'and i'},
  {'score': 0.037224653530486096, 'term': 'they'},
  {'score': 0.034710278387564464, 'term': 'got'},
  {'score': 0.034707647404456254, 'term': 'that'},
  {'score': 0.030518863066079205, 'term': 'pasta'},
  {'score': 0.02680935655979494, 'term': 'would'},
  {'score': 0.019928744061453618, 'term': 'like'},
  {'score': 0.017201416211207965, 'term': 'have'},
  {'score': 0.008092649721088696, 'term': 'not'},
  {'score': 0.0, 'term': 'eggs'}],
 'positive': [{'score': 1.0, 'term': 'loved'},
  {'score': 0.988956800864008, 'term': 'attentive'},
  {'score': 0.9815765935875795, 'term': 'was great'},
  {'score': 0.9812644776389665, 'term': 'cocktails'},
  {'score': 0.9718924731693325, 'term': 'loved the'},
  {'score': 0.9663643184788641, 'term': 'vibe'},
  {'score': 0.9663643184788641, 'term': 'well'},
  {'score': 0.9659480602166295, 'term': 'by'},
  {'score': 0.9585377232137955, 'term': 'amazing'},
  {'score': 0.9533338688043136, 'te