DATA SCRAPING

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
r = requests.get('https://www.yelp.com/biz/tesla-san-francisco?osq=Tesla+Dealership&start=10')

In [3]:
soup = BeautifulSoup(r.text, 'html.parser')

In [4]:
results = soup.findAll(class_='raw__09f24__T4Ezm')

In [5]:
reviews = []
for res in results[5:]:
    reviews.append(res.text)

ANALYSIS

In [6]:
import numpy as np
import pandas as pd

In [7]:
df = pd.DataFrame(np.array(reviews), columns = ['review'])

In [8]:
df['word_count'] = df['review'].apply(lambda x: len(str(x).split(" ")))

In [9]:
df['char_count'] = df['review'].str.len()

In [10]:
def avg_word(review):
  words = review.split()
  return (sum(len(word) for word in words) / len(words))
df['avg_word'] = df['review'].apply(lambda x: avg_word(x))

In [11]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
df['stopword_count'] = df['review'].apply(lambda x: len([x for x in x.split() if x in stop_words]))

In [12]:
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count
0,Today was delivery day and we were pretty exci...,306,1730,4.656863,133
1,TERRIBLE customer services. The guy name CK in...,90,527,4.866667,35
2,I recently brought a brand new Tesla to fix so...,156,821,4.269231,58
3,The service department is horrible in getting ...,73,374,4.125,32
4,The service center here is TERRIBLE. I had a t...,267,1424,4.337079,113
5,The service center here is TERRIBLE. I had a t...,298,1615,4.422819,127
6,"Unfortunately, as a recent new Tesla owner, I ...",184,1079,4.86413,74
7,My Tesla 2016 model x would be the perfect car...,58,345,4.948276,23
8,I had an issue with the undercarriage cover. T...,29,164,4.655172,11
9,We dropped our car at our scheduled appointmen...,125,663,4.312,55


CLEANING

In [13]:
df['review_lowercase'] = df['review'].apply(lambda x: " ".join(x.lower() for x in x.split()))

In [14]:
df['review_nopunctuation'] = df['review_lowercase'].str.replace('[^\w\s]', '')

  df['review_nopunctuation'] = df['review_lowercase'].str.replace('[^\w\s]', '')


In [15]:
df['review_nopunc_nostop'] = df['review_nopunctuation'].apply(lambda x: " ".join(x for x in x.split() if x not in stop_words))

In [16]:
freq= pd.Series(" ".join(df['review_nopunc_nostop']).split()).value_counts()[:30]

In [17]:
other_stopwords = ['get', 'us', 'see', 'use', 'didnt', 'said', 'asked', 'day', 'go', 'even', 'ive', 'right', 'left', 'always', 'would', 'told', 'get', 'one', 'ive', 'go', 'even', 'also', 'ever', 'x', 'take', 'let', 'ap', 'say' ]

In [18]:
df['review_nopunc_nostop_nocommon'] = df['review_nopunc_nostop'].apply(lambda x: "".join(" ".join(x for x in x.split() if x not in other_stopwords)))

LEMMATIZING

In [19]:
from textblob import Word

df['cleaned_review'] = df['review_nopunc_nostop_nocommon'].apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
df['cleaned_review']

0     today delivery pretty excited collect car arri...
1     terrible customer service guy name ck front id...
2     recently brought brand new tesla fix post deli...
3     service department horrible getting loaner car...
4     service center terrible tire needed patched ta...
5     service center terrible tire needed patched ta...
6     unfortunately recent new tesla owner agree maj...
7     tesla 2016 model perfect car ifservice tech kn...
8     issue undercarriage cover rep daniel manager r...
9     dropped car scheduled appointment 11am basic c...
10    inconsistency excellent service super duper ha...
Name: cleaned_review, dtype: object

SENTIMENT ANALYSIS

In [20]:
from textblob import TextBlob
df['polarity'] = df['cleaned_review'].apply(lambda x: TextBlob(x).sentiment[0])

In [21]:
df['subjectivity'] = df['cleaned_review'].apply(lambda x: TextBlob(x).sentiment[1])
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,review_lowercase,review_nopunctuation,review_nopunc_nostop,review_nopunc_nostop_nocommon,cleaned_review,polarity,subjectivity
0,Today was delivery day and we were pretty exci...,306,1730,4.656863,133,today was delivery day and we were pretty exci...,today was delivery day and we were pretty exci...,today delivery day pretty excited collect car ...,today delivery pretty excited collect car arri...,today delivery pretty excited collect car arri...,0.291106,0.667274
1,TERRIBLE customer services. The guy name CK in...,90,527,4.866667,35,terrible customer services. the guy name ck in...,terrible customer services the guy name ck in ...,terrible customer services guy name ck front i...,terrible customer services guy name ck front i...,terrible customer service guy name ck front id...,-0.053247,0.475758
2,I recently brought a brand new Tesla to fix so...,156,821,4.269231,58,i recently brought a brand new tesla to fix so...,i recently brought a brand new tesla to fix so...,recently brought brand new tesla fix post deli...,recently brought brand new tesla fix post deli...,recently brought brand new tesla fix post deli...,0.03683,0.429604
3,The service department is horrible in getting ...,73,374,4.125,32,the service department is horrible in getting ...,the service department is horrible in getting ...,service department horrible getting loaner car...,service department horrible getting loaner car...,service department horrible getting loaner car...,-0.4,0.6625
4,The service center here is TERRIBLE. I had a t...,267,1424,4.337079,113,the service center here is terrible. i had a t...,the service center here is terrible i had a ti...,service center terrible tire needed patched ta...,service center terrible tire needed patched ta...,service center terrible tire needed patched ta...,0.007917,0.427381
5,The service center here is TERRIBLE. I had a t...,298,1615,4.422819,127,the service center here is terrible. i had a t...,the service center here is terrible i had a ti...,service center terrible tire needed patched ta...,service center terrible tire needed patched ta...,service center terrible tire needed patched ta...,0.00754,0.426077
6,"Unfortunately, as a recent new Tesla owner, I ...",184,1079,4.86413,74,"unfortunately, as a recent new tesla owner, i ...",unfortunately as a recent new tesla owner i ha...,unfortunately recent new tesla owner agree maj...,unfortunately recent new tesla owner agree maj...,unfortunately recent new tesla owner agree maj...,0.050758,0.294886
7,My Tesla 2016 model x would be the perfect car...,58,345,4.948276,23,my tesla 2016 model x would be the perfect car...,my tesla 2016 model x would be the perfect car...,tesla 2016 model x would perfect car ifservice...,tesla 2016 model perfect car ifservice techs k...,tesla 2016 model perfect car ifservice tech kn...,0.25,0.383333
8,I had an issue with the undercarriage cover. T...,29,164,4.655172,11,i had an issue with the undercarriage cover. t...,i had an issue with the undercarriage cover th...,issue undercarriage cover rep daniel manager r...,issue undercarriage cover rep daniel manager r...,issue undercarriage cover rep daniel manager r...,0.3,0.6
9,We dropped our car at our scheduled appointmen...,125,663,4.312,55,we dropped our car at our scheduled appointmen...,we dropped our car at our scheduled appointmen...,dropped car scheduled appointment 11am told ba...,dropped car scheduled appointment 11am basic c...,dropped car scheduled appointment 11am basic c...,-0.020455,0.296861
