# COVID-19's Business Impact Using Yelp Dataset

In [76]:
from tqdm import tqdm

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as py
py.offline.init_notebook_mode(connected=True)

import chart_studio

import nltk
from nltk import FreqDist
from nltk.corpus import stopwords

In [3]:
username = 'beiqizhou' # medium username
api_key = 'YJeWb4eTmUSJUEqyeIwg' # medium api key 
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

## 1. Review Counts As An Indicator of Business Performance: 

In [5]:
data_review = pd.read_csv("C:/Users/becky/OneDrive/Desktop/yelp/yelp_review.csv")

In [6]:
data_review.dtypes

review_id       object
user_id         object
business_id     object
stars          float64
useful           int64
funny            int64
cool             int64
text            object
date            object
year             int64
year_month      object
review_day      object
dtype: object

### 1.1 Review Counts Per Year

In [7]:
data_review["year"] = data_review["date"].str[:4]

In [8]:
data_review1 = data_review[["year","business_id"]].groupby(["year"]).size().reset_index(name='counts')

In [9]:
fig = px.bar(data_review1, x="year",y="counts",title = "Review Counts Per Year",color="counts",
            color_continuous_scale=px.colors.sequential.Reds,
            labels=dict(counts="Total Review Counts", year="Year"))
fig.show()

In [10]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_years', auto_open=True)

'https://plotly.com/~beiqizhou/55/'

### 1.2 Over Years Percentage Changes in Review Counts

In [11]:
data_review1["Review_Percent_Change"] = data_review1["counts"].pct_change()

In [12]:
data_review1["Color"] = np.where(data_review1["Review_Percent_Change"]<0, '#ad1a0c', '#5c8c3c')

In [13]:
fig = go.Figure()
fig.add_trace(
    go.Bar(name='Review_Percent_Change',
           x=data_review1['year'][2:],
           y=data_review1['Review_Percent_Change'][2:],
           marker_color=data_review1['Color'][2:]))
fig.update_layout(barmode='stack',
                 title ="Over Years Percentage Changes in Review Counts")

fig.update_xaxes(title="Years")

fig.update_yaxes(title="Percentage Change (%)")

fig.show()

In [14]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_years_percent', auto_open=True)

'https://plotly.com/~beiqizhou/58/'

### 1.3 Review Counts Per Month During Covid

In [15]:
data_review["year_month"] = data_review["date"].str[:7]
data_review["review_day"] = data_review["date"].str[:10]

In [16]:
data_review_sub = data_review[(data_review["year"] == "2020") | (data_review["year"] == "2021")]

In [17]:
data_review_sub1 = data_review_sub[["year_month","business_id"]].groupby(["year_month"]).size().reset_index(name='counts')

In [18]:
fig = px.bar(data_review_sub1, x="year_month",y="counts",title = "Review Counts Per Month During Covid",
            color="counts", color_continuous_scale=px.colors.sequential.Reds,
            labels=dict(counts="Total Review Counts", year="Months"))

#add line
fig.add_vrect(x0='2019-12', x1='2020-02', line_width=0, fillcolor="green", opacity=0.1,
             annotation_text="Normal", annotation_position="top left")

#add line
fig.add_vrect(x0='2020-02', x1='2020-04', line_width=0, fillcolor="red", opacity=0.1,
             annotation_text="Sharp Drop", annotation_position="top left")

#add line
fig.add_vrect(x0='2020-04', x1='2020-08', line_width=0, fillcolor="orange", opacity=0.1,
             annotation_text="Recovery", annotation_position="top left")

#add line
fig.add_vrect(x0='2020-08', x1='2020-10', line_width=0, fillcolor="orange", opacity=0.1,
             annotation_text="Stabilized", annotation_position="top left")
#add line
fig.add_vrect(x0='2020-10', x1='2021-02', line_width=0, fillcolor="Yellow", opacity=0.1,
             annotation_text="Slight Decrease", annotation_position="top left")

fig.show()

In [19]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_month', auto_open=True)

'https://plotly.com/~beiqizhou/60/'

## 2. Review Counts Vs. Covid-19 Cases: 

In [20]:
data_review_sub2 = data_review_sub[["review_day","business_id"]].groupby(["review_day"]).size().reset_index(name='counts')

In [21]:
covid_agg = pd.read_excel("C:/Users/becky/OneDrive/Desktop/Summer/COVID/countries-aggregated_csv.xlsx")

In [22]:
#since yelp data is only based in US state, we will only use US covid data
covid_us = covid_agg[covid_agg["Country"] == "US"]

In [23]:
#daily cases
covid_us["case_per_day"] = covid_us['Total'].diff().fillna(covid_us['Total'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [24]:
bar1 = data_review_sub2.iloc[21:,:] #01-28-2021

In [25]:
x_value = bar1["review_day"].to_list()
y_value = bar1["counts"].to_list()

In [26]:
covid_us = covid_us.reset_index()
line1 = covid_us.iloc[:373,:]

In [27]:
y2_value = line1["case_per_day"]

In [28]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Bar(x=x_value, y=y_value, name="review counts"),
    secondary_y=False
)

fig.add_trace(
    go.Scatter(x=x_value, y=y2_value, name="daily cases"),
    secondary_y=True,
)

#add line
fig.add_vrect(x0='2020-01-22', x1='2020-03-14', line_width=0, fillcolor="green", opacity=0.1,
             annotation_text="No measures", annotation_position="top left")

fig.add_vrect(x0='2020-03-15', x1='2020-06-01', line_width=0, fillcolor="red", opacity=0.1,
             annotation_text="First lockdown", annotation_position="top left")


fig.add_vrect(x0='2020-06-02', x1='2020-10-31', line_width=0, fillcolor="orange", opacity=0.1,
             annotation_text="Reopening", annotation_position="top left")


fig.add_vrect(x0='2020-11-01', x1='2021-01-28', line_width=0, fillcolor="yellow", opacity=0.1,
             annotation_text="Curfew", annotation_position="top left")

#color
fig.update_traces(marker=dict(color="#de7676"),
                  selector=dict(type="bar"))

fig.update_traces(marker=dict(color="#b01a1a"),
                  selector=dict(type="scatter"))

# Add figure title
fig.update_layout(
    title_text="Review Counts and Covid Cases")

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Number of Reviews", secondary_y=False)
fig.update_yaxes(title_text="Daily Cases", secondary_y=True)

fig.show()

In [29]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_Covid', auto_open=True)

'https://plotly.com/~beiqizhou/62/'

## 3. Covid-19's Impacts on Different Business Sectors: 

In [30]:
data_business = pd.read_csv("C:/Users/becky/OneDrive/Desktop/yelp/yelp_business.csv")


Columns (27,93,96,106,107,108) have mixed types.Specify dtype option on import or set low_memory=False.



In [31]:
data_all = pd.merge(left=data_review_sub, right=data_business, how='left', left_on='business_id', right_on='business_id')

In [32]:
data_all.dtypes

review_id                                     object
user_id                                       object
business_id                                   object
stars_x                                      float64
useful                                         int64
                                              ...   
attributes_DietaryRestrictions_soy-free       object
attributes_DietaryRestrictions_vegetarian     object
attributes_HairSpecializesIn                  object
attributes_Open24Hours                        object
attributes_DietaryRestrictions                object
Length: 120, dtype: object

In [33]:
data_all["categories_new"]= data_all["categories"].str.split(",")

In [34]:
data_all["categories_new"].isnull().sum()

99

In [35]:
data_all = data_all[data_all["categories_new"].notna()]

In [36]:
cat = data_all["categories_new"].to_list()
categories_general = []
for i in cat:
    if " Restaurants" in i or "Restaurants" in i:
        categories_general.append("Restaurants")
    elif "Coffee & Tea" in i or " Coffee & Tea" in i:
        categories_general.append("Drinks & Snacks")
    elif "Beauty & Spas" in i or " Beauty & Spas" in i:
        categories_general.append("Beauty")
    elif "Pets" in i or " Pets" in i:
        categories_general.append("Pets")
    elif "Apartments" in i or " Apartments" in i:
        categories_general.append("Housing")
    elif "Hotels & Travel" in i or " Hotels & Travel" in i:
        categories_general.append("Travel")
    elif "Automotive" in i or " Automotive" in i:
        categories_general.append("Auto")
    elif "Active Life" in i or " Active Life" in i:
        categories_general.append("Exercise")
    elif "Ice Cream & Frozen Yogurt" in i or " Ice Cream & Frozen Yogurt" in i:
        categories_general.append("Drinks & Snacks")
    elif "Car Rental" in i or " Car Rental" in i:
        categories_general.append("Travel")
    elif "Grocery" in i or " Grocery" in i:
        categories_general.append("Grocery")
    elif "Bubble Tea" in i or " Bubble Tea" in i:
        categories_general.append("Drinks & Snacks")
    elif "Bakeries" in i or " Bakeries" in i:
        categories_general.append("Drinks & Snacks")
    elif "Donuts" in i or " Donuts" in i:
        categories_general.append("Drinks & Snacks")
    elif "Bars" in i or " Bars" in i:
        categories_general.append("Bars")
    elif "Home Services" in i or " Home Services" in i:
        categories_general.append("Home Services")
    elif " Health & Medical" in i or "Health & Medical" in i:
        categories_general.append("Health")
    elif "Local Services" in i or " Local Services" in i:
        categories_general.append("Local Services")
    elif 'Shopping' in i or ' Shopping' in i:
        categories_general.append("Shopping")
    elif "Financial Services" in i or " Financial Services" in i:
        categories_general.append("Financial Services")
    elif ' Juice Bars & Smoothies' in i or "Juice Bars & Smoothies" in i:
        categories_general.append("Drinks & Snacks")
    elif 'Event Planning & Services' in i or " Event Planning & Services" in i:
        categories_general.append("Special Event")
    elif 'Breweries' in i  or " Breweries" in i:
        categories_general.append("Bars")
    elif 'Beer' in i or " Beer" in i:
        categories_general.append("Bars")
    elif ' Yelp Events' in i or "Yelp Events" in i:
        categories_general.append("Special Event")
    elif "Education" in i or " Education" in i:
        categories_general.append("Education")
    elif "Arts & Entertainment" in i or " Arts & Entertainment" in i:
        categories_general.append("Arts & Entertainment")
    elif " Bagels" in i or "Bagels" in i:
        categories_general.append("Drinks & Snacks")
    elif ' Distilleries' in i  or "Distilleries" in i:
        categories_general.append("Bars")
    elif ' Public Services & Government' in i or 'Public Services & Government' in i:
        categories_general.append('Public Services & Government')
    elif ' Mass Media' in i or 'Mass Media' in i:
        categories_general.append('Mass Media')
    elif ' International Grocery' in i or 'International Grocery' in i:
        categories_general.append('Grocery')
    elif 'Seafood Markets' in i or ' Seafood Markets' in i:
        categories_general.append('Market')
    elif ' Religious Organizations' in i or 'Religious Organizations' in i:
        categories_general.append('Religious Organizations')
    elif  "Architects" in i or ' Architects' in i:
        categories_general.append('Home Services')
    elif 'Specialty Food' in i or ' Specialty Food' in i:
        categories_general.append('Grocery')
    elif "Donairs" in i or " Donairs" in i:
        categories_general.append("Restaurants")
    elif "Nightlife" in i  or " Nightlife" in i:
        categories_general.append("Bars")
    elif 'Food Trucks' in i  or ' Food Trucks' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Food Delivery Services' in i  or " Food Delivery Services" in i:
        categories_general.append("Drinks & Snacks")
    elif ' Street Vendors' in i  or "Street Vendors" in i:
        categories_general.append("Drinks & Snacks")
    elif ' Professional Services' in i or "Professional Services" in i:
        categories_general.append("Professional Services")
    elif  "Poke" in i or " Poke" in i:
        categories_general.append("Restaurants")
    elif 'Local Flavor' in i or ' Local Flavor' in i:
        categories_general.append('Grocery')
    elif 'Tea Rooms' in i  or ' Tea Rooms' in i:
        categories_general.append("Drinks & Snacks")
    elif ' Desserts' in i  or 'Desserts' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Convenience Stores' in i  or ' Convenience Stores' in i:
        categories_general.append("Grocery")
    elif ' Farmers Market' in i  or 'Farmers Market' in i:
        categories_general.append("Market")
    elif 'Patisserie/Cake Shop' in i  or ' Patisserie/Cake Shop' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Gelato' in i  or ' Gelato' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Coffee Roasteries' in i  or ' Coffee Roasteries' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Cideries' in i  or ' Cideries' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Pretzels' in i  or ' Pretzels' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Empanadas' in i  or ' Empanadas' in i:
        categories_general.append("Drinks & Snacks")
    elif 'Kombucha' in i  or ' Kombucha' in i:
        categories_general.append("Drinks & Snacks") 
    elif 'Butcher' in i  or ' Butcher' in i:
        categories_general.append("Grocery")
    elif 'Custom Cakes' in i  or ' Custom Cakes' in i:
        categories_general.append("Drinks & Snacks")
    else: categories_general.append("Others")

In [37]:
data_all["categories_general"] = categories_general

In [38]:
business_type = data_all[["categories_general"]].groupby(["categories_general"]).size().reset_index(name='counts').sort_values(by=["counts"], ascending=False)
business_type.reset_index()

Unnamed: 0,index,categories_general,counts
0,20,Restaurants,400940
1,3,Beauty,37845
2,10,Home Services,31454
3,4,Drinks & Snacks,27195
4,1,Auto,25883
5,21,Shopping,19997
6,9,Health,17772
7,23,Travel,16535
8,6,Exercise,14276
9,12,Local Services,13572


In [39]:
cat_line = data_all[["categories_general","review_day"]].groupby(["categories_general","review_day"]).size().reset_index(name='counts')

In [40]:
fig = px.line(cat_line, x="review_day", y="counts", color='categories_general')
fig.show()

### 3.1 Positive Impacts

In [41]:
pos_impact = cat_line[cat_line["categories_general"] == "Special Event"]

In [42]:
pos_impact["weekly_MA"] = pos_impact["counts"].rolling(window=7).mean()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [43]:
fig = px.line(pos_impact, x="review_day", y="weekly_MA", color='categories_general', title = "Increased Review Counts During Covid",
            labels=dict(weekly_MA="Weekly Reviews MA", review_day="Months"),
            color_discrete_map={'Special Event':'#d9312b'})


#add line
fig.add_vrect(x0='2021-01-10', x1='2021-01-28', line_width=0, fillcolor="green", opacity=0.1)

fig.show()

In [44]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_Covid_pos', auto_open=True)

'https://plotly.com/~beiqizhou/65/'

### 3.2 No Impacts or Slight Increase

In [45]:
#no impact: Housing, Grocery, Market, Financial Services, 
#Education, Public Service Governemnt, Mass Media, Professional Services
list_no = ['Financial Services', 'Grocery', 'Market','Mass Media']

In [46]:
no_impact = cat_line[cat_line["categories_general"].isin(list_no)]

In [47]:
no_impact.categories_general.unique()

array(['Financial Services', 'Grocery', 'Market', 'Mass Media'],
      dtype=object)

In [48]:
weekly_MA = []
for i in list_no:
    MA = []
    new_df = no_impact[no_impact["categories_general"] == i]
    MA = (new_df["counts"].rolling(window=7).mean()).to_list()
    weekly_MA.append(MA)
flat_list = [item for sublist in weekly_MA for item in sublist]   

In [49]:
no_impact["weekly_MA"] = flat_list



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [50]:
fig = px.line(no_impact, x="review_day", y="weekly_MA", color='categories_general', title = "Constant Review Counts During Covid",
            labels=dict(weekly_MA="Weekly Reviews MA", review_day="Months"),
              color_discrete_map={'Financial Services':'#d9312b', 
                                  'Grocery':"#faaab8", 
                                  'Market':"#e38686",
                                  'Mass Media':"#7a0606"}
             )
fig.show()

In [51]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_Covid_no', auto_open=True)

'https://plotly.com/~beiqizhou/69/'

### 3.3 Some Impacts But Back to Normal 

In [52]:
list_some = ['Auto', 'Health', 'Home Services', 'Housing', 'Local Services','Pets', 'Religious Organizations', 'Shopping']

In [53]:
some_impact = cat_line[cat_line["categories_general"].isin(list_some)]

In [54]:
some_impact.categories_general.unique()

array(['Auto', 'Health', 'Home Services', 'Housing', 'Local Services',
       'Pets', 'Religious Organizations', 'Shopping'], dtype=object)

In [55]:
weekly_MA = []
for i in list_some:
    MA = []
    new_df = some_impact[some_impact["categories_general"] == i]
    MA = (new_df["counts"].rolling(window=7).mean()).to_list()
    weekly_MA.append(MA)
flat_list = [item for sublist in weekly_MA for item in sublist]   

In [56]:
some_impact["weekly_MA"] = flat_list



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [57]:
fig = px.line(some_impact, x="review_day", y="weekly_MA", color='categories_general', title = "Review Counts Impacted by Covid-19 But Back to Normal",
            labels=dict(weekly_MA="Weekly Reviews MA", review_day="Months"),
              color_discrete_map={'Auto':'#d9312b', 
                                  'Health':"#faaab8", 
                                  'Home Services':"#e38686",
                                  'Housing':"#7a0606",
                                  'Local Services':"#e80792",
                                  'Pets':"#e6c3d8",
                                  'Religious':"#733259",
                                  'Shopping':"#ed4407"
                                 }
             )

#add line
fig.add_vrect(x0='2020-03-01', x1='2020-04-15', line_width=0, fillcolor="red", opacity=0.1)

#add line
fig.add_vrect(x0='2020-04-16', x1='2020-05-30', line_width=0, fillcolor="green", opacity=0.1)

fig.show()

In [58]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_Covid_some', auto_open=True)

'https://plotly.com/~beiqizhou/75/'

### 3.4 Impacts And Not Recovered to Initial Level 

In [59]:
list_impact = ['Beauty', 'Drinks & Snacks', 'Education', 'Exercise','Restaurants']

In [60]:
impact = cat_line[cat_line["categories_general"].isin(list_impact)]

In [61]:
impact.categories_general.unique()

array(['Beauty', 'Drinks & Snacks', 'Education', 'Exercise',
       'Restaurants'], dtype=object)

In [62]:
weekly_MA = []
for i in list_impact:
    MA = []
    new_df = impact[impact["categories_general"] == i]
    MA = (new_df["counts"].rolling(window=7).mean()).to_list()
    weekly_MA.append(MA)
flat_list = [item for sublist in weekly_MA for item in sublist]   

In [63]:
impact["weekly_MA"] = flat_list



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [64]:
fig = px.line(impact, x="review_day", y="weekly_MA", color='categories_general', title = "Review Counts Impacted by Covid-19 And Not Recovered to Initial Level",
            labels=dict(weekly_MA="Weekly Reviews MA", review_day="Months"),
              color_discrete_map={'Beauty':'#d9312b', 
                                  'Drinks & Snacks':"#e80792", 
                                  'Education':"#e38686",
                                  'Exercise':"#7a0606",
                                  'Restaurants':"#faaab8"
                                 }
             )

#add line
fig.add_vrect(x0='2020-02-21', x1='2020-04-07', line_width=0, fillcolor="red", opacity=0.1)

#add line
fig.add_vrect(x0='2020-04-08', x1='2020-07-01', line_width=0, fillcolor="green", opacity=0.1)

#add line
fig.add_vrect(x0='2020-07-01', x1='2021-01-28', line_width=0, fillcolor="orange", opacity=0.1)


fig.show()

In [65]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_Covid_yes', auto_open=True)

'https://plotly.com/~beiqizhou/78/'

### 3.5 Mostly Impacted

In [66]:
list_most = ['Arts & Entertainment', 'Bars', 'Travel']

In [67]:
most_impact = cat_line[cat_line["categories_general"].isin(list_most)]

In [68]:
most_impact.categories_general.unique()

array(['Arts & Entertainment', 'Bars', 'Travel'], dtype=object)

In [69]:
weekly_MA = []
for i in list_most:
    MA = []
    new_df = most_impact[most_impact["categories_general"] == i]
    MA = (new_df["counts"].rolling(window=7).mean()).to_list()
    weekly_MA.append(MA)
flat_list = [item for sublist in weekly_MA for item in sublist]   

In [70]:
most_impact["weekly_MA"] = flat_list



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [71]:
fig = px.line(most_impact, x="review_day", y="weekly_MA", color='categories_general', title = "Review Counts Mostly Impacted by Covid-19",
            labels=dict(weekly_MA="Weekly Reviews MA", review_day="Months"),
              color_discrete_map={'Arts & Entertainment':'#faaab8', 
                                  'Bars':"#c91818", 
                                  'Travel':"#e38686"
                                 }
             )

#add line
fig.add_vrect(x0='2020-02-21', x1='2020-05-15', line_width=0, fillcolor="red", opacity=0.1)

#add line
fig.add_vrect(x0='2020-05-16', x1='2021-01-28', line_width=0, fillcolor="orange", opacity=0.1)


fig.show()

In [72]:
import chart_studio.plotly as py
py.plot(fig, filename = 'Review_Covid_most', auto_open=True)

'https://plotly.com/~beiqizhou/80/'

## 4. Outlook

### 4.1 Most Used Words in Covid Banners

In [73]:
data_covid = pd.read_csv("C:/Users/becky/OneDrive/Desktop/yelp/yelp_covid.csv")

In [75]:
data_covid["Covid Banner"][6]

"We are closely monitoring COVID-19 and adjusting how we do business. Please see important information below regarding COVID-19 and how it is impacting the way we do business. We Are Just a Phone Call Away! The CDC's social distancing guidelines have temporarily forced the physical closing of many Leasing Offices across the country. Most of our communities have switched to a non-contact office - but we are still here for you by phone & email. Visit our business information. I am in the market for a new home, can I tour your community with a team member? We are currently utilizing multiple, virtual tour options for prospect. In addition to placing pre-recorded, virtual tours on our websites, many of our communities are also offering 1:1 scheduled tours via video chat, social media, etc. Please contact us directly for more information. For more information on our community updates, or for additional rental resources visit our website: https://www.community-covid-19-updates.com/"

In [77]:
#remove false row
covid_banner_re = data_covid["Covid Banner"].to_list()
covid_banner_new = []
for i in range(0,len(covid_banner_re)):
    if covid_banner_re[i] != "FALSE":
        covid_banner_new.append(covid_banner_re[i])

In [78]:
listToStr = ' '.join([str(elem) for elem in covid_banner_new])

In [79]:
tokens = listToStr.split()

In [80]:

sr= stopwords.words('english')
clean_tokens = tokens[:]
for token in tqdm(tokens):
    if token in stopwords.words('english'):
        
        clean_tokens.remove(token)

100%|█████████████████████████████████████████████████████████████████████████| 840108/840108 [18:15<00:00, 766.99it/s]


In [81]:
words_no_punc = []

for w in tqdm(clean_tokens):
    if w.isalpha():
        words_no_punc.append(w.lower())

100%|█████████████████████████████████████████████████████████████████████| 532124/532124 [00:00<00:00, 2673828.16it/s]


In [82]:
freqdist = FreqDist(words_no_punc)

In [83]:
word_freq = pd.DataFrame(freqdist.items(), columns=['word', 'frequency'])
word_freq = word_freq.iloc[1:,:]
word_freq = word_freq.sort_values(by=["frequency"], ascending=False)

In [84]:
banner_top = word_freq.head(20)

In [85]:
fig = px.bar(banner_top, x="word",y="frequency",color='frequency', title = "Most Used Words in Covid Banner", 
             orientation='v',
            color_continuous_scale=px.colors.sequential.Reds,
            labels=dict(word="Most Mentioned Words", frequency="Frequency"))
fig.show()

In [86]:
import chart_studio.plotly as py
py.plot(fig, filename = 'banner_message', auto_open=True)

'https://plotly.com/~beiqizhou/92/'

### 4.2 Percentage of Business Offering Takeout

In [87]:
#RestaurantsTakeOut
takeout = data_business["attributes_RestaurantsTakeOut"].to_list()
for i in range(0,len(takeout)):
    if (takeout[i] != "True" or takeout[i] != "False"):
        takeout[i] == "Not Applicable"
data_business["attributes_RestaurantsTakeOut_new"] = takeout

In [88]:
graph = data_business[["attributes_RestaurantsTakeOut_new"]].groupby(["attributes_RestaurantsTakeOut_new"]).size().reset_index(name='counts')
fig = px.pie(graph, values="counts" , names = "attributes_RestaurantsTakeOut_new", 
             title='Percentage of Business Offering Takeout',color_discrete_sequence=px.colors.sequential.RdBu)
fig.show()

In [89]:
import chart_studio.plotly as py
py.plot(fig, filename = 'take_out', auto_open=True)

'https://plotly.com/~beiqizhou/94/'

### 4.3 Percentage of Business Offering Delivery

In [90]:
#attributes_RestaurantsDelivery
delivery = data_business["attributes_RestaurantsDelivery"].to_list()
for i in range(0,len(delivery)):
    if (delivery[i] != "True" or delivery[i] != "False"):
        delivery[i] == "Not Applicable"
data_business["attributes_RestaurantsDelivery_new"] = delivery

In [91]:
graph = data_business[["attributes_RestaurantsDelivery_new"]].groupby(["attributes_RestaurantsDelivery_new"]).size().reset_index(name='counts')
fig = px.pie(graph, values="counts" , names = "attributes_RestaurantsDelivery_new", 
             title='Percentage of Business Offering Delivery',color_discrete_sequence=px.colors.sequential.RdBu)
fig.show()

In [92]:
import chart_studio.plotly as py
py.plot(fig, filename = 'delivery', auto_open=True)

'https://plotly.com/~beiqizhou/96/'