# Prepare data

In [1]:
# Import packages you need

import pandas as pd
import numpy as np

# Packages for dashboard
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px

# Packages for handling dates
from dateutil import parser
from datetime import date

In [2]:
# Data preprocessing
# Add more cleaning procedures if needed

restaurant_df=pd.read_excel(r'E:\LMU\rmds_lab\competition_2021\Q3_competition\sample_dashboard\inputs\Q3_competition_detail_dataset.xlsx')
restaurant_df['latitude']=restaurant_df['latitude'].astype('float')
restaurant_df['longitude']=restaurant_df['longitude'].astype('float')
restaurant_df=restaurant_df.reset_index()
restaurant_df=restaurant_df.drop('index',axis=1)
restaurant_df['Price'] = restaurant_df['price'].apply(lambda x: len(str(x)))


In [3]:
# Cleaning city names
# Add more cleaning procedures if needed

restaurant_df['City'] = restaurant_df.city.apply(lambda x: x.strip().lower())

# Column name changed
restaurant_df['City'] = restaurant_df.City.apply(lambda x: x[:-2] if x[-2:]=='ca' else x)
restaurant_df['City'] = restaurant_df.City.apply(lambda x: ' '.join(x.split()))
restaurant_df['City'] = restaurant_df.City.replace('lost angeles', 'los angeles')
restaurant_df['City'] = restaurant_df.City.replace('longbeach', 'long beach')
restaurant_df['City'] = restaurant_df.City.replace('rowland hghts', 'rowland heights')
restaurant_df['City'] = restaurant_df.City.replace('rowland heightes', 'rowland heights')
restaurant_df['City'] = restaurant_df.City.replace('santa fe spring', 'santa fe springs')
restaurant_df['City'] = restaurant_df.City.replace('shermanoaks', 'sherman oaks')
restaurant_df['City'] = restaurant_df.City.replace('canyon cntry', 'canyon country')
restaurant_df['City'] = restaurant_df.City.replace('studiocity', 'studio city')
restaurant_df['City'] = restaurant_df.City.replace('santa moni', 'santa monica')

# Set up capital letters for first letter
restaurant_df['City'] = restaurant_df.City.apply(lambda x: str(x)[0].upper()+ str(x)[1:])
restaurant_df['categories01'] = restaurant_df.categories01.apply(lambda x: str(x)[0].upper()+ str(x)[1:])
restaurant_df['categories02'] = restaurant_df.categories02.apply(lambda x: str(x)[0].upper()+ str(x)[1:])
restaurant_df['categories03'] = restaurant_df.categories03.apply(lambda x: str(x)[0].upper()+ str(x)[1:])

# Using this file for map and bar chart
map_df = restaurant_df.copy()
bar_df = restaurant_df.copy()
map_df.head()

Unnamed: 0,id,name,is_claimed,is_closed,phone,review_count,categories01,categories02,categories03,rating,...,city,address,restaurant_url,image_url,latitude,longitude,photos,cross_streets,Price,City
0,nzgC5hhlnSq2DYbJbtH5MQ,Foxy's Landing & Restaurant,True,False,16619490000.0,52,Breakfast_brunch,Tradamerican,Nan,4.0,...,Lancaster,"['4555 W Avenue G', 'Lancaster, CA 93536']",https://www.yelp.com/biz/foxys-landing-and-res...,https://s3-media1.fl.yelpcdn.com/bphoto/LooWtz...,34.738829,-118.216215,['https://s3-media1.fl.yelpcdn.com/bphoto/LooW...,,2,Lancaster
1,i-2aG9_PQBEy7LrsRv0Ivg,Mosman's Steakhouse,True,False,16619490000.0,63,Bars,Steak,Nan,3.5,...,Lancaster,"['46645 W 60th W', 'Lancaster, CA 93536']",https://www.yelp.com/biz/mosmans-steakhouse-la...,https://s3-media3.fl.yelpcdn.com/bphoto/JJ3mkC...,34.73058,-118.23836,['https://s3-media3.fl.yelpcdn.com/bphoto/JJ3m...,,2,Lancaster
2,DJoeogRsOW5s9MzgveHQ2A,El Tamarindo,True,False,16617230000.0,11,Salvadoran,Nan,Nan,3.5,...,Lancaster,"['551 W Ave I', 'Ste E', 'Lancaster, CA 93534']",https://www.yelp.com/biz/el-tamarindo-lancaste...,https://s3-media1.fl.yelpcdn.com/bphoto/UMSkfH...,34.707469,-118.146286,['https://s3-media1.fl.yelpcdn.com/bphoto/UMSk...,,1,Lancaster
3,hwWfv3sSxV3a47UAdSVT5w,Subway,True,False,16617300000.0,13,Sandwiches,Nan,Nan,2.5,...,Lancaster,"['1821 W Ave I', 'Unit 103', 'Lancaster, CA 93...",https://www.yelp.com/biz/subway-lancaster-106?...,https://s3-media3.fl.yelpcdn.com/bphoto/4Yo7Ea...,34.705218,-118.16418,['https://s3-media3.fl.yelpcdn.com/bphoto/4Yo7...,,1,Lancaster
4,TxU0fwF2N2nVhCpzokc1Pg,Little Caesars,True,False,16619460000.0,54,Pizza,Nan,Nan,1.5,...,Lancaster,"['1841 W Ave I', 'Bldg 2, Ste D', 'Lancaster, ...",https://www.yelp.com/biz/little-caesars-lancas...,https://s3-media2.fl.yelpcdn.com/bphoto/-Fz1W5...,34.7051,-118.16487,['https://s3-media2.fl.yelpcdn.com/bphoto/-Fz1...,,1,Lancaster


In [4]:
# Create city_list

city_list = restaurant_df.groupby('City').count().sort_values(by='id',ascending=False)[:10].index.to_list()
city_list.remove('Los angeles')
city_list.append('Pasadena')
city_list.sort()


# Create category_list

category_list = restaurant_df.groupby('categories01').count().sort_values(by='id',ascending=False)[:10].index.to_list()
category_list.sort()
print(city_list)
print(category_list)

['Beverly hills', 'Culver city', 'Glendale', 'Huntington park', 'Lancaster', 'Long beach', 'Palmdale', 'Pasadena', 'Santa monica', 'South gate']
['Burgers', 'Chinese', 'Hotdogs', 'Japanese', 'Korean', 'Mexican', 'Newamerican', 'Pizza', 'Sandwiches', 'Sushi']


In [5]:
# Read review data
review_df = pd.read_excel(r'E:\LMU\rmds_lab\competition_2021\Q3_competition\sample_dashboard\inputs\Q3_competition_review_dataset.xlsx')

# Merge together
extended_df = review_df.merge(restaurant_df, left_on = 'id', right_on = 'id')
extended_df.head()

Unnamed: 0,id,review_id,review_text,review_rating,review_time_created,name,is_claimed,is_closed,phone,review_count,...,city,address,restaurant_url,image_url,latitude,longitude,photos,cross_streets,Price,City
0,cal0Wpupxj9c_AV7WzDXsw,AyueC5Vq_5lUKJFqSzXWWw,Slightly turned off by the hostess. She wasn't...,3.0,2021-07-13 15:01:59,GRANVILLE,True,False,14245230000.0,1610,...,West Hollywood,"['8701 Beverly Blvd', 'West Hollywood, CA 90048']",https://www.yelp.com/biz/granville-west-hollyw...,https://s3-media2.fl.yelpcdn.com/bphoto/EuQ6eU...,34.07713,-118.38068,['https://s3-media2.fl.yelpcdn.com/bphoto/EuQ6...,,2,West hollywood
1,cal0Wpupxj9c_AV7WzDXsw,yaH4AmHUz9b3Ywv4VtvU5g,Wish I would have known about no brunch at the...,3.0,2021-07-06 13:50:42,GRANVILLE,True,False,14245230000.0,1610,...,West Hollywood,"['8701 Beverly Blvd', 'West Hollywood, CA 90048']",https://www.yelp.com/biz/granville-west-hollyw...,https://s3-media2.fl.yelpcdn.com/bphoto/EuQ6eU...,34.07713,-118.38068,['https://s3-media2.fl.yelpcdn.com/bphoto/EuQ6...,,2,West hollywood
2,cal0Wpupxj9c_AV7WzDXsw,YiuFLFWsrP92_QWa-d2W2Q,I had an amazing experience at Granville.\n\nw...,5.0,2021-08-09 21:06:24,GRANVILLE,True,False,14245230000.0,1610,...,West Hollywood,"['8701 Beverly Blvd', 'West Hollywood, CA 90048']",https://www.yelp.com/biz/granville-west-hollyw...,https://s3-media2.fl.yelpcdn.com/bphoto/EuQ6eU...,34.07713,-118.38068,['https://s3-media2.fl.yelpcdn.com/bphoto/EuQ6...,,2,West hollywood
3,jVYU9iXvBMiC2A4H12Azfg,VyKvwjOuJxKWiLlyzsqQ_A,Photo dump from dinner on Aug 8th. Literally c...,5.0,2021-08-09 13:01:09,AOC,True,False,13108600000.0,2362,...,Los Angeles,"['8700 W 3rd St', 'Los Angeles, CA 90048']",https://www.yelp.com/biz/aoc-los-angeles?adjus...,https://s3-media4.fl.yelpcdn.com/bphoto/UGnsMC...,34.073416,-118.381928,['https://s3-media4.fl.yelpcdn.com/bphoto/UGns...,,3,Los angeles
4,jVYU9iXvBMiC2A4H12Azfg,D0-MjyINO2u9IRmf1opaUQ,I've had this place bookmarked on my Yelp for ...,2.0,2021-07-17 16:28:47,AOC,True,False,13108600000.0,2362,...,Los Angeles,"['8700 W 3rd St', 'Los Angeles, CA 90048']",https://www.yelp.com/biz/aoc-los-angeles?adjus...,https://s3-media4.fl.yelpcdn.com/bphoto/UGnsMC...,34.073416,-118.381928,['https://s3-media4.fl.yelpcdn.com/bphoto/UGns...,,3,Los angeles


# Pie Chart (Topic modeling)

In [6]:
# Topic modeling content following procedures in link below:
# https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/

# Feel free to apply other techniques in NLP or in other language models

In [7]:
# Install and import packages for topic modelling


# Solution for some potential problems:
# https://stackoverflow.com/questions/49964028/spacy-oserror-cant-find-model-en
# https://github.com/explosion/spaCy/issues/7453
!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz

# Define all text in dataframe
data = extended_df.review_text.values.tolist()

# You might need to download stopwords
import nltk
nltk.download('stopwords')

from nltk.corpus import stopwords
stop_words = stopwords.words('english')
#print(stop_words)
stop_words.extend(['nan', 'order','also','good','great', 'want','make','see','go','get','come','give','really','always',
                  'usually', 'need', 'love', 'horrible','star', 'never','use','today','work','find','business','people',
                  'awsome','worker','leave','plate','ever','amazing'])

#Import libraries needed
import gensim
import gensim.corpora as corpora
import re
from gensim.utils import simple_preprocess
import spacy

# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
    
nlp = spacy.load('en_core_web_sm')

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz (13.7 MB)




In [8]:
# Remove distracting single quotes
def sent_to_words(sentences):
    for sentence in sentences:
        yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))  # deacc=True removes punctuations

# Define functions for stopwords, bigrams, trigrams and lemmatization
def remove_stopwords(texts):
    return [[word for word in simple_preprocess(str(doc)) if word not in stop_words] for doc in texts]

def make_bigrams(texts):
    return [bigram_mod[doc] for doc in texts]

def make_trigrams(texts):
    return [trigram_mod[bigram_mod[doc]] for doc in texts]

def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """https://spacy.io/api/annotation"""
    texts_out = []
    for sent in texts:
        doc = nlp(" ".join(sent)) 
        texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags])
    return texts_out

data_words = list(sent_to_words(data))

# Build the bigram and trigram models
bigram = gensim.models.Phrases(data_words, min_count=5, threshold=100) # higher threshold fewer phrases.
bigram_mod = gensim.models.phrases.Phraser(bigram)

# Remove Stop Words
data_words_nostops = remove_stopwords(data_words)

# Form Bigrams
data_words_bigrams = make_bigrams(data_words_nostops)

data_lemmatized = lemmatization(data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

# Create Dictionary
id2word = corpora.Dictionary(data_lemmatized)

# Create Corpus
texts = data_lemmatized

# Term Document Frequency
corpus = [id2word.doc2bow(text) for text in texts]


In [9]:
# Selecting model by tuning topic numbers
topic_range=range(3,7,1)
perplexity=[]
coherence=[]
topic_num=[]

for i in topic_range:
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,id2word=id2word,random_state=100,update_every=1,chunksize=100,passes=10,
                                           num_topics=i, alpha='auto',per_word_topics=True)
                                           
    topic_num.append(i)
    perplexity.append(lda_model.log_perplexity(corpus))  # a measure of how good the model is. lower the better.

    # Compute Coherence Score
    coherence_model_lda = CoherenceModel(model=lda_model, texts=data_lemmatized , dictionary=id2word, coherence='c_v')
    coherence_lda = coherence_model_lda.get_coherence()    # higher the better
    coherence.append(coherence_lda)  

# Show performance for different topics
# Could switch random seed to get different results
data_dict={'topic_num':topic_num,'perplexity':perplexity,'coherence':coherence}
performance_topic_num=pd.DataFrame(data_dict)
performance_topic_num                                           
                                    

Unnamed: 0,topic_num,perplexity,coherence
0,3,-7.593064,0.248111
1,4,-7.666703,0.191943
2,5,-7.722593,0.187518
3,6,-7.779551,0.185313


In [10]:
#Build the model with 6 topics
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                           id2word=id2word,
                                           num_topics=6, 
                                           random_state=999,
                                           update_every=1,
                                           chunksize=100,
                                           passes=18,
                                           alpha='auto',
                                           per_word_topics=False)


In [11]:
def format_topics_sentences(ldamodel=lda_model, corpus=corpus,texts=data):
    # Init output
    sent_topics_df = pd.DataFrame()

    # Get main topic in each document
    for i, row in enumerate(ldamodel[corpus]):
        row = sorted(row, key=lambda x: (x[1]), reverse=True)
        # Get the Dominant topic, Perc Contribution and Keywords for each document
        for j, (topic_num, prop_topic) in enumerate(row):
            if j == 0:  # => dominant topic
                wp = ldamodel.show_topic(topic_num)
                topic_keywords = ", ".join([word for word, prop in wp])
                sent_topics_df = sent_topics_df.append(pd.Series([int(topic_num), round(prop_topic,4), topic_keywords]), ignore_index=True)
            else:
                break
    sent_topics_df.columns = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']

    # Add original text to the end of the output
    contents = pd.Series(texts)
    sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)
    return(sent_topics_df)


df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model, corpus=corpus, texts=data)

# Assign topics to text data
df_dominant_topic = df_topic_sents_keywords.reset_index()
df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']
df_dominant_topic

Unnamed: 0,Document_No,Dominant_Topic,Topic_Perc_Contrib,Keywords,Text
0,0,0.0,0.3399,"food, good, pizza, location, back, fry, even, ...",Slightly turned off by the hostess. She wasn't...
1,1,5.0,0.2587,"order, chicken, get, taco, well, fresh, sandwi...",Wish I would have known about no brunch at the...
2,2,3.0,0.2294,"place, service, time, try, come, go, customer,...",I had an amazing experience at Granville.\n\nw...
3,3,0.0,0.3535,"food, good, pizza, location, back, fry, even, ...",Photo dump from dinner on Aug 8th. Literally c...
4,4,3.0,0.3096,"place, service, time, try, come, go, customer,...",I've had this place bookmarked on my Yelp for ...
...,...,...,...,...,...
32125,32125,3.0,0.3005,"place, service, time, try, come, go, customer,...",Horrible Horrible Horrible!!!!\nI had heard so...
32126,32126,3.0,0.2364,"place, service, time, try, come, go, customer,...",We stopped in after a trip to the desert. Spl...
32127,32127,0.0,0.3219,"food, good, pizza, location, back, fry, even, ...",Updated hours are not true. Unable to order th...
32128,32128,3.0,0.2890,"place, service, time, try, come, go, customer,...",Trash. Employees are very rude. Nobody there w...


In [12]:
#Print topics in job categories
for n in range(6):
  print(df_dominant_topic[df_dominant_topic['Dominant_Topic']==n]['Keywords'].unique())


# Could go back to remove more stopwords after inspecting the key words below

['food, good, pizza, location, back, fry, even, taste, call, burger']
['delicious, say, nice, spot, eat, restaurant, know, little, super, burrito']
['take, year, favorite, new, close, last, drive, work, visit, keep']
['place, service, time, try, come, go, customer, first, bad, staff']
['wait, stop, long, minute, home, use, line, live, week, finally']
['order, chicken, get, taco, well, fresh, sandwich, cheese, meat, decide']


In [13]:
# categorizing the key words into specific values cared by customers
#0: Food
#1: Environment
#2: Experience
#3: Service
#4: Waiting time
#5: Others

In [14]:
#Replace numbers with value names
extended_df['Value']=df_dominant_topic.copy()['Dominant_Topic']

extended_df['Value'].replace({0.0: 'Food', 
                              1.0: 'Environment',
                              2.0: 'Experience',
                              3.0: 'Service',
                              4.0: 'Waiting time',
                              5.0: 'Others'}, inplace=True)

pie_df = extended_df[['review_id','categories01', 'categories02', 'categories03', 'City','Value']]

In [15]:
#See distribution in reviews topics

pie_df.groupby('Value').count()

Unnamed: 0_level_0,review_id,categories01,categories02,categories03,City
Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Environment,823,823,823,823,823
Experience,57,57,57,57,57
Food,11044,11044,11044,11044,11044
Others,1842,1842,1842,1842,1842
Service,16911,18362,18362,18362,18362
Waiting time,2,2,2,2,2


# Heatmap

In [16]:
#Organize review time values

extended_df_drop = extended_df.copy()
extended_df_drop = extended_df_drop.dropna(subset = ['review_time_created'])
extended_df_drop['datetime'] = extended_df_drop['review_time_created'].apply(lambda x: parser.parse(str(x)))
extended_df_drop['weekday'] = extended_df_drop['datetime'].apply(lambda x: x.timetuple()[6])
extended_df_drop['time'] = extended_df_drop['datetime'].apply(lambda x: 3 if (x.timetuple()[3]>0  and x.timetuple()[3]<=6)
                                                                else (0   if (x.timetuple()[3]>6  and x.timetuple()[3]<=12)
                                                                else (1   if (x.timetuple()[3]>12 and x.timetuple()[3]<=18)
                                                                else  2 
                                                                     ))
                                                             )

heatmap_df = extended_df_drop[['City','time', 'weekday','review_id']]
heatmap_df.head()

Unnamed: 0,City,time,weekday,review_id
0,West hollywood,1,1,AyueC5Vq_5lUKJFqSzXWWw
1,West hollywood,1,1,yaH4AmHUz9b3Ywv4VtvU5g
2,West hollywood,2,0,YiuFLFWsrP92_QWa-d2W2Q
3,Los angeles,1,0,VyKvwjOuJxKWiLlyzsqQ_A
4,Los angeles,1,5,D0-MjyINO2u9IRmf1opaUQ


# Radar Chart

In [17]:
# Create sentiment scores

from textblob import TextBlob

pol = lambda x: TextBlob(str(x)).sentiment.polarity
extended_df['polarity'] = extended_df['review_text'].apply(pol)


In [18]:
# Read population density and income data

pop_des=pd.read_csv(r'E:\LMU\rmds_lab\competition_2021\Q3_competition\sample_dashboard\inputs\population_density.csv')
pop_des['Name'] = pop_des['name'].apply(lambda x: str(x).lower())
pop_des['Name'] = pop_des['Name'].apply(lambda x: str(x)[0].upper()+ str(x)[1:])

income=pd.read_csv(r'E:\LMU\rmds_lab\competition_2021\Q3_competition\sample_dashboard\inputs\Median_Household_Income_(2016).csv')
income['city'] = income['City_Name'].apply(lambda x: str(x).lower())
income['city'] = income['city'].apply(lambda x: str(x)[0].upper()+ str(x)[1:])

income_group = income.groupby('city').mean().reset_index()
income_group.head()

Unnamed: 0,city,OBJECTID,GEOID10,COUNTYFP10,TRACTCE10,ZIP,Acres,SqMi,Pop_16,Pop_Den,...,Other_Pct,NonWhite_P,Elders_Pct,SpksEng_Pc,UnEmp_Rate,Emp_15,Emp_Den,Income_Pct,Under19_Pc,MHI2016
0,,1578.707317,6065791000.0,65.544715,246391.857724,91958.243902,88229.988551,137.859357,4490.98374,1230.174797,...,0.0,43.037644,14.540244,80.16861,0.117539,1580.50813,350.921054,63684.211382,25.126423,75334.958159
1,Acton,3666.0,6037911000.0,37.0,910804.5,93510.0,21652.935591,33.832711,3885.5,115.0,...,0.0,23.019941,13.65,96.542727,0.142908,863.0,25.69701,83423.5,19.35,88800.0
2,Adelanto,640.0,6071009000.0,71.0,9114.0,92301.0,3208.159879,5.01275,8907.333333,2538.0,...,0.0,82.744861,7.233333,92.652833,0.283731,718.666667,166.954433,32950.333333,43.066667,30381.0
3,Agoura hills,2404.666667,6037800000.0,37.0,800327.666667,91301.0,1157.799864,1.809062,6286.0,3642.666667,...,0.0,20.625504,12.533333,96.928856,0.081204,2533.0,1575.846109,116335.0,25.7,121158.666667
4,Agua dulce,1640.0,6037911000.0,37.0,910813.0,91390.0,20384.572868,31.850895,3719.0,117.0,...,0.0,19.903643,16.7,99.295223,0.128446,697.0,21.883215,97578.0,19.7,101220.0


In [19]:
# Merge data we need together (6 dimensions)

df_pop_des = extended_df.merge(pop_des,left_on = 'City', right_on = 'Name')
df_pop_des_inc = df_pop_des.merge(income_group,left_on = 'City', right_on = 'city')

extended_df_radar = df_pop_des_inc[['City','rating','Price', 'polarity', 'review_count', 'density','MHI2016']].copy()
extended_df_radar_group = extended_df_radar.groupby('City').mean().reset_index()
extended_df_radar_group.head()

Unnamed: 0,City,rating,Price,polarity,review_count,density,MHI2016
0,Agoura hills,3.859649,1.982456,0.237613,243.894737,982.1817,121158.666667
1,Alhambra,3.666667,1.8,0.171736,508.35,4174.4164,55027.75
2,Arcadia,3.525773,1.845361,0.148571,309.958763,2016.1584,90598.909091
3,Artesia,3.714953,1.82243,0.198358,365.831776,3906.7655,57094.666667
4,Azusa,4.0,1.0,0.042708,3.0,2009.0266,60506.909091


In [20]:
# Rename columns and cut series to different scales

extended_df_radar_group['rating_bin'] = pd.qcut(extended_df_radar_group['rating'],
       5, labels=[1,2,3,4,5])
extended_df_radar_group['price_bin'] = pd.qcut(extended_df_radar_group['Price'],
       5, labels=[1,2,3,4,5])
extended_df_radar_group['positiveness_bin'] = pd.qcut(extended_df_radar_group['polarity'],
       5, labels=[1,2,3,4,5])
extended_df_radar_group['review_num_bin'] = pd.qcut(extended_df_radar_group['review_count'],
       5, labels=[1,2,3,4,5])
extended_df_radar_group['population_density_bin'] = pd.qcut(extended_df_radar_group['density'],
       5, labels=[1,2,3,4,5])
extended_df_radar_group['income_bin'] = pd.qcut(extended_df_radar_group['MHI2016'],
       5, labels=[1,2,3,4,5])

radar_df= extended_df_radar_group[['City','rating_bin', 'price_bin','positiveness_bin','review_num_bin', 'population_density_bin', 'income_bin']]

# Line Chart

In [21]:
#Read covid data and cleaning for time and city columns

covid = pd.read_csv(r'E:\LMU\rmds_lab\competition_2021\Q3_competition\sample_dashboard\inputs\latimes-place-totals.csv')
covid_la = covid[covid.county=='Los Angeles']
covid_la['datetime'] = covid_la['date'].apply(lambda x: date.fromisoformat(str(x)))
covid_la_2021 = covid_la[covid_la['datetime']>date.fromisoformat('2021-06-01')]
covid_la_2021.name.unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_la['datetime'] = covid_la['date'].apply(lambda x: date.fromisoformat(str(x)))


array(['90802: Long Beach', '90803: Long Beach', '90804: Long Beach',
       '90805: Long Beach', '90806: Long Beach', '90807: Long Beach',
       '90808: Long Beach', '90810: Long Beach', '90813: Long Beach',
       '90814: Long Beach', '90815: Long Beach', '91101: Pasadena',
       '91103: Pasadena', '91104: Pasadena', '91105: Pasadena',
       '91106: Pasadena', '91107: Pasadena', 'Agoura Hills', 'Alhambra',
       'Arcadia', 'Artesia', 'Avalon', 'Azusa', 'Baldwin Park', 'Bell',
       'Bell Gardens', 'Bellflower', 'Beverly Hills', 'Bradbury',
       'Burbank', 'Calabasas', 'Carson', 'Cerritos', 'Claremont',
       'Commerce', 'Compton', 'Covina', 'Cudahy', 'Culver City',
       'Diamond Bar', 'Downey', 'Duarte', 'El Monte', 'El Segundo',
       'Gardena', 'Glendale', 'Glendora', 'Hawaiian Gardens', 'Hawthorne',
       'Hermosa Beach', 'Hidden Hills', 'Huntington Park', 'Industry',
       'Inglewood', 'Irwindale', 'La Canada Flintridge',
       'La Habra Heights', 'La Mirada', 'La P

In [22]:
# Notice two large cities have smaller areas, we have to merge them into one

covid_la_2021['city'] = covid_la_2021.name.apply(lambda x: x.split(':')[-1].strip().lower())
covid_la_2021['city'] = covid_la_2021.city.apply(lambda x: str(x)[0].upper()+ str(x)[1:])

covid_la_2021_pasadena = covid_la_2021[covid_la_2021['city']=='Pasadena'].groupby('datetime').sum().reset_index()
covid_la_2021_pasadena['city']='Pasadena'
covid_la_2021_lb = covid_la_2021[covid_la_2021['city']=='Long beach'].groupby('datetime').sum().reset_index()
covid_la_2021_lb['city']='Long beach'

covid_la_2021_rest = covid_la_2021[(covid_la_2021['city']!='Pasadena') & (covid_la_2021['city']!='Long beach')]
covid_la_2021_rest_data = covid_la_2021_rest[['city','datetime','fips','confirmed_cases','population']].copy()

# Now merge them back

covid_la_2021_sub_all = pd.concat([covid_la_2021_lb,covid_la_2021_pasadena])
covid_la_2021_all = pd.concat([covid_la_2021_sub_all,covid_la_2021_rest_data])
covid_la_2021_all_select = covid_la_2021_all[covid_la_2021_all['city'].isin(city_list)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_la_2021['city'] = covid_la_2021.name.apply(lambda x: x.split(':')[-1].strip().lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_la_2021['city'] = covid_la_2021.city.apply(lambda x: str(x)[0].upper()+ str(x)[1:])


In [23]:
# Get daily new cases per city and merge them back

new_cases = pd.DataFrame(None)

for city in covid_la_2021_all_select.city.unique():
    temp_df = covid_la_2021_all_select[covid_la_2021_all_select['city'] == city]
    temp_df = temp_df.sort_values(by='datetime')
    temp_df['next_day_cases']  = temp_df['confirmed_cases'].shift(-1)
    new_cases = pd.concat([temp_df, new_cases])

new_cases['new_daily_cases'] = new_cases['next_day_cases'] - new_cases['confirmed_cases']
new_cases = new_cases[new_cases['new_daily_cases']>=0]

line_df = new_cases.sort_values(by=['city','datetime']).reset_index().drop('index',axis=1)
line_df.head()

Unnamed: 0,datetime,fips,confirmed_cases,population,city,next_day_cases,new_daily_cases
0,2021-06-02,37,2688,34520.0,Beverly hills,2689.0,1.0
1,2021-06-03,37,2689,34520.0,Beverly hills,2690.0,1.0
2,2021-06-04,37,2690,34520.0,Beverly hills,2691.0,1.0
3,2021-06-05,37,2691,34520.0,Beverly hills,2692.0,1.0
4,2021-06-06,37,2692,34520.0,Beverly hills,2692.0,0.0


# App setup

In [24]:
# Setup the style from the link:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# Embed the style to the dashabord:
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
#app = dash.Dash(__name__)

In [25]:
app.layout = html.Div([
    html.H2(children="Restaurant Analytical Dashboard"),
    
    # Set up Dropdown
    dcc.Dropdown(id='dpdn1', value='Chinese', multi=False, 
                 options=[{'label': x, 'value': x} for x in category_list]),

    html.Div([dcc.Graph(id='map-graph', figure={}, className='five columns'),
              dcc.Graph(id='pie-graph', figure={}, className='three columns'),
              dcc.Graph(id='bar-graph', figure={}, className='three columns')
             ]),
    
    dcc.Dropdown(id='dpdn2', value=['Long beach','Pasadena'], multi=True, 
                 options=[{'label': x, 'value': x} for x in city_list]),
    
    html.Div([dcc.Graph(id='heatmap-graph', figure={}, className='five columns'), 
              dcc.Graph(id='radar-graph', figure={}, className='three columns'),
              dcc.Graph(id='line-graph', figure={}, className='three columns')
             ]),
    
    # Set up some end notes
    html.H2(children= 'Produced by RMDS Lab for 2021 Q3 Competition'),
    
    dcc.Link(children= 'See Competition Page    &   ', href = 'https://grmds.org/Q3/Comp', target = 'https://grmds.org/Q3/Comp'),
    
    dcc.Link(children= 'RMDS Covid Risk Map', href = 'https://grmds.org/risk/', target = 'https://grmds.org/risk/')

], style={"text-align": "center"})



#---------------------------------------------------------------

# Output graphs with input from dropdown

@app.callback(
    Output(component_id='map-graph', component_property='figure'),
    Input(component_id='dpdn1', component_property='value'),
)


def update_map(category):
    
    if category == 0:
        select_df_1 = map_df
    else:
        select_df_1 = map_df[(map_df['categories01']==category) | 
                             (map_df['categories02']==category) | 
                             (map_df['categories03']==category)
                            ]
    
    select_df_1['Rating'] = select_df_1['rating']
    
    chart1 = px.scatter_mapbox(select_df_1, lat="latitude", lon="longitude", color="Rating", size="Price", 
                               title='Where Are the Restaurants? <br> Geographic Distribution',
                               color_continuous_scale=px.colors.cyclical.IceFire, size_max=6, zoom=8,
                               mapbox_style="carto-positron", width=600, height =500 ) 
    chart1.update(layout=dict(title=dict(x=0.5)))
    
    return chart1



@app.callback(
    Output(component_id='pie-graph', component_property='figure'),
    Input(component_id='dpdn1', component_property='value'),
)

# Change chart when received diffrent inputs
def update_pie(category):
    if category == 0:
        select_df_2 = pie_df
    else:
        select_df_2 = pie_df[(pie_df['categories01']==category) | 
                             (pie_df['categories02']==category) | 
                             (pie_df['categories03']==category)
                            ]
        
    chart2= px.pie(
                data_frame=select_df_2,
                values =select_df_2.groupby('Value')['City'].count().sort_values(ascending=False)[:10],
                title='What Concerns Reviewers? <br> Review Topics', 
                names=select_df_2.groupby('Value').count().sort_values(by='City',ascending=False).index[:10],
                hole=.3,
                color_discrete_sequence=px.colors.qualitative.G10,
                width=400, height =500
                )

    chart2.update(layout=dict(title=dict(x=0.5)))    
    
    return chart2



    
    
    
@app.callback(
    Output(component_id='bar-graph', component_property='figure'),
    Input(component_id='dpdn1', component_property='value'),
)    
def update_bar(category):
    
    if category == 0:
        select_df_3 = bar_df
    else:
        select_df_3 = bar_df[(bar_df['categories01']==category) | 
                             (bar_df['categories02']==category) | 
                             (bar_df['categories03']==category)
                            ]
         
    chart3 = px.bar(y=select_df_3.groupby('price')['review_count'].mean(),
                    title='How many Reviews? <br> #Reviews by $ Level', 
                    x=select_df_3.groupby('price')['review_count'].mean().index,
                    width=500, height=500
                   )
    
    chart3.update(layout=dict(title=dict(x=0.5)))
    chart3.update_layout(xaxis_title="Price Level", yaxis_title="Average Reviews")
    
    return chart3




@app.callback(
    Output(component_id='heatmap-graph', component_property='figure'),
    Input(component_id='dpdn2', component_property='value'),
)
def update_heatmap(cityname):
    
    #Create a pivot table
    local_df = heatmap_df[heatmap_df['City'].isin(cityname)]
    local_df_clean = local_df[['time', 'weekday','review_id']]
    local_df_pivot = local_df_clean.pivot_table(index = 'time', columns='weekday', aggfunc='count')

    chart4 = px.imshow(local_df_pivot,
                       width=600, height =500, 
                       title = 'When Do They Post Reviews? <br> Review Frequency',
                       labels=dict(x="Weekday", y="Time", color="#Review"),
                                   x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday'],
                                   y=['Morning', 'Afternoon', 'Evening', 'Night']
                                  )
    chart4.update_xaxes(side="bottom")
    chart4.update(layout=dict(title=dict(x=0.5)))

    return chart4



@app.callback(
    Output(component_id='radar-graph', component_property='figure'),
    Input(component_id='dpdn2', component_property='value'),
)
def update_radar(cityname):
    
    local_df_2 = radar_df[radar_df['City'].isin(cityname)]
    local_df_2.columns= ['City', 'Rating', 'Price', '+Review', '#Review', 'Pop Density', 'Income']
    local_df_2_stacked = local_df_2.set_index('City').stack().reset_index()

    chart5 = px.line_polar(local_df_2_stacked, r=0, theta="level_1", color="City", line_close=True,
                           width=400, height =500, 
                           title = 'How Does Your City Compare? <br>City Comparability',
                           color_discrete_sequence=px.colors.qualitative.G10)
    chart5.update(layout=dict(title=dict(x=0.5)))

    return chart5



@app.callback(
    Output(component_id='line-graph', component_property='figure'),
    Input(component_id='dpdn2', component_property='value'),
)
def update_line(cityname):
    
    local_df_3 = line_df[line_df['city'].isin(cityname)]
    
    local_df_3['Date'] = local_df_3['datetime']
    local_df_3['New confirmed cases'] = local_df_3['new_daily_cases']
    local_df_3['City'] = local_df_3['city']

    chart6 = px.line(local_df_3, x='Date', y='New confirmed cases',
                     color='City', title='Is COVID Affecting Your City? <br> COVID Outbreak by City',
                    width=500, height =500, color_discrete_sequence=px.colors.qualitative.G10)
    
    chart6.update_xaxes(side="bottom")
    chart6.update(layout=dict(title=dict(x=0.5)))
    
    return chart6






In [26]:
if __name__ == "__main__":
    app.run_server()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [03/Sep/2021 11:48:57] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Sep/2021 11:48:59] "[37mGET /_dash-component-suites/dash_html_components/dash_html_components.v1_1_4m1629313584.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Sep/2021 11:48:59] "[37mGET /_dash-component-suites/dash/deps/prop-types@15.v1_21_0m1629313585.7.2.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Sep/2021 11:48:59] "[37mGET /_dash-component-suites/dash/deps/react-dom@16.v1_21_0m1629313585.14.0.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Sep/2021 11:48:59] "[37mGET /_dash-component-suites/dash/deps/react@16.v1_21_0m1629313585.14.0.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Sep/2021 11:48:59] "[37mGET /_dash-component-suites/dash_core_components/dash_core_components-shared.v1_17_1m1629313583.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [03/Sep/2021 11:48:59] "[37mGET /_dash-component-suites/dash/deps/polyfill@7.v1_21_0m1629313585.12.1.min.