# Restaurant Topicnizer

#### By Ari Conati, Matti Kukkamäki, Savanna Lujan

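An interactive dashboard for exploring restaurant reviews by topic. For the selected restaurant it shows the topic-weighted average star rating of every topic, how the selected topic's rating changes from year to year, and five sample reviews for that topic.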

In [7]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from ipywidgets import Layout
from ipywidgets import HBox, VBox
from ipywidgets import Dropdown
from ipywidgets import Text, Textarea

# Number of topic-weight columns (Topic0, Topic1, ...) read from each review table.
NUM_TOPICS = 5
# Names of the topic-weight columns, in index order.
TOPICS = list(map("Topic{}".format, range(NUM_TOPICS)))
#SCATTER_COLORS = ["#ff0000", "#ffa500", "#ffff00", "#00ff00", "#0000ff"]*(NUM_TOPICS//5)

In [2]:
def get_weighted_averages(df, num_topics):
    """Return the topic-weighted mean of ``stars`` for every topic column.

    For each column ``Topic0`` .. ``Topic{num_topics - 1}`` the value is
    ``sum(stars * weight) / sum(weight)``, where ``weight`` is that column.

    Args:
        df: DataFrame with a ``stars`` column and the ``Topic<i>`` columns.
        num_topics: Number of topic columns to process.

    Returns:
        dict mapping each topic column name to its weighted average.
    """
    averages = {}
    for idx in range(num_topics):
        column = "Topic{}".format(idx)
        weights = df[column]
        averages[column] = (df["stars"] * weights).sum() / weights.sum()
    return averages


def weighted_averages_by_year(df, num_topics):
    """Compute every topic's weighted average score separately for each year.

    Args:
        df: DataFrame with a ``year`` column plus the columns that
            ``get_weighted_averages`` reads.
        num_topics: Number of topic columns to process.

    Returns:
        list of ``num_topics`` DataFrames (position ``i`` belongs to
        ``Topic<i>``), each with ``year`` and ``score`` columns and sorted
        by ``year``.
    """
    rows_per_topic = [[] for _ in range(num_topics)]

    # Years are visited in order of first appearance; sorting happens at the end.
    for year in df["year"].unique():
        yearly_scores = get_weighted_averages(df[df["year"] == year], num_topics)
        for topic_idx, rows in enumerate(rows_per_topic):
            rows.append((year, yearly_scores["Topic{}".format(topic_idx)]))

    return [
        pd.DataFrame(rows, columns=["year", "score"]).sort_values(["year"])
        for rows in rows_per_topic
    ]

In [11]:
# Collect every review CSV in the data directory, in sorted filename order.
directory = os.fsencode("RestaurantData")
restaurant_files = []

for file in sorted(os.listdir(directory)):
    filename = os.fsdecode(file)
    if filename.endswith(".csv"):
        restaurant_files.append(os.path.join(os.fsdecode(directory), filename))

# One raw review table per restaurant file.
raw_dfs = [pd.read_csv(path) for path in restaurant_files]

# restaurant_dfs[r][t]: per-year weighted scores of topic t for restaurant r.
restaurant_dfs = [weighted_averages_by_year(raw_df, NUM_TOPICS) for raw_df in raw_dfs]

# overall_dfs[r]: one row per topic with its all-time weighted score.
overall_scores = [get_weighted_averages(raw_df, NUM_TOPICS) for raw_df in raw_dfs]
overall_dfs = [
    pd.DataFrame(data={"Topic": TOPICS, "Score": [scores[topic] for topic in TOPICS]})
    for scores in overall_scores
]

# Display names come from the file name. os.path.basename strips the directory
# with whatever separator os.path.join used, instead of searching for a
# hard-coded "/" (which raises ValueError where the separator differs).
restaurant_names = []
for path in restaurant_files:
    stem = os.path.splitext(os.path.basename(path))[0]
    words = stem.replace('_', ' ').split()
    # The last two words of the file name are dropped.
    # NOTE(review): presumably a location suffix in the file naming scheme - confirm.
    restaurant_names.append(" ".join(words[:-2]))

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,stars,useful,funny,cool,text,name,city,state,...,RestaurantsTableService,DriveThru,DogsAllowed,BYOBCorkage,Corkage,Topic0,Topic1,Topic2,Topic3,Topic4
0,0,0,2,1,0,0,I like Burger King but this location always ha...,Burger King,Atlanta,GA,...,,,,,,0.379289,0.003651,0.320878,0.292607,0.003575
1,1,1,5,1,0,0,"The food here is normal Burger King, but the a...",Burger King,Atlanta,GA,...,,,,,,0.283760,0.011413,0.090743,0.602964,0.011119
2,2,2,1,1,0,0,Nice guy at the window. The food was cold larg...,Burger King,Atlanta,GA,...,,,,,,0.526261,0.014377,0.014803,0.430257,0.014301
3,3,3,1,1,2,1,I visited this location two consecutive mornin...,Burger King,Atlanta,GA,...,,,,,,0.334497,0.003439,0.127473,0.531195,0.003395
4,4,4,5,0,0,1,I stop by this location on my way to work. Ser...,Burger King,Atlanta,GA,...,,,,,,0.012907,0.012558,0.794062,0.167927,0.012546
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,226,0,1,0,0,0,I guess all the employees celebrated Christmad...,Burger King,Atlanta,GA,...,,,,,,0.171431,0.011362,0.096360,0.703835,0.017012
227,227,1,1,0,0,0,Unless this is your ONLY choice do t stop here...,Burger King,Atlanta,GA,...,,,,,,0.152047,0.005728,0.363384,0.473122,0.005719
228,228,2,1,0,0,0,Whoever the owner of this franchise is should ...,Burger King,Atlanta,GA,...,,,,,,0.495325,0.004401,0.112936,0.382983,0.004354
229,229,3,1,0,0,0,Lied and said they did not have chicken nugget...,Burger King,Atlanta,GA,...,,,,,,0.018565,0.018219,0.440877,0.504138,0.018201


In [31]:
# Initial dashboard state: first restaurant, first topic.
x_label = "Year"
y_label = "Review Score"
df = restaurant_dfs[0][0]
overall = overall_dfs[0]
# Reviews ordered by their weight for the first topic.
# NOTE(review): the sort is ascending, so the samples shown below are the
# reviews with the LOWEST weight for the topic - confirm this is intended.
crdf = raw_dfs[0].sort_values(by=TOPICS[0]).reset_index()

# Line chart: yearly score of the selected topic.
g = go.FigureWidget(
    data=go.Scatter(x=df["year"], y=df["score"]),
    layout=go.Layout(
        title={
            "text": "Reviews for {} ({})".format(restaurant_names[0], TOPICS[0]),
            "x": 0.5,
            "y": 0.9,
            "xanchor": "center",
            "yanchor": "top"},
        xaxis_title=x_label,
        yaxis_title=y_label,
        yaxis=dict(
            range=[0,5]
        )
    ))

# Bar colours by score: green above 3.5, red below 3, orange otherwise.
y=np.array(overall["Score"])
color=np.array(['rgb(255, 165, 0)']*y.shape[0])
color[y>3.5]='rgb(0,180,0)'
color[y<3]='rgb(180,0,0)'

# Bar chart: overall score of every topic for the selected restaurant.
g_over = go.FigureWidget(
    data=go.Bar(
        x=overall["Topic"],
        y=overall["Score"],
        width=[0.5]*NUM_TOPICS,
        marker=dict(color=color.tolist())),
    layout=go.Layout(
        title={
            "text": "Reviews for {}".format(restaurant_names[0]),
            "x": 0.5,
            "y": 0.9,
            "xanchor": "center",
            "yanchor": "top"},
        xaxis_title="Topic",
        yaxis_title=y_label,
        yaxis=dict(
            range=[0,5]
        )
    ))

# Five read-only text boxes holding the first five reviews of crdf. A box is
# left empty when the restaurant has fewer reviews than boxes, instead of
# raising KeyError on the missing row.
sample_review_layout = Layout(width='1000px', height='200px')
sample_texts = crdf["text"]
sample_review1, sample_review2, sample_review3, sample_review4, sample_review5 = [
    Textarea(
        value = sample_texts.iloc[position] if position < len(sample_texts) else "",
        disabled = True,
        layout = sample_review_layout
    )
    for position in range(5)
]

In [34]:
# Selectors for the topic and the restaurant shown in the dashboard.
select_topic = Dropdown(layout=Layout(width='160px'), options=TOPICS)
select_restaurant = Dropdown(layout=Layout(width='160px'), options=restaurant_names)


def update(change):
    """Refresh both charts and the sample reviews for the current selection.

    Used as the observer callback of the two dropdowns. The selection is read
    from ``select_topic`` and ``select_restaurant`` directly, so the payload
    in ``change`` is not inspected.

    Args:
        change: Change notification supplied by the widget observer (unused).
    """
    with g.batch_update():
        index_t = TOPICS.index(select_topic.value)
        index_r = restaurant_names.index(select_restaurant.value)
        topic_df = restaurant_dfs[index_r][index_t]

        # Restaurants can have reviews in different years, so x has to be
        # refreshed together with y; otherwise the new scores are plotted
        # against the previously selected restaurant's years.
        g.data[0].x = topic_df["year"]
        g.data[0].y = topic_df["score"]

        # Reviews ordered by their weight for the selected topic.
        # NOTE(review): the sort is ascending, so the samples shown are the
        # reviews with the LOWEST weight for the topic - confirm this is intended.
        crdf = raw_dfs[index_r].sort_values(by=TOPICS[index_t]).reset_index()

        g.layout = go.Layout(
            title={
                "text": "Reviews for {} ({})".format(restaurant_names[index_r], TOPICS[index_t]),
                "x": 0.5,
                "y": 0.9,
                "xanchor": "center",
                "yanchor": "top"},
            xaxis_title=x_label,
            yaxis_title=y_label,
            yaxis=dict(
                range=[0,5]
            ))
        g_over.data[0].y = overall_dfs[index_r].Score

        # Bar colours by score: green above 3.5, red below 3, orange otherwise.
        y=np.array(overall_dfs[index_r].Score)
        color=np.array(['rgb(255, 165, 0)']*y.shape[0])
        color[y>3.5]='rgb(0,180,0)'
        color[y<3]='rgb(180,0,0)'
        g_over.data[0].marker = dict(color=color.tolist())

        g_over.layout = go.Layout(
            title={
                "text": "Reviews for {}".format(restaurant_names[index_r]),
                "x": 0.5,
                "y": 0.9,
                "xanchor": "center",
                "yanchor": "top"},
            xaxis_title="Topic",
            yaxis_title=y_label,
            yaxis=dict(
                range=[0,5]
            )
        )

        # Fill the five review boxes; a box is cleared when the restaurant has
        # fewer reviews than boxes instead of raising KeyError.
        review_boxes = (sample_review1, sample_review2, sample_review3,
                        sample_review4, sample_review5)
        texts = crdf["text"]
        for position, box in enumerate(review_boxes):
            box.value = texts.iloc[position] if position < len(texts) else ""

# Re-render the dashboard whenever either selector's value changes.
for selector in (select_topic, select_restaurant):
    selector.observe(update, names="value")

# Selectors on top, the two charts side by side below them.
dropdown = HBox(children=[select_restaurant, select_topic])
gd = HBox(children=[g_over, g])
full = VBox(children=[dropdown, gd])

# The five sample reviews are stacked underneath the charts.
sample_reviews = VBox(children=[
    sample_review1,
    sample_review2,
    sample_review3,
    sample_review4,
    sample_review5,
])
app_display = VBox(children=[full, sample_reviews])
app_display

VBox(children=(VBox(children=(HBox(children=(Dropdown(layout=Layout(width='160px'), options=('Burger King', 'C…