# **Project Title**
#### *Project Subtitle*

## Hypothesis;

Project thesis

In [None]:
# Importing dependencies
import pandas as pd

# **Data**

## Dataset

Dataset details

In [None]:
# Reading in data
# (potentially?)

### EDA

In [None]:
# Beginning EDA

# **Ramona's Code Space**

*End Code Space*

# **Christian's Code Space**

### Dependencies

In [None]:
# Installing gdown (uncomment if needed)
# %pip install gdown --quiet

In [None]:
# Imports and dependencies
import os
import re
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# json
import json

# gdown
import gdown

#### Resources path

In [None]:
# Defining a function to access datasets through `gdown`
def fetch_data(set):
    # Declaring `url` and `output` for dataset
    match set:
        case 'business':
            url = 'https://drive.google.com/file/d/1t-_rOjZ8oMqPcMJunVaMgY3OEbhnuSCv/view?usp=sharing'
            output = 'Resources/business_dataset.csv'
        case 'checkin':
            url = 'https://drive.google.com/file/d/1_AVWp31ymfvf4QgTiMN_WLAeapfr0omf/view?usp=sharing'
            output = 'Resources/checkin_dataset.csv'
        case 'reviews':
            url = 'https://drive.google.com/file/d/1L8rFjhOQyU90Ycr9t_OLA70vCYM0e7ck/view?usp=sharing'
            output = 'Resources/reviews_dataset.csv'
        case 'tip':
            url = 'https://drive.google.com/file/d/1LMkCi5AFC_58_m7ELmn1hR8YDykuXwqq/view?usp=sharing'
            output = 'Resources/tip_dataset.csv'
        case 'user':
            url = 'https://drive.google.com/file/d/1kQ522qcod7AjD5DO9vj8qFcSKxwJCDrO/view?usp=sharing'
            output = 'Resources/user_dataset.csv'
        case _:
            print('Invalid dataset selected, please try again')
            return None
    
    # Downloading dataset
    gdown.download(url, output, fuzzy=True, quiet=True)

    # Reading in the dataset
    df = pd.read_csv(output)

    # Returning the dataset
    return df

---

#### Business dataset

#### <font color='blue'> Description:</font> 
**Contains business data including location data, attributes, and categories.**

#### Loading data

In [None]:
# Fetching `business_dataset`
business_df = fetch_data('business')

#### Overview

In [None]:
business_df.head()

#### Info

In [None]:
business_df.info()

---

#### Checkin dataset

#### <font color='blue'> Description:</font>
**Checkins on a business.**

#### Loading Data

In [None]:
# Fethching `checkin_dataset`
checkin_df = fetch_data('checkin')

#### Overview

In [None]:
checkin_df.head()

#### Info

In [None]:
checkin_df.info()

#### **<font color='orange'> Notes:</font>**
**The team has determined this dataset would not add any value to our training data.**

---

#### Reviews dataset

#### <font color = 'blue'>Description:</font>
**Contains full review text data including the user_id that wrote the review and the business_id the review is written for.**

#### Loading Data

In [None]:
# Fetching `reviews_dataset`
reviews_df = fetch_data('reviews')

#### Overview

In [None]:
reviews_df.head()

#### Info

In [None]:
reviews_df.info()

#### Na count

In [None]:
reviews_df.isna().sum()

#### Dropping columns:
- **review_id**
- **useful**
- **funny**
- **cool**

In [None]:
reviews_df.drop(columns = ['review_id','useful','funny','cool'],
                inplace = True)

#### Renaming the 'text' field to 'review'

In [None]:
reviews_df.rename(columns = {'text':'review'},inplace = True)
reviews_df.head()

#### **<font color='orange'> Notes:</font>**
- **review_id: Eliminated due to low informational value.**
- **useful: Eliminated due to low relevance.**
- **funny: Eliminated due to low relevance.**
- **cool: Eliminated due to low relevance.**

  **The *<font color='green'>'business_id'</font>* feature will be used as the identifier, *<font color='green'>'stars'</font>* is the rating metric and the *<font color='grey'>'review'</font>*  field encapsulates**<br>
  **the data to be processed. the *<font color='green'>'date'</font>* variable is in place if time series analysis is needed.**

---

#### Tips dataset

#### <font color='blue'>Description:</font>
**Tips written by a user on a business. Tips are shorter than reviews and tend to convey quick suggestions.**

#### Loading Data

In [None]:
# Fetching `tips_dataset`
tips_df = fetch_data('tip')

#### Overview

In [None]:
tips_df.head()

#### Info

In [None]:
tips_df.info()

#### Dropping columns:
- **compliment_count**

In [None]:
tips_df.drop(columns = ['compliment_count'],
             inplace =True)

#### Renaming the 'text' column to 'recommendations'

In [None]:
tips_df.rename(columns = {'text':'recommendations'},inplace = True)
tips_df.head()

#### **<font color='orange'> Notes:</font>**
- **compliment_count: Eliminated due to low informational value.**


 **Since this data set has recommendations from the user to improve customer experience the 'recommendations' field could be a useful target variable.**

---

#### User dataset

#### <font color = 'blue'>Note:</font>
**User data including the user's friend mapping and all the metadata associated with the user.**

#### Loading Data

In [None]:
# Fetching `user_dataset`
user_df = fetch_data('user')

#### Overview

In [None]:
user_df.head()

#### Info

In [None]:
user_df.info()

#### **<font color='orange'> Notes:</font>**
**This data set will not be included in the training data to preserve user anonimity.**

# //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

## **<font color = 'darkgrey'>Merging the reviews data set and the business data set</font>**

#### <font color = 'blue'>Description:</font>
**This data set contains the fields that will be used to train the model**

In [None]:
data_df = reviews_df.merge(business_df,how='left',on = 'business_id')

In [None]:
data_df.info()

In [None]:
data_df.head()

#### Na count

In [None]:
data_df.isna().sum()

In [None]:
na_prcnt = data_df[['attributes','categories','hours']].isna().sum()/data_df.shape[0]*100
nas_df = pd.DataFrame(na_prcnt, columns=['percentage'])
nas_df = nas_df.transpose()
nas_df.round(4)

In [None]:
sns.barplot(data = nas_df).set_title('Na percentage')

#### **<font color='orange'> Notes:</font>**
**After consulting with the team we decided to drop all three columns.**

#### Dropping rows with na values

In [None]:
data_df.drop(columns = ['attributes','categories','hours'],inplace=True)

In [None]:
data_df.isna().sum()

#### *<font color='grey'>stars_x* and *<font color='grey'>stars_y* comparison

In [None]:
data_df.loc[data_df['stars_x'] != data_df['stars_y']][['stars_x','stars_y']].head()

#### *<font color='grey'>stars_x* and *<font color='grey'>stars_y*  for the same customer

In [None]:
data_df.loc[data_df['business_id']=='XQfwVwDr-v0ZS3_CbbE5Xw'][['stars_x','stars_y']].head()

#### *<font color='grey'>stars_x* average

In [None]:
round(data_df.loc[data_df['business_id']=='XQfwVwDr-v0ZS3_CbbE5Xw']['stars_x'].mean(),2)

#### **<font color='orange'> Notes:</font>**
**Because** *<font color='grey'> star_y</font>* **represents the average star rating, renaming** *<font color='grey'> star_y:</font>* **to:** *<font color='grey'> star_avg:</font>*

#### Renaming

In [None]:
data_df.rename(columns={'stars_y':'stars_avg','stars_x':'stars'},inplace = True)

#### Dropping is_open feature

In [None]:
fig,ax = plt.subplots()
sns.countplot(data_df,
             x='is_open',
             hue = 'is_open',
             ax = ax).set_title('is_open Feature')

#### droppin is_open

In [None]:
data_df.drop(columns = ['is_open'],inplace = True)

#### **<font color='orange'> Notes:</font>**
**After cosulting with the team we decided to drop this feature due low informational value and feature imbalance**

# //////////////////////////////////////////////////////////////////////////////////////////////////

## **<font color='darkgrey'>Merging with the tips data set exploration</font>**

#### <font color = 'blue'>Description:</font>
**Contains customer recommendatins to improve experience**

In [None]:
tips_df.head()

In [None]:
tips_df.info()

#### Quantity of unique business_id in the tips data set

In [None]:
display(tips_df['business_id'].unique().shape[0])

#### Quantity of unique business_id in  data_df

In [None]:
data_df['business_id'].unique().shape[0]

#### Subset of *<font color='grey'>business_id</font>* in *<font color='grey'>data_df</font>* not found in *<font color='grey'>tips_df</font>*.

In [None]:
no_tips_df = data_df[~data_df['business_id'].isin(tips_df['business_id'])]
no_tips_df.head()

#### Number of *<font color='grey'>business_id</font>* in *<font color='grey'>data_df</font>* not found in *<font color='grey'>tips_df</font>*.

In [None]:
no_tips_df = data_df[~data_df['business_id'].isin(tips_df['business_id'])]
not_found = no_tips_df['business_id'].unique().shape[0]
print(f'Number of business_ids in tips_df not found in data_df: {not_found}')

#### Evidence

In [None]:
tips_df.loc[tips_df['business_id'] == no_tips_df['business_id'].iloc[33]]

#### Merge

In [None]:
test_df = pd.merge(tips_df,data_df,
                   on = ['business_id','user_id'],
                   how = 'inner')
                         

#### Overview

In [None]:
test_df.info()

In [None]:
test_df.head()

#### Comparison review vs. recommendations

In [None]:
test_df[['review','recommendations']].head()

##### **<font color='orange'> Notes:</font>**
**The <font color='grey'>data_df</font> has approximately <font color='green'>7 million</font> entries and <font color='grey'>tips_df</font> about <font color='green'>1 million</font> after merging them we end up the a little under half a million**.<br>
**In the comparison above I don't see a difference between a review from the *reviews data set* and a recommendation from the *tips data set***.<br>
**As shown above we stand to loose a significant amount of data if a merge is performed**.

# ///////////////////////////////////////////////////////////////////////////////////////////////////

## <font color='darkgrey'>Final Data Overview</font>

#### Dropping the user_id column to preserv user anonimity

In [None]:
data_df.drop(columns = ['user_id'],inplace = True)

#### Overview

In [None]:
data_df.head()

#### Info

In [None]:
data_df.info()

#### Na verification

In [None]:
data_df.isna().sum()

*End Code Space*

# **Leigh's Code Space**

*End Code Space*

# **Angelica's Code Space**

In [None]:
#import required libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# # create a function to get the address of the each location and add it to a dataframe 
# def address_Addition(business,address1,new_df):
#     new_df['business_name'] = business
#     address_list = address1.split(',')
#     new_df['address'] = address_list[0]
#     new_df['city'] = address_list[1]
#     return new_df

# initiate driver
driver = webdriver.Chrome(service = ChromeService(ChromeDriverManager().install()))

# url list with business locations
#url = ['https://www.google.com/maps/place/Tim+Hortons/@43.7607366,-79.5321831,14z/data=!4m10!1m2!2m1!1stim+hortons!3m6!1s0x882b31d93eab2809:0xa9ea7bb65f9da6ec!8m2!3d43.7607366!4d-79.4992241!15sCgt0aW0gaG9ydG9ucyIDiAEBWg0iC3RpbSBob3J0b25zkgEKcmVzdGF1cmFudOABAA!16s%2Fg%2F1vyxk0xz','https://www.google.com/maps/place/Tim+Hortons/@43.7607366,-79.5321831,14z/data=!4m10!1m2!2m1!1stim+hortons!3m6!1s0x882b302d70a29891:0xc279061e4a5c71bc!8m2!3d43.756124!4d-79.5152637!15sCgt0aW0gaG9ydG9ucyIDiAEBWg0iC3RpbSBob3J0b25zkgEKcmVzdGF1cmFudOABAA!16s%2Fg%2F1td38wkb']
url = ["https://www.google.com/maps/place/McDonald's/@43.7607329,-79.5321831,14z/data=!4m10!1m2!2m1!1sMcDonald's!3m6!1s0x882b31e6d3859eb1:0xc92a9af2d1385093!8m2!3d43.7624131!4d-79.490243!15sCgpNY0RvbmFsZCdzIgOIAQFaDCIKbWNkb25hbGQnc5IBFGZhc3RfZm9vZF9yZXN0YXVyYW504AEA!16s%2Fg%2F1hc604hjv?entry=ttu"]


driver.get(url[0])
time.sleep(5)

# Find the address of the location
response = BeautifulSoup(driver.page_source, 'html.parser')
business_name = response.find('h1',class_='DUwDvf lfPIob').text
business_name



    # address = response.find('div',class_= 'rogA2c').text

In [None]:
business_name

In [None]:
#import required libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# create a function to get the address of the each location and add it to a dataframe 
def business_Overview(business,avg_rating,address1,new_df):
    new_df['business_name'] = business
    new_df['avg_rating'] = avg_rating
    address_list = address1.split(',')
    new_df['address'] = address_list[0]
    new_df['city'] = address_list[1]
    return new_df

# initiate driver
driver = webdriver.Chrome(service = ChromeService(ChromeDriverManager().install()))

# url list with business locations
#url = ['https://www.google.com/maps/place/Tim+Hortons/@43.7607366,-79.5321831,14z/data=!4m10!1m2!2m1!1stim+hortons!3m6!1s0x882b31d93eab2809:0xa9ea7bb65f9da6ec!8m2!3d43.7607366!4d-79.4992241!15sCgt0aW0gaG9ydG9ucyIDiAEBWg0iC3RpbSBob3J0b25zkgEKcmVzdGF1cmFudOABAA!16s%2Fg%2F1vyxk0xz','https://www.google.com/maps/place/Tim+Hortons/@43.7607366,-79.5321831,14z/data=!4m10!1m2!2m1!1stim+hortons!3m6!1s0x882b302d70a29891:0xc279061e4a5c71bc!8m2!3d43.756124!4d-79.5152637!15sCgt0aW0gaG9ydG9ucyIDiAEBWg0iC3RpbSBob3J0b25zkgEKcmVzdGF1cmFudOABAA!16s%2Fg%2F1td38wkb']
url = ["https://www.google.com/maps/place/McDonald's/@43.7607329,-79.5321831,14z/data=!4m10!1m2!2m1!1sMcDonald's!3m6!1s0x882b31e6d3859eb1:0xc92a9af2d1385093!8m2!3d43.7624131!4d-79.490243!15sCgpNY0RvbmFsZCdzIgOIAQFaDCIKbWNkb25hbGQnc5IBFGZhc3RfZm9vZF9yZXN0YXVyYW504AEA!16s%2Fg%2F1hc604hjv?entry=ttu"]


#create for loop to parse through the different locations in the url list above
c = 0
for i in range(0,len(url)):
    c = c+1
    driver.get(url[i])
    time.sleep(5)

    # Find the address of the location
    response = BeautifulSoup(driver.page_source, 'html.parser')
    business_name = response.find('h1',class_='DUwDvf lfPIob').text
    avg_rating = response.find('div',class_='fontDisplayLarge').text
    address = response.find('div',class_= 'rogA2c').text
    driver.find_element('xpath','//*[@id="QA0Szd"]/div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[2]/div[1]/div[1]/div[2]/div/div[1]/div[2]').click()
    time.sleep(3)
    
    # By default, only 10 reviews can be extracted - to extract more reviews we have to scroll down the apge
    SCROLL_PAUSE_TIME = 5

    # Get scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")

    number = 0

    while True:
        number = number+1

        # Scroll down to bottom

        #old_==ele = driver.find_element('xpath','//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]')
        ele = driver.find_element('xpath','//*[@id="QA0Szd"]/div/div/div[1]/div[3]/div/div[1]/div/div/div[2]')
        driver.execute_script('arguments[0].scrollBy(0, 5000);', ele)

        # Wait to load page
        time.sleep(SCROLL_PAUSE_TIME)

        # Calculate new scroll height and compare with last scroll height
        #print(f'last height: {last_height}')

        ele = driver.find_element('xpath','//*[@id="QA0Szd"]/div/div/div[1]/div[3]/div/div[1]/div/div/div[2]')

        new_height = driver.execute_script("return arguments[0].scrollHeight", ele)

        #print(f'new height: {new_height}')

        if number == 5:
            break

        if new_height == last_height:
            break

        #print('cont')
        last_height = new_height
    next_item = driver.find_elements('xpath','//*[@id="QA0Szd"]/div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[9]')
    time.sleep(3)

    #expand review by click on 'more' button
    for i in next_item:
        button = i.find_elements(By.TAG_NAME,'button')
        for m in button:
            if m.text == "More":
                m.click()
        time.sleep(5)

    response = BeautifulSoup(driver.page_source, 'html.parser')
    next_2 = response.find_all('div',class_ = 'jftiEf')

    #get review by passing it to a dictionary
    def get_review_summary(result_set):
        rev_dict = {
            'Review Name': [],
            'Review Text' : [],
            'Review Rating' : []}

        for result in result_set:
            review_name = result.find(class_='d4r55').text
            review_text = result.find('span',class_='wiI7pd').text
            review_rating = result.find(class_='kvMYJc')['aria-label']
            rev_dict['Review Name'].append(review_name)
            rev_dict['Review Text'].append(review_text)
            rev_dict['Review Rating'].append(review_rating)
        
         
        return(pd.DataFrame(rev_dict))

    df = get_review_summary(next_2)
    if c == 1:
        df1 = df.copy()
        final_df = business_Overview(business_name,avg_rating,address,df1)
    else:
        df2 = df.copy()
        final_df = business_Overview(business_name,avg_rating,address,df2)
        final_df = pd.concat([df1,final_df],axis = 0)
    

print(final_df)

In [None]:
# review df with reviews and locations
final_df


*End Code Space*

# **Odele's Code Space**

## Additional Libraries and Dependencies;

Application being developed with `Dash` by `Plotly`. Additional `pip install`s will be necessary.

In [None]:
# Install necessary packages
# Note: Uncomment if needed
# ! pip install dash
# ! pip install dash-bootstrap-components

In [1]:
# Import libraries and dependencies
import pandas as pd

# Dash
from dash import Dash, dcc, html, callback
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate

# Dash Boostrap Components
import dash_bootstrap_components as dbc

# Plotly
import plotly.express as px
import plotly.graph_objects as go

# Other
import math

## Components;

Existing variables and functions may necessistate refactoring. Temporary facsimiles used for development.

In [2]:
# Components
'''
Inputs and declearations necessary to make app function:

user_input: a URL or name of a business (tbd) for which to fetch reviews

sbmt_bttn: a submit button

bus_id: an ID str or name of a business (tbd) to display for a given business

bus_loc: an array or a str (tbd) representing the location(s) of a given business

avg_rating: an int representing the average rating for a given business

tot_ratings: an int representing the toal reviews submitted for a given business

reviews: a dictionary of reviews containing a given rating and associated comment

sentiment: a str representing the generated sentiment analysis based on all reviews

recommendation: a str representing the generated recommendations based on the generated sentiment
'''

'\nInputs and declearations necessary to make app function:\n\nuser_input: a URL or name of a business (tbd) for which to fetch reviews\n\nsbmt_bttn: a submit button\n\nbus_id: an ID str or name of a business (tbd) to display for a given business\n\nbus_loc: an array or a str (tbd) representing the location(s) of a given business\n\navg_rating: an int representing the average rating for a given business\n\ntot_ratings: an int representing the toal reviews submitted for a given business\n\nreviews: a dictionary of reviews containing a given rating and associated comment\n\nsentiment: a str representing the generated sentiment analysis based on all reviews\n\nrecommendation: a str representing the generated recommendations based on the generated sentiment\n'

#### Temporary Components;

Placeholders until merging with more finlized components possible.

In [3]:
# Example data: Replace with your real data from an API
locations = [
    {"lat": 37.7749, "lon": -122.4194, "name": "San Francisco"},
    {"lat": 34.0522, "lon": -118.2437, "name": "Los Angeles"}
]

lat_loc = [loc["lat"] for loc in locations]
lon_loc = [loc["lon"] for loc in locations]

lat_mean = sum(lat_loc)/len(lat_loc)
lon_mean = sum(lon_loc)/len(lon_loc)

lat_min, lat_max = min(lat_loc), max(lat_loc)
lon_min, lon_max = min(lon_loc), max(lon_loc)

# Function to calculate the zoom level
def calculate_zoom_level(lat_min, lat_max, lon_min, lon_max):
    lat_diff = lat_max - lat_min
    lon_diff = lon_max - lon_min

    # Using `loc()` to scale zoom based on distances at slower rates for larger geographic areas
    zoom = min(7 - math.log(lat_diff + 0.1), 7 - math.log(lon_diff + 0.1))
    return max(zoom, 0)  # Ensure the zoom level is not negative

zoom_level = calculate_zoom_level(lat_min, lat_max, lon_min, lon_max)

# Create the map figure
fig = go.Figure(go.Scattermapbox(
    lat=lat_loc,
    lon=lon_loc,
    mode='markers',
    hovertext = ['<br>' + loc["name"] for loc in locations],
    marker=dict(size=10)
))

# Update layout with map style and other properties
fig.update_layout(
    mapbox={
        'style': "open-street-map",
        'center': {'lon': lon_mean, 'lat': lat_mean},  # Center to cover all locations
        'zoom': zoom_level
    },
    margin={"r":0,"t":0,"l":0,"b":0},
    height=500
)

In [4]:
# Temporary default map

# Create a default map centered on the US
fig_placeholder = go.Figure(go.Scattermapbox())
fig_placeholder.update_layout(
    mapbox={
        'style': "open-street-map",
        'center': {'lon': -98.583, 'lat': 39.833},
        'zoom': 2.5
    },
    margin={"r":0,"t":0,"l":0,"b":0},
    height=400
)

## App Development;

Initialization, construction, and launch of app.

In [6]:
# Initialize app
app = Dash(external_stylesheets=[dbc.themes.QUARTZ])

# Layout
app.layout = html.Div([
    # Stack to establish negative space for whole of app
    dbc.Stack(
        [
            # Blank col for spacing
            dbc.Col('', width=1),
            # Col with all of GUI
            dbc.Col(
                [
                    # Row for header
                    dbc.Row(
                        html.H1(
                            'Review Sentiment Analysis and Recommendations',
                            style={'textAlign':'center'}
                        ), style={'margin-top': '20px', 'margin-bottom': '20px'}
                    ),
                    # Row for subheader
                    dbc.Row(
                        html.H3(
                            'An interactive application to leverage customer reviews into improved businesss',
                            style={'textAlign':'center'}
                        ), style={'margin-bottom': '20px'}
                    ),
                    # Row for user input
                    dbc.Stack(
                        [
                            # User input
                            dbc.Input(
                                id='user_input',
                                type='text',
                                placeholder='Input a Business Name'
                            ),
                            # Submit button
                            dbc.Button(
                                'Submit',
                                id='sbmt_bttn',
                                n_clicks=0
                            )
                        ],
                        style={'margin-bottom': '20px'},
                        direction='horizontal',
                        gap=1
                    ),
                    # Row for business name and ratings
                    dbc.Stack(
                        [
                            # Business name
                            dbc.Col(
                                dbc.Card(children='Business Name', id='bus_nm', body=True), width=8
                            ),
                            # Average rating
                            dbc.Col(
                                dbc.Card(children='Average Rating', id='avg_rtng', body=True), width=2
                            ),
                            # Total reviews
                            dbc.Col(
                                dbc.Card(children='Total Reviews', id='tot_rvws', body=True), width=2
                            )
                        ],
                        style={'margin-bottom': '20px'},
                        direction='horizontal',
                        gap=1
                    ),
                    # Row for map and accordion
                    dbc.Stack(
                        [
                            # Map
                            dbc.Col(
                                dcc.Graph(figure=fig_placeholder, id='bus_map'),
                                width=5
                            ),
                            # accordion
                            dbc.Col(
                                dbc.Accordion(
                                    [
                                        # Reviews
                                        dbc.AccordionItem(
                                            html.P(
                                                id='reviews',
                                                children='Select a business to see reviews.',
                                                style={'max-height': '300px', 'overflow-y': 'auto'}
                                            ),
                                            title='Reviews'
                                        ),
                                        # Sentiment analysis
                                        dbc.AccordionItem(
                                            html.P(
                                                id='sentiment',
                                                children='Select a business to generate sentiment analysis.',
                                                style={'max-height': '300px', 'overflow-y': 'auto'}
                                            ),
                                            title='Sentiment Analysis'
                                        ),
                                        # Recommendations
                                        dbc.AccordionItem(
                                            html.P(
                                                id='recommend',
                                                children='Select a business to generate recommendations.',
                                                style={'max-height': '300px', 'overflow-y': 'auto'}
                                            ),
                                            title='Recommendations'
                                        )
                                    ]
                                ),
                                align='start',
                                width=7
                            )
                        ],
                        direction='horizontal',
                        gap=1
                    )
                ], width=10
            ),
            # Blank col for spacing
            dbc.Col('', width=1)
        ],
        direction='horizontal',
        gap=1
    )
])

# Callback (in place for development only, will move to own cell once developed)
@callback(
    Output('bus_nm', 'children'),       # Name of the business == `user_input`
    Output('avg_rtng', 'children'),     # Average rating out of 5 stars
    Output('tot_rvws', 'children'),     # Total number of submitted reviews
    Output('bus_map', 'figure'),        # Lat and Lon of business location(s)
    Output('reviews', 'children'),      # Any review(s) left by users 
    Output('sentiment', 'children'),    # Generated seniment analysis based on reviews
    Output('recommend', 'children'),    # Generated recommendations based on sentiment analysis
    Input('sbmt_bttn', 'n_clicks'),     # Submit button to begin generation of content
    State('user_input', 'value')        # Business name to be entered by user
)

# Update content
def update_content(n_clicks, user_input):
    if n_clicks == 0 or not user_input:
        raise PreventUpdate
    
    business_name = user_input
    avg_rating = f'Average Rating: {4.5}'
    total_reviews = f'Total Reviews: {n_clicks}'
    fig_update = fig
    reviews = 'Collected reviews go here. Content will scroll if height of 300px exceeded.'
    sentiment = 'Generated sentiment analysis goes here. Content will scroll if height of 300px exceeded.'
    recommendation = 'Generated recommendations go here. Content will scroll if height of 300px exceeded.'
    return business_name, avg_rating, total_reviews, fig_update, reviews, sentiment, recommendation

# Launch app (in place for development only, will move to own cell once developed)
app.run(jupyter_mode='tab')
# app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/


<IPython.core.display.Javascript object>

*End Code Space*

# **Vanessa's Code Space**

*End Code Space*

# **Train Test Splitting**

# **Scaling and Encoding**

# **Modeling**

# **Application (?)**

# **Findings**

# **Citations and Licenses**

## Citations

## Licenses