In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap
import plotly
import plotly.offline as po
plotly.offline.init_notebook_mode(connected=True)

import plotly.plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
from ipywidgets import interactive, HBox, VBox
import matplotlib.pyplot as plt
%matplotlib notebook

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

In [2]:
# Load the scraped business table. The first CSV column and the row labelled 31
# are discarded (presumably a saved index column and a malformed record --
# TODO confirm). The index is deliberately NOT reset, so row labels keep a gap
# at 31 after this cell.
business_data = (
    pd.read_csv('busniess_data_test.csv', sep=',')
    .pipe(lambda frame: frame.drop(frame.columns[0], axis=1))
    .drop([31])
)

# Strip the " reviews" suffix so the column can be parsed as a number.
# The result is assigned back to the frame: calling replace(inplace=True) on
# the selected column is a chained in-place update, which pandas warns about
# and which does not modify the frame when Copy-on-Write is enabled.
business_data['Number_of_Reviewers'] = business_data['Number_of_Reviewers'].replace(
    to_replace=r' reviews', value=r'', regex=True)

# Convert the text columns needed for plotting into numeric dtypes.
cols = ['Number_of_Reviewers', 'Longitude', 'Latitude']
business_data[cols] = business_data[cols].apply(pd.to_numeric)

In [3]:
import os

# NOTE(review): access tokens should not be committed with the notebook.
# The token is read from the MAPBOX_ACCESS_TOKEN environment variable when it
# is set; the literal is kept only as a fallback so existing runs keep working
# and should be removed once the variable is configured.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiZHJlYW15anBsIiwiYSI6ImNqbzFoNm00YjA2Z2kza3FuOWFmN2I3NXoifQ.cADYKcSYogjVVHBiNYvY9A',
)

In [4]:
def plt_gmap(center, lat, lon, name, size, color, zoom=10, height=512):
    """Build a Mapbox scatter figure with one marker per location.

    Parameters
    ----------
    center : sequence
        ``center[0]`` is used as the map-centre latitude and ``center[1]`` as
        the map-centre longitude.
    lat, lon : sequence
        Marker latitudes and longitudes.
    name : sequence
        Text attached to each marker.
    size : sequence or number
        Marker size(s).
    color : sequence
        Values mapped onto the 'Jet' colour scale. When the object has a
        ``name`` attribute (e.g. a pandas Series) that name becomes the
        colour-bar title; otherwise the title is left empty.
    zoom : number, optional
        Initial map zoom level (default 10).
    height : int, optional
        Figure height (default 512).

    Returns
    -------
    dict
        ``{'data': [...], 'layout': ...}`` suitable for ``go.FigureWidget``.

    Notes
    -----
    The Mapbox access token is read from the module-level
    ``mapbox_access_token`` variable at call time.
    """
    # Plain lists / arrays have no ``name``; fall back to an empty title
    # instead of raising AttributeError.
    colorbar_title = '{}'.format(color.name) if hasattr(color, 'name') else ''

    markers = go.Scattermapbox(
        lat=lat,
        lon=lon,
        mode='markers',
        marker=dict(
            size=size,
            color=color,
            colorscale='Jet',
            colorbar=dict(title=colorbar_title),
            opacity=0.6,
        ),
        text=name,
    )

    layout = go.Layout(
        autosize=True,
        hovermode='closest',
        height=height,
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(lat=center[0], lon=center[1]),
            pitch=0,
            zoom=zoom,
        ),
        margin=dict(l=40, r=40, b=40, t=40),
    )

    return dict(data=[markers], layout=layout)

In [5]:
# Series that drives the marker colour, and the hover label for each marker.
color = business_data['Star_Rating']
name = business_data['Name']

# Marker coordinates as plain lists of the column values.
lon = list(business_data['Longitude'].values)
lat = list(business_data['Latitude'].values)

# [latitude, longitude] pair passed to plt_gmap as the map centre.
center = [36.84, -76.1]

# Marker size is proportional to the rating; the highest rating maps to 40.
size = (color / color.max()) * 40

In [6]:
# Mapbox style URLs offered for the base map.
satellite_streets_mode = 'mapbox://styles/mapbox/satellite-streets-v9'
basic_mode = 'mapbox://styles/mapbox/streets-v10'
light_mode = 'mapbox://styles/mapbox/light-v9'

# Order here is the order in which the styles are listed for selection.
fig_type = [
    light_mode,
    basic_mode,
    satellite_streets_mode,
]

In [7]:
# Initial map figure built from the marker inputs defined above.
fig = plt_gmap(
    center=center,
    lat=lat,
    lon=lon,
    name=name,
    size=size,
    color=color,
)
# Widget version of the figure; its first trace is updated and observed below.
f = go.FigureWidget(fig)

def update_color_size(color, size, fig_type):
    """Recolour and resize the map markers and switch the base-map style.

    Operates on the module-level figure widget ``f`` and reads columns from
    the module-level ``business_data`` frame.

    Parameters
    ----------
    color : str
        Column of ``business_data`` used for the marker colours; also shown
        as the colour-bar title.
    size : str
        Column of ``business_data`` used for the marker sizes. Values are
        divided by the column maximum and multiplied by 40.
    fig_type : str
        Value assigned to the map layout's Mapbox ``style``.
    """
    scatter = f.data[0]
    size_values = business_data[size]

    # Group the four property changes so they are sent to the widget as one
    # update when the block exits, instead of one sync per assignment.
    with f.batch_update():
        scatter.marker.color = business_data[color]
        scatter.marker.size = (size_values / size_values.max()) * 40
        scatter.marker.colorbar.title = '{}'.format(color)
        f.layout.mapbox.style = '{}'.format(fig_type)

# Every numeric column is offered as a choice for both marker colour and
# marker size; fig_type supplies the choices for the base-map style.
numeric_columns = business_data.select_dtypes('number').columns
axis_dropdowns = interactive(
    update_color_size,
    size=numeric_columns,
    color=numeric_columns,
    fig_type=fig_type,
)

# Columns shown in the details table, in display order. Defined once so the
# header and the cell values cannot drift apart.
table_columns = ['Name', 'Star_Rating', 'Number_of_Reviewers']

# Table widget listing the businesses. A single 'left' alignment applies to
# every column (the previous five-entry list did not match the three columns).
t = go.FigureWidget([go.Table(
    header=dict(values=table_columns,
                fill=dict(color='#C2D4FF'),
                align='left'),
    cells=dict(values=[business_data[col] for col in table_columns],
               fill=dict(color='#F5F8FF'),
               align='left'))]
                   )

def selection_fn(trace, points, selector):
    """Show only the selected markers' rows in the details table ``t``.

    Registered as the selection callback of the map trace. ``points.point_inds``
    holds the positions of the selected markers within the trace, so rows are
    looked up by position (``iloc``). A label lookup (``loc``) is wrong here
    because ``business_data`` had the row labelled 31 dropped without its index
    being reset: labels and positions no longer coincide.
    """
    selected_rows = business_data.iloc[points.point_inds]
    t.data[0].cells.values = [
        selected_rows[col] for col in ['Name', 'Star_Rating', 'Number_of_Reviewers']
    ]

# Refresh the table whenever points are selected on the map trace.
f.data[0].on_selection(selection_fn)

# Stack the control row, the map and the table vertically.
u = VBox(children=(
    HBox(children=axis_dropdowns.children),
    f,
    t,
))

In [8]:
# Render the combined widget (control row, map and table) in the cell output.
display(u)

VBox(children=(HBox(children=(Dropdown(description='color', options=('Star_Rating', 'Number_of_Reviewers', 'Mo…

In [9]:
# Numeric columns only, minus the last two of them.
numeric_cols = business_data.select_dtypes('number').columns
business_data_plt = business_data[numeric_cols[:-2]]
rndperm = np.random.permutation(business_data_plt.shape[0])

# The first remaining column is split off as y; all other columns form X.
target_col = business_data_plt.columns[0]
X_business_data_plt = business_data_plt.drop(target_col, axis='columns')
y_business_data_plt = business_data_plt[target_col]

# Project X onto three principal components and show how much variance
# each component explains.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(X_business_data_plt.values)
pca.explained_variance_ratio_

array([9.97893704e-01, 1.16158841e-03, 5.18475399e-04])

In [10]:
# Three-dimensional t-SNE embedding of the feature columns; the fixed
# random_state is passed so the estimator is seeded explicitly.
X_business_data_plt_embedded = (
    TSNE(n_components=3, random_state=42)
    .fit_transform(X_business_data_plt)
)

In [11]:
# 3-D scatter of the t-SNE embedding: one marker per row, coloured and
# labelled by the y column.
business_data_tsne_embedded = go.Scatter3d(
    x=X_business_data_plt_embedded[:, 0],
    y=X_business_data_plt_embedded[:, 1],
    z=X_business_data_plt_embedded[:, 2],
    mode='markers',
    text=y_business_data_plt,
    marker=dict(
        size=12,
        opacity=0.8,
        color=y_business_data_plt,
        colorscale='Viridis',
        colorbar=dict(title='{}'.format(y_business_data_plt.name)),
    ),
)
data = [business_data_tsne_embedded]

# Label each axis with the embedding dimension it shows and remove the margins.
layout = go.Layout(
    scene=dict(
        xaxis=dict(title='tsne[:,0]'),
        yaxis=dict(title='tsne[:,1]'),
        zaxis=dict(title='tsne[:,2]'),
    ),
    margin=dict(l=0, r=0, b=0, t=0),
)

fig = go.Figure(data=data, layout=layout)
po.iplot(fig, filename='3d-scatter-colorscale')