In [74]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import defaultdict
import heapq
import json
import math
import time

import imageio
import loading
import tqdm

from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn import svm

import keras
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Lambda, Dropout

In [3]:
"""DEFINE CONSTANTS HERE"""

DATA_PATH           = '../data'
VISUALIZATION_PATH  = '../visualization'

MISSING_VALUE       = '<NONE>'       # Used for the 'neighborhood' and 'city' attributes.
DEFAULT_NEI_P       = 0.2            # Default percentile of neighborhoods to keep.
DEFAULT_CITY_P      = 0.1            # Default percentile of cities to keep.
DEFAULT_ATT_P       = 1.0            # Default percentile of attributes to keep.
DEFAULT_CAT_P       = 0.5            # Default percentile of categories to keep.
DEFAULT_HRS_P       = 1.0            # Default percentile of hours to keep.

TIME_GRANULARITY    = 1              # Granularity (ticks/hr) of time calculations, factor of 60. 

SLICE_BY            = ['Restaurants']

In [4]:
"""Loads the json file of the given dataset name."""
def load(name):
  start = time.time()
  data = loading.read_df_from_json('%s/%s.json' % (DATA_PATH, name))
  print 'time to load \'%s\': %.3fs' % (name, time.time() - start)
  return data

In [5]:
"""Cleans the business dataset."""
def clean_business(business):
  print 'Replacing %s with %s.' % (u'Montréal', u'Montreal')
  business['city'].replace(to_replace=u'Montréal', 
                           value=u'Montreal',
                           inplace=True)

In [6]:
"""Plots all businesses on the world map for visualization purposes."""
def plot_business(business):
  points = business[['latitude', 'longitude']]

  img = imageio.imread(VISUALIZATION_PATH + '/raw_map.jpg').astype('int64')
  img = img / 4               # Dim map.
  img = img[8:-8,8:-8,:]      # Clip borders.
  H, W, _ = img.shape
  
  scalar = 10                 # Amount to add to each channel.
  delta = np.zeros((H, W), dtype='int64')
  
  def get_xy(latitude, longitude):
    x = (W - 1) * (180.0 + longitude) / 360.0
    y = (H - 1) * (90.0 - latitude) / 180.0
    return int(x), int(y)

  for row in points.itertuples():
    latitude, longitude = row.latitude, row.longitude
    if not math.isnan(latitude) and not math.isnan(longitude):
      x, y = get_xy(latitude, longitude)
      delta[y,x] += scalar

  img += np.expand_dims(delta, axis=-1).repeat(3, axis=-1)
  img = img.clip(0, 255).astype('uint8')
  
  return img

In [7]:
"""Returns the count_dict as a sorted list."""
def to_list(count_dict):
  return sorted([(k, count_dict[k]) for k in count_dict], key=lambda v: v[1])

In [8]:
"""Converts a dict of counts (key, int) into a list of top features.

   Takes either top N (int) features, or top PERCENTILE (float) by occurrence.

   Example usage:
     top_features(count_dict, 0.1)                  # Returns top 10% of elements.
     top_features(count_dict, top_n=5)              # Returns top 5 elements.
"""
def top_features(count_dict, percentile=None, n=None, verbose=True):
  if n is None:
    if percentile is None:
      raise Exception
    n = int(percentile * len(count_dict))

  l = heapq.nlargest(n, count_dict, key=lambda k: count_dict[k])
  
  if verbose:
    percentage = 0.0 if not len(count_dict) else 100.0 * n / len(count_dict)
    params = (n, len(count_dict), percentage, 0 if not l else count_dict[l[-1]])
    print 'Took %d elements out of %d (%2.1f%%). Cutoff was >= %d.' % params
    
  return l

In [9]:
"""Methods for converting a tuple from itertuples() to a feature list."""

# Return neighborhood concatenated with city, or MISSING_VALUE if empty. 
def get_neighborhood(tup):
  assert type(tup.neighborhood) is unicode
  return [tup.neighborhood + '/' + tup.city if tup.neighborhood else MISSING_VALUE]

# Return city, or MISSING_VALUE if empty.
def get_city(tup):
  assert type(tup.city) is unicode
  return [tup.city if tup.city else MISSING_VALUE]

# Recursively process attributes dict to get indicators for all attributes.
def get_attributes(tup):
  def _recurse(attributes, prefix):
    assert type(attributes) is dict
    l = []
    for k, v in attributes.items():
      if type(v) is bool:
        l.append(prefix + '/' + k)
      elif type(v) is unicode:
        l.append(prefix + '/' + k + '/' + v)
      elif type(v) is int:
        l.append(prefix + '/' + k + '/' + str(v))
      elif type(v) is dict:
        l += _recurse(attributes[k], prefix=k)
      else:
        assert False  # Invalid type in attributes.
    return l
  return _recurse(tup.attributes, prefix='')

# Return categories.
def get_categories(tup):
  assert type(tup.categories) is list
  return tup.categories

"""Helper methods for get_hours(), which determines which ticks of time the business is open.
   Each tick of time corresponds to an index in [0, _max_ticks()).

   In _time_to_dt_index, ROUND_UP determines what happens when the time falls
   in between ticks. By default, the time is rounded up.
"""
# An ordering of the days of the week, and a map from str --> index
_day_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
_day_index = {d:i for i, d in enumerate(_day_order)}

# Maximum number of ticks
def _max_ticks():
  return 7 * 24 * TIME_GRANULARITY

# Given day of the week and time, returns the corresponding tick in [0, _max_ticks()).
def _time_to_dt_index(day, time, round_up=True):
  [hour, minutes] = time.split(':')
  hour_index = int(hour) * TIME_GRANULARITY
  min_index  = int(minutes) / (60 / TIME_GRANULARITY)
  if round_up and int(minutes) % (60 / TIME_GRANULARITY) > 0:
    min_index += 1
  return _day_index[day] * 24 * TIME_GRANULARITY + hour_index + min_index

# Cache the string corresponding to each tick of time.
_timestr_cache = ['%s/%02d:%02d' % (_day_order[day], hour, min_index * 60 / TIME_GRANULARITY)
                  for day in range(0, 7)
                  for hour in range(0, 24)
                  for min_index in range(0, TIME_GRANULARITY)]

def get_hours(tup):
  assert type(tup.hours) is dict
  l = []
  for day, hours in tup.hours.items():
    open_time, close_time = hours.split('-')
    open_index = _time_to_dt_index(day, open_time)
    close_index = _time_to_dt_index(day, close_time)

    assert 0 <= open_index and close_index <= _max_ticks()
    
    # Handle the case where close_index is for the following day.
    if close_index <= open_index:
      close_index += 24 * TIME_GRANULARITY
    
    # Append the slice of the _time_str_cache, handling wrap-around appropriately.
    l += _timestr_cache[open_index:min(close_index, _max_ticks())]
    if close_index > _max_ticks():
      l += _timestr_cache[0:close_index - _max_ticks()]
      
  return l

# Function to retrieve all features of a given row tuple.
all_fns = [get_neighborhood, get_city, get_attributes, get_categories, get_hours]
def get_all_features(tup):
  all_features = set([])
  for fn in all_fns:
    all_features |= set(fn(tup))
  return all_features

# Returns the value to regress on for the row tuple.
def get_target(tup):
  return float(tup.stars)

In [10]:
"""Get the features that we will use for 
   neighborhoods, cities, attributes, categories.

   By default, take:
     TOP 20% OF neighborhoods
     TOP 10% OF cities
         ALL OF attributes
     TOP 50% OF categories
         ALL OF hours
   
   Returns a length 6 tuple:
     (nei_set, city_set, att_set, cat_set, hours_set, debug_vals)
"""
def get_feature_sets(business, slice_by=[],
                     percentiles=[DEFAULT_NEI_P, DEFAULT_CITY_P, DEFAULT_ATT_P, 
                                  DEFAULT_CAT_P, DEFAULT_HRS_P]):
  def has_required_features(all_features):
    all_features_set = set([])
    for features in all_features:
      all_features_set |= set(features)
    for f in slice_by:
      if f not in all_features_set:
        return False
    return True
  
  fn_counts_percentile = zip(all_fns, [defaultdict(int) for _ in range(len(all_fns))], percentiles)

  for tup in tqdm.tqdm(business.itertuples()):
    all_features = [fn(tup) for fn, _, _ in fn_counts_percentile]
    if not has_required_features(all_features):
      continue
    for features, (_, counts, _) in zip(all_features, fn_counts_percentile):
      for f in features:
        counts[f] += 1

  debug_val = [(fn.__name__, to_list(counts)) for fn, counts, _ in fn_counts_percentile]
  
  return [top_features(counts, percentile) for fn, counts, percentile in fn_counts_percentile], debug_val

In [11]:
"""Gets a mapping from feature name to feature index and vice versa."""
def get_feature_maps(feature_sets, start_index=0):  
  # Assert that there are no overlapping names.
  union = set([])
  for s in feature_sets:
    union |= set(s)
  assert len(union) == sum([len(s) for s in feature_sets])

  name_to_index, index_to_name = {}, {}
  for i, feature in enumerate(union):
    name_to_index[feature] = start_index + i
    index_to_name[start_index + i] = feature

  return name_to_index, index_to_name

In [12]:
def slice_by_feature(x, y, name_to_index, feature_name):
  if feature_name not in name_to_index:
    print 'Feature %s not found.' % feature_name
    return x, y
  feature_index = name_to_index[feature_name]
  indices = np.where(x[:,feature_index] == 1)
  return x[indices], y[indices]

def slice_by_features(x, y, name_to_index, slice_by):
  for feature_name in slice_by:
    x, y = slice_by_feature(x, y, name_to_index, feature_name)
  return x, y

In [13]:
"""Creates a list of data points for multivariate linear regression."""
def get_training_data(business, name_to_index, slice_by=[]):
  x = np.zeros((business.shape[0], len(name_to_index)), dtype='float32')
  y = np.zeros(business.shape[0], dtype='float32')
  
  for i, tup in tqdm.tqdm(enumerate(business.itertuples())):
    all_features = get_all_features(tup)
    for f in all_features:
      if f in name_to_index:
        x[i,name_to_index[f]] = 1
    y[i] = get_target(tup)
  
  return slice_by_features(x, y, name_to_index, slice_by)

Run business regression.

NOT USABLE:
- business_id
- name

PROBABLY NOT USABLE:
- latitude
- longitude
- postal code
- address
- is_open
- review_count
- state

REGRESS ON:
- stars  (1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5)

FEATURES:
- neighborhood/city  --> indicators (~20%)
- city               --> indicators (~10%)
- attributes         --> indicators (process types differently, each has a separate indicator)
- categories         --> indicators (~50%)
- hours              --> indicators for every hour/half hour/quarter of the hour

In [14]:
business = load('business')
clean_business(business)

time to load 'business': 7.847s
Replacing Montréal with Montreal.


In [15]:
# img = plot_business(business)
# plt.imshow(img)
# plt.show()
# imageio.imsave(VISUALIZATION_PATH + '/business_map.jpg', img)

In [39]:
SLICE_BY = ['Restaurants']

feature_sets, debug_val = get_feature_sets(business, slice_by=SLICE_BY, 
                                           percentiles=[0.2, 0.1, 1, 0.5, 1])

156639it [00:12, 12692.29it/s]

Took 110 elements out of 550 (20.0%). Cutoff was >= 45.
Took 73 elements out of 734 (9.9%). Cutoff was >= 68.
Took 100 elements out of 100 (100.0%). Cutoff was >= 3.
Took 325 elements out of 651 (49.9%). Cutoff was >= 5.
Took 168 elements out of 168 (100.0%). Cutoff was >= 1373.





In [40]:
debug_val[3][1][-10:]

[(u'Burgers', 4236),
 (u'Italian', 4411),
 (u'Pizza', 5652),
 (u'American (Traditional)', 5737),
 (u'Fast Food', 5792),
 (u'Sandwiches', 5864),
 (u'Bars', 6690),
 (u'Nightlife', 6969),
 (u'Food', 9599),
 (u'Restaurants', 51613)]

In [41]:
feature_maps = get_feature_maps(feature_sets)
name_to_index, index_to_name = feature_maps  

In [42]:
x, y = get_training_data(business, name_to_index, slice_by=SLICE_BY)

156639it [00:14, 10610.09it/s]


In [76]:
print 'Slicing by:', SLICE_BY
print 'Samples in slice:', len(x)
print

reg = linear_model.Ridge(alpha=0.5)
reg.fit(x, y)

print 'Intercept =', reg.intercept_
print

N = 20
coefs = [(index_to_name[i], c) for i, c in enumerate(reg.coef_)]
for n, c in heapq.nlargest(N, coefs, key=lambda v: v[1]):
  print n, c
print '...'
for n, c in reversed(heapq.nsmallest(N, coefs, key=lambda v: v[1])):
  print n, c
print

y_hat = reg.predict(x)
print mean_squared_error(y, y_hat)

Slicing by: ['Restaurants']
Samples in slice: 156639

Intercept = 3.58061851914

/BusinessAcceptsBitcoin 0.342346757621
/GoodForKids 0.172942719921
/WheelchairAccessible 0.153347807609
/ByAppointmentOnly 0.151098584623
/NoiseLevel/quiet 0.138159549628
/RestaurantsAttire/dressy 0.136932100057
/DogsAllowed 0.124039173798
/BikeParking 0.112387147115
/Alcohol/beer_and_wine 0.111477314322
/BYOBCorkage/yes_corkage 0.11132263277
HairSpecializesIn/asian 0.0987743316461
HairSpecializesIn/africanamerican 0.0987743316458
HairSpecializesIn/straightperms 0.0987743316458
/GoodForDancing 0.0857124420639
/BYOBCorkage/no 0.0829163971722
/AcceptsInsurance 0.0809592772045
/BusinessAcceptsCreditCards 0.0804764951161
BusinessParking/street 0.0738306952085
BusinessParking/garage 0.0738306952074
BusinessParking/valet 0.073830695207
...
/Smoking/outdoor -0.0783826344536
Ambience/divey -0.0907304870965
/RestaurantsTableService -0.116634177
/NoiseLevel/loud -0.142102922107
Ambience/hipster -0.143509417396
/Rest

In [60]:
f = '/RestaurantsTableService'
print np.sum(x[:,name_to_index[f]])
print np.mean(y[np.where(x[:,name_to_index[f]] == 1)])

39954.0
3.47999


In [26]:
N = len(x)
x_slice = x[:N]
y_slice = y[:N]

print 'Slicing by:', SLICE_BY
print 'Samples in slice:', len(x_slice)
print

start = time.time()
reg = svm.SVR(kernel='linear', cache_size=10000)
reg.fit(x_slice, y_slice)
print 'Time taken:', time.time() - start
print

print 'Intercept =', reg.intercept_[0]
print

N = 20
coefs = [(index_to_name[i], c) for i, c in enumerate(reg.coef_[0])]
for n, c in heapq.nlargest(N, coefs, key=lambda v: v[1]):
  print n, c
print '...'
for n, c in reversed(heapq.nsmallest(N, coefs, key=lambda v: v[1])):
  print n, c
print

y_hat = reg.predict(x_slice)
print mean_squared_error(y_slice, y_hat)

Slicing by: ['Restaurants', 'Las Vegas']
Samples in slice: 51613

Time taken: 530.648411989

Intercept = 3.50246176322

Food Trucks 0.475872730223
Latin American 0.308654181542
Vegan 0.27386211335
Hawaiian 0.216550200952
/NoiseLevel/quiet 0.201479185074
Specialty Food 0.197366301258
Caterers 0.19032927206
Cafes 0.182996497465
Mediterranean 0.181079378919
Hot Dogs 0.176944669293
/BusinessAcceptsBitcoin 0.162573155289
/BikeParking 0.161776427853
Vegetarian 0.137435724471
Cocktail Bars 0.135960628156
/NoiseLevel/average 0.133369270457
/DogsAllowed 0.128246777394
Delis 0.126258413843
/DriveThru 0.113874243678
Ice Cream & Frozen Yogurt 0.107004670696
Juice Bars & Smoothies 0.102695444847
...
/RestaurantsGoodForGroups -0.0389996523818
/CoatCheck -0.0422329484661
/NoiseLevel/loud -0.0422953017803
Diners -0.0433000821345
Steakhouses -0.063128292076
/RestaurantsPriceRange2/2 -0.0765789787396
/HasTV -0.0789639951918
/RestaurantsTableService -0.0960308418842
Pizza -0.109437203201
/RestaurantsAtti

In [37]:
print 'Slicing by:', SLICE_BY
print 'Samples in slice:', len(x)
print

reg = linear_model.Lasso(alpha=0.01)
reg.fit(x, y)

print 'Intercept =', reg.intercept_
print

N = 10
coefs = [(index_to_name[i], c) for i, c in enumerate(reg.coef_)]
for n, c in heapq.nlargest(N, coefs, key=lambda v: v[1]):
  print n, c
print '...'
for n, c in reversed(heapq.nsmallest(N, coefs, key=lambda v: v[1])):
  print n, c
print

y_hat = reg.predict(x)
print mean_squared_error(y, y_hat)

Slicing by: ['Restaurants', 'Las Vegas']
Samples in slice: 51613

Intercept = 3.37994172865

Food 0.190584
/DogsAllowed 0.143745
/BikeParking 0.102871
/NoiseLevel/quiet 0.0657489
/WheelchairAccessible 0.0384163
/NoiseLevel/average 0.029308
/Alcohol/beer_and_wine 0.0136793
Cafes 0.0081211
/Caters 0.0
/Corkage 0.0
...
/Corkage 0.0
/Caters 0.0
/NoiseLevel/loud -0.00766474
/RestaurantsPriceRange2/2 -0.00887873
Chinese -0.0341109
Pizza -0.0405149
American (Traditional) -0.0650737
Chicken Wings -0.130297
Burgers -0.133637
Fast Food -0.451302

0.539111


In [39]:
np.mean(business['is_open'])

0.8443746448840965

In [None]:
# Returns the value to regress on for the row tuple.
def get_target(tup):
  return float(tup.is_open)
x, y = get_training_data(business, name_to_index, slice_by=SLICE_BY)

In [49]:
N = len(x)
x_slice = x[:N]
y_slice = y[:N]

print 'Slicing by:', SLICE_BY
print 'Samples in slice:', len(x_slice)
print

start = time.time()
reg = svm.SVC(kernel='linear', cache_size=10000)
reg.fit(x_slice, y_slice)
print 'Time taken:', time.time() - start
print

print 'Intercept =', reg.intercept_[0]
print

N = 20
coefs = [(index_to_name[i], c) for i, c in enumerate(reg.coef_[0])]
for n, c in heapq.nlargest(N, coefs, key=lambda v: v[1]):
  print n, c
print '...'
for n, c in reversed(heapq.nsmallest(N, coefs, key=lambda v: v[1])):
  print n, c
print

y_hat = reg.predict(x_slice)
print mean_squared_error(y_slice, y_hat)
print reg.score(x_slice, y_slice)
print np.mean(y_slice)

Slicing by: ['Restaurants', 'Las Vegas']
Samples in slice: 51613

Time taken: 258.449704885

Intercept = 0.999929675129

/BikeParking 1.9997699162
Ambience/romantic 0.285666837635
Ambience/intimate 0.285666837635
Ambience/trendy 0.285666837635
Ambience/touristy 0.285666837635
Ambience/casual 0.285666837635
Ambience/upscale 0.285666837635
Ambience/classy 0.285666837635
Event Planning & Services 0.00132228196326
Fast Food 0.00117647360533
/RestaurantsTakeOut 0.000818843758562
/RestaurantsDelivery 0.000498512307871
/RestaurantsReservations 0.000405198766737
Chicken Wings 0.000296510237379
/GoodForDancing 0.00019484718853
/GoodForKids 0.000190825523486
Food Trucks 0.000177822532715
Pizza 0.000126994878592
/RestaurantsGoodForGroups 0.00011773958747
Food Delivery Services 8.87794796975e-05
...
BusinessParking/street -0.000104530556225
BusinessParking/valet -0.000104530556225
BusinessParking/lot -0.000104530556225
BusinessParking/garage -0.000104530556225
Nightlife -0.000126712550558
Hot Dogs

In [77]:
for k in name_to_index.keys():
  if 'Price' in k:
    print k

/RestaurantsPriceRange2/4
/RestaurantsPriceRange2/3
/RestaurantsPriceRange2/2
/RestaurantsPriceRange2/1


In [76]:
print 'Slicing by:', SLICE_BY
print 'Samples in slice:', len(x)
print

classes = 9

print 'Shape:', x.shape
print 'Classes:', classes
print

def one_hot(y):
  y_new = np.zeros((len(y), classes))
  for i in range(classes):
    val = 1.0 + i * 0.5
    y_new[:,i] = (y == val)
  return y_new

def get_model():
  model = Sequential()
  model.add(Dense(units=512, activation='relu', input_dim=x.shape[1]))
  model.add(Dropout(rate=0.3))
  model.add(Dense(units=512, activation='relu'))
  model.add(Dropout(rate=0.3))
  model.add(Dense(units=256, activation='relu'))
  model.add(Dropout(rate=0.3))
  model.add(Dense(units=128, activation='relu'))
  model.add(Dense(units=classes, activation='softmax'))

  model.compile(loss='categorical_crossentropy',
                optimizer='adadelta',
                metrics=['accuracy'])
  return model

def shuffle(x, y):
  assert(x.shape[0] == y.shape[0])
  split = x.shape[1]
  z = np.hstack((x, y))
  z = np.random.permutation(z)
  return z[:,:split], z[:,split:]
  

x_train, y_train = shuffle(x, one_hot(y))
model = get_model()
history = model.fit(x_train, y_train, 
                    epochs=50, 
                    batch_size=128, 
                    validation_split=0.2,
                    verbose=2)


Slicing by: ['Restaurants']
Samples in slice: 51613

Shape: (51613, 776)
Classes: 9

Train on 41290 samples, validate on 10323 samples
Epoch 1/50
 - 2s - loss: 1.7927 - acc: 0.2614 - val_loss: 1.7033 - val_acc: 0.2961
Epoch 2/50
 - 2s - loss: 1.7113 - acc: 0.2923 - val_loss: 1.6807 - val_acc: 0.3055
Epoch 3/50
 - 2s - loss: 1.6836 - acc: 0.2996 - val_loss: 1.6686 - val_acc: 0.2994
Epoch 4/50
 - 2s - loss: 1.6699 - acc: 0.3062 - val_loss: 1.6697 - val_acc: 0.3010
Epoch 5/50
 - 2s - loss: 1.6595 - acc: 0.3109 - val_loss: 1.6493 - val_acc: 0.3067
Epoch 6/50
 - 2s - loss: 1.6492 - acc: 0.3103 - val_loss: 1.6566 - val_acc: 0.3042
Epoch 7/50
 - 2s - loss: 1.6437 - acc: 0.3154 - val_loss: 1.6477 - val_acc: 0.3105
Epoch 8/50
 - 2s - loss: 1.6359 - acc: 0.3149 - val_loss: 1.6342 - val_acc: 0.3166
Epoch 9/50
 - 2s - loss: 1.6304 - acc: 0.3184 - val_loss: 1.6352 - val_acc: 0.3084
Epoch 10/50
 - 2s - loss: 1.6248 - acc: 0.3242 - val_loss: 1.6343 - val_acc: 0.3167
Epoch 11/50
 - 2s - loss: 1.6188 -

In [79]:
print 'Slicing by:', SLICE_BY
print 'Samples in slice:', len(x)
print

print 'Shape:', x.shape
print 'Classes:', classes
print

def get_model():
  model = Sequential()
  model.add(Dense(units=512, activation='relu', input_dim=x.shape[1]))
  model.add(Dropout(rate=0.7))
  model.add(Dense(units=512, activation='relu'))
  model.add(Dropout(rate=0.7))
  model.add(Dense(units=256, activation='relu'))
  model.add(Dropout(rate=0.7))
  model.add(Dense(units=128, activation='relu'))
  model.add(Dense(units=1))
#   model.add(Lambda(lambda x: K.clip(x, min_value=1, max_value=5)))

  model.compile(loss='mse',
                optimizer='adadelta')
  return model

def shuffle(x, y):
  assert(x.shape[0] == y.shape[0])
  split = x.shape[1]
  z = np.hstack((x, y))
  z = np.random.permutation(z)
  return z[:,:split], z[:,split:]
  
x_train, y_train = shuffle(x, np.reshape(y, (y.shape[0], 1)))
model = get_model()
history = model.fit(x_train, y_train, 
                    epochs=100, 
                    batch_size=128, 
                    validation_split=0.2,
                    verbose=2)



Slicing by: ['Restaurants']
Samples in slice: 51613

Shape: (51613, 776)
Classes: 9

Train on 41290 samples, validate on 10323 samples
Epoch 1/100
 - 2s - loss: 1.5275 - val_loss: 4.5066
Epoch 2/100
 - 2s - loss: 0.8616 - val_loss: 4.0309
Epoch 3/100
 - 2s - loss: 0.7137 - val_loss: 2.6677
Epoch 4/100
 - 2s - loss: 0.6246 - val_loss: 1.6842
Epoch 5/100
 - 2s - loss: 0.5697 - val_loss: 1.0374
Epoch 6/100
 - 2s - loss: 0.5413 - val_loss: 0.8929
Epoch 7/100
 - 2s - loss: 0.5277 - val_loss: 0.7991
Epoch 8/100
 - 2s - loss: 0.5230 - val_loss: 0.7243
Epoch 9/100
 - 2s - loss: 0.5179 - val_loss: 0.6914
Epoch 10/100
 - 2s - loss: 0.5138 - val_loss: 0.7435
Epoch 11/100
 - 2s - loss: 0.5101 - val_loss: 0.7025
Epoch 12/100
 - 2s - loss: 0.5041 - val_loss: 0.6520
Epoch 13/100
 - 2s - loss: 0.4907 - val_loss: 0.6232
Epoch 14/100
 - 2s - loss: 0.4829 - val_loss: 0.6089
Epoch 15/100
 - 2s - loss: 0.4766 - val_loss: 0.5709
Epoch 16/100
 - 2s - loss: 0.4721 - val_loss: 0.6351
Epoch 17/100
 - 2s - loss: