In [332]:
import pandas as pd
import numpy as np
import pyspark as ps
from pyspark.sql import SparkSession
from pyspark.sql.types import (
    IntegerType, StringType, IntegerType, FloatType, 
    StructField, StructType, DoubleType
)
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
import matplotlib.pyplot as plt
import pickle

## Import the pandas dataframe containing the info from the review scrape 

In [117]:
als_df = pd.read_pickle('als_df.pkl')

In [118]:
als_df.reset_index(drop=True, inplace=True)

In [119]:
als_df = als_df[['user_id', 'item_id', 'rating', 'date']]

In [120]:
als_df.shape

(5216, 4)

In [191]:
len(als_df['user_id'].unique())

1524

## Start a spark session to train an ALS model

In [121]:
spark = SparkSession.builder.getOrCreate()


In [122]:
spark_als_df = spark.createDataFrame(als_df) 


In [123]:
spark_als_df.printSchema()

root
 |-- user_id: long (nullable = true)
 |-- item_id: long (nullable = true)
 |-- rating: double (nullable = true)
 |-- date: timestamp (nullable = true)



In [124]:
spark_als_df.limit(5).show()

+-------+-------+------+-------------------+
|user_id|item_id|rating|               date|
+-------+-------+------+-------------------+
|   1520|    596|   3.0|2005-08-02 00:00:00|
|   1520|    592|   4.0|2005-09-14 00:00:00|
|   1369|    480|   4.0|2006-05-13 00:00:00|
|   1369|    601|   5.0|2006-05-19 00:00:00|
|   1369|    488|   5.0|2006-05-22 00:00:00|
+-------+-------+------+-------------------+



In [125]:
pandas_als_df = spark_als_df.toPandas()

In [126]:
train = pandas_als_df[:int(len(pandas_als_df)*.8)]
test = pandas_als_df[int(len(pandas_als_df)*.8):]

In [127]:
test = test[['user_id', 'item_id', 'rating']]
test.head()

Unnamed: 0,user_id,item_id,rating
4172,115,478,4.0
4173,346,296,5.0
4174,149,78,4.0
4175,533,308,5.0
4176,59,39,4.0


In [128]:
train = train[['user_id', 'item_id', 'rating']]

In [129]:
train.head()

Unnamed: 0,user_id,item_id,rating
0,1520,596,3.0
1,1520,592,4.0
2,1369,480,4.0
3,1369,601,5.0
4,1369,488,5.0


In [188]:
train.shape

(4172, 3)

In [148]:
als_model = ALS(
    itemCol='item_id',
    userCol='user_id',
    ratingCol='rating',
    nonnegative=True,    
    regParam=0.1,
    rank=10,
    coldStartStrategy = 'drop') 

In [149]:
spark_train = spark.createDataFrame(train) 

In [150]:
spark_test = spark.createDataFrame(test)

In [151]:
recommender = als_model.fit(spark_train)

In [152]:
train_preds = recommender.transform(spark_train)

In [153]:
test_preds = recommender.transform(spark_test)

In [154]:
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")

In [155]:
rmse_train = evaluator.evaluate(train_preds)

In [156]:
rmse_train

0.1541621332564429

In [157]:
rmse_test = evaluator.evaluate(test_preds)

In [158]:
rmse_test

1.4248735903235368

In [None]:
#restaurant_recs = recommender.recommendForAllUsers(10)

In [160]:
restaurant_recs.limit(5).toPandas()

Unnamed: 0,user_id,recommendations
0,471,"[(616, 5.617592811584473), (344, 5.07469081878..."
1,1342,"[(350, 3.8274924755096436), (162, 3.8184649944..."
2,463,"[(16, 5.386734962463379), (94, 5.3835062980651..."
3,833,"[(87, 5.153095245361328), (225, 5.072264671325..."
4,496,"[(16, 4.219986915588379), (64, 4.0130820274353..."


## Addressing the Cold Start Problem

In [140]:
user_factors = recommender.userFactors.collect()

In [141]:
item_factors = recommender.itemFactors.collect()

In [142]:
user_factors[0]

Row(id=0, features=[0.48242494463920593, 0.017530081793665886, 0.24016182124614716, 0.02696259692311287, 0.8009696006774902, 0.0, 1.1975611448287964, 0.7038739919662476, 0.4649558365345001, 0.9926409721374512])

In [143]:
item_factors[0]

Row(id=0, features=[0.10419613867998123, 0.38785940408706665, 1.1959340572357178, 0.04274549335241318, 0.05032627657055855, 1.2436206340789795, 1.2638238668441772, 0.05121312290430069, 0.012972562573850155, 1.6055973768234253])

In [164]:
uf_df = recommender.userFactors.toPandas()

In [165]:
if_df = recommender.itemFactors.toPandas()

In [388]:
if_df

Unnamed: 0,id,features
0,0,"[0.10419613867998123, 0.38785940408706665, 1.1..."
1,10,"[0.909296989440918, 0.7954235672950745, 1.3571..."
2,20,"[0.47380009293556213, 0.7909039258956909, 1.13..."
3,30,"[0.14003269374370575, 0.6215099096298218, 0.95..."
4,40,"[1.0893316268920898, 0.4428764879703522, 0.251..."
5,50,"[0.8308094143867493, 0.7112256288528442, 0.575..."
6,60,"[0.21283996105194092, 0.25575241446495056, 0.2..."
7,70,"[0.5204952955245972, 0.48651251196861267, 0.93..."
8,80,"[1.5235199928283691, 0.49236446619033813, 0.40..."
9,90,"[1.3531551361083984, 0.38193807005882263, 0.33..."


In [166]:
user_factors_array = np.array(uf_df['features'].tolist())

In [167]:
item_factors_array = np.array(if_df['features'].tolist())

In [168]:
user_factors_array.shape

(1333, 10)

In [169]:
item_factors_array.shape

(610, 10)

In [212]:
def predict_rating(user_idx, item_idx):
    """Return the predicted rating of item by user (by iloc)."""
    user_vector = user_factors_array[user_idx, :]
    item_vector = item_factors_array[item_idx, :].T
    return user_vector @ item_vector

In [194]:
def predict_rating_by_id(user_id, item_id):
    """Return the predicted rating of item by user (by id)."""
    user_idx = uf_df.index[uf_df['id'] == user_id][0]
    item_idx = if_df.index[if_df['id'] == item_id][0]
    return predict_rating(user_idx, item_idx)

In [273]:
predict_rating_by_id(471, 59)

3.0635660223987617

In [214]:
predict_rating(1, 1)

2.4345992274025559

In [268]:
user_idx = uf_df.index[uf_df['id'] == 471][0]

In [275]:
item_idx = if_df.index[if_df['id'] == 59][0]
item_idx

554

In [284]:
item_factors_array[item_idx]

array([ 1.23193836,  0.71234483,  0.61380726,  0.92233211,  0.51926273,
        0.45161936,  0.33576125,  0.53609425,  0.82403857,  0.73079431])

In [276]:
pred_ratings = user_factors_array[user_idx, :] @ item_factors_array[554, :].T

In [292]:
def get_restaurant_indexes(user_ratings, item_factors_df):
    rest_idxs = []
    for item in user_ratings['item_id']:
        rest_idx = if_df.index[if_df['id']==item]
        rest_idxs.append(rest_idx[0])
    return np.array(rest_idxs)
        

In [290]:
rest_idxs = []
for item in user_ratings['item_id']:
    rest_idx = if_df.index[if_df['id']==item]
    rest_idxs.append(rest_idx[0])

In [407]:
#restaurant_indexes = get_restaurant_indexes(user_ratings, if_df)

In [298]:
item_factors_array[rest_idxs]

array([[ 1.23193836,  0.71234483,  0.61380726,  0.92233211,  0.51926273,
         0.45161936,  0.33576125,  0.53609425,  0.82403857,  0.73079431],
       [ 0.        ,  0.28147367,  0.16980056,  0.88922822,  0.61474133,
         0.48812598,  0.1549332 ,  1.27417862,  0.        ,  0.15003248],
       [ 0.0820099 ,  0.04795893,  0.47511753,  1.29407072,  0.40441439,
         0.        ,  0.2522462 ,  0.48730141,  0.35950863,  1.63152337],
       [ 0.25579077,  0.15570927,  0.20673585,  0.65579718,  1.62764156,
         0.89648533,  0.10383582,  0.18013598,  0.07533462,  0.18924707]])

### X is our new user's factors vector; use to generate a predicted ratings row for new user

In [299]:
X, residuals, rank, s = np.linalg.lstsq(item_factors_array[rest_idxs], ratings.values)

In [304]:
newuser_factors = X

In [309]:
def newuser_predict(newuser_factors, item_factors_array):
    new_factor_list =[]
    for i in range(len(item_factors_array)):
        new_factor_list.append(np.dot(newuser_factors, item_factors_array[i]))
    newuser_preds = pd.DataFrame([new_factor_list], index=['newuser'])
    return newuser_preds

In [310]:
example_new_user = newuser_predict(newuser_factors, item_factors_array)

In [412]:
example_new_user

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,600,601,602,603,604,605,606,607,608,609
newuser,3.46077,2.088271,2.507537,3.980024,3.439144,1.95337,1.691812,3.499493,3.142832,2.373336,...,1.849569,0.948866,0.777959,2.737736,3.142338,1.002532,3.11784,2.591083,2.572401,2.79683


### Now... get the actual restaurant names here and sort the predictions to get the top preds

In [350]:
inv_alias_dict = {v: k for k, v in alias_dict.items()}
inv_alias_dict

{0: 'amazon-go-seattle-5',
 1: 'the-pink-door-seattle-4',
 2: 'nirmals-seattle',
 3: 'biscuit-bitch-seattle-5',
 4: 'damn-the-weather-seattle',
 5: 'bad-bishop-seattle',
 6: 'pike-place-chowder-seattle',
 7: 'tsukushinbo-seattle',
 8: 'taylor-shellfish-oyster-bar-seattle-2',
 9: 'elliotts-oyster-house-seattle-2',
 10: 'dough-zone-dumpling-house-seattle-2',
 11: 'the-london-plane-seattle-3',
 12: 'a-hong-kong-kitchen-seattle',
 13: 'radiator-whiskey-seattle',
 14: 'sizzling-pot-king-seattle-2',
 15: 'one-pot-seattle-3',
 16: 'ummas-lunch-box-seattle',
 17: 'dead-line-seattle',
 18: 'tats-delicatessen-seattle',
 19: 'harbor-city-restaurant-seattle',
 20: 'shawn-o-donnells-american-grill-and-irish-pub-seattle-2',
 21: 'commissary-korean-kitchen-seattle',
 22: 'king-noodle-seattle',
 23: 'bibimbap-king-seattle',
 24: 'good-bar-seattle-3',
 25: 'casco-antiguo-seattle',
 26: 'tofully-seattle-2',
 27: 'brgr-bar-seattle-seattle',
 28: 'japonessa-sushi-cocina-seattle',
 29: 'metropolitan-grill-

In [326]:
example_new_user_rest_names = example_new_user.rename(inv_alias_dict, axis=1)

In [330]:
example_new_user_rest_names.sort_values(by='newuser', axis=1, ascending=False)

Unnamed: 0,cherry-street-coffee-house-seattle-4,pho-saigon-seattle-2,la-creperie-voila-seattle-4,pho-ba-seattle-4,michou-seattle,altstadt-seattle-2,vons-1000-spirits-seattle-4,hong-kong-bistro-seattle,ivars-fish-bar-seattle-7,daily-dozen-doughnut-seattle,...,kings-barbeque-house-seattle-2,fortune-garden-seattle,tankard-and-tun-seattle-2,sound-soups-seattle,663-bistro-seattle,tap-house-grill-seattle-3,swannies-seattle,duk-li-dim-sum-seattle,qdoba-mexican-eats-seattle-7,i5-pho-seattle-seattle-2
newuser,5.962313,5.502468,5.367999,5.264682,5.0,4.741882,4.706773,4.669477,4.578557,4.567503,...,0.718246,0.707589,0.689746,0.589801,0.484221,0.406016,0.35505,0.247409,0.226909,0.197449


## Ok... so now try to use this with actual survey results

In [222]:
from survey_results import survey_results, usernames

In [339]:
gabe = survey_results[0]

In [340]:
gabe

{'café-campagne-seattle-3': 7,
 'chan-seattle-seattle-2': 8,
 'daawat-grill-seattle-2': 8,
 'el-borracho-seattle': 8,
 'gopoké-seattle-5': 6,
 'harbor-city-restaurant-seattle': 8,
 'japonessa-sushi-cocina-seattle': 9,
 'metropolitan-grill-seattle': 6,
 'mikes-noodle-house-seattle': 4,
 'mod-pizza-seattle-7': 8,
 'purple-café-and-wine-bar-seattle-3': 6,
 'red-robin-gourmet-burgers-seattle-7': 7,
 'specialtys-café-and-bakery-seattle-34': 7,
 'tamarind-tree-seattle': 7,
 'the-pink-door-seattle-4': 3,
 'vons-1000-spirits-seattle-4': 4,
 'wild-ginger-seattle': 8,
 'zig-zag-cafe-seattle-2': 4}

In [345]:
#divide ratings by two to put on the same scale
#convert aliases -> ids -> indexes for those ids
#find the item feature vectors for each of those restaurants by indexing into the item_factors_array
#make a "user ratings df" with the user, restaurant ids, and ratings

In [353]:
#divide ratings by two to put on the same scale
gabe_standardized = {k: v / 2 for k, v in gabe.items()}
gabe_standardized

{'café-campagne-seattle-3': 3.5,
 'chan-seattle-seattle-2': 4.0,
 'daawat-grill-seattle-2': 4.0,
 'el-borracho-seattle': 4.0,
 'gopoké-seattle-5': 3.0,
 'harbor-city-restaurant-seattle': 4.0,
 'japonessa-sushi-cocina-seattle': 4.5,
 'metropolitan-grill-seattle': 3.0,
 'mikes-noodle-house-seattle': 2.0,
 'mod-pizza-seattle-7': 4.0,
 'purple-café-and-wine-bar-seattle-3': 3.0,
 'red-robin-gourmet-burgers-seattle-7': 3.5,
 'specialtys-café-and-bakery-seattle-34': 3.5,
 'tamarind-tree-seattle': 3.5,
 'the-pink-door-seattle-4': 1.5,
 'vons-1000-spirits-seattle-4': 2.0,
 'wild-ginger-seattle': 4.0,
 'zig-zag-cafe-seattle-2': 2.0}

In [440]:
#inv_alias_dict

In [441]:
#convert aliases -> ids -> indexes for those ids
id_to_rating = {k: gabe_standardized[v] for k, v in inv_alias_dict.items() if v in gabe_standardized}

In [390]:
user_ratings_df = pd.DataFrame.from_dict(id_to_rating, orient='index')

In [398]:
user_ratings_df.reset_index(inplace=True)

In [508]:
user_ratings_df.rename(columns={'index':'item_id', 0:'rating'}, inplace=True)


In [418]:
survey_ratings = user_ratings_df['rating'].values.reshape(-1, 1)

In [422]:
survey_ratings_notreshaped = user_ratings_df['rating'].values

In [408]:
rest_idx = get_restaurant_indexes(user_ratings_df, if_df)

In [411]:
latent_item_features = item_factors_array[rest_idx]

In [423]:
X, residuals, rank, s = np.linalg.lstsq(latent_item_features, survey_ratings_notreshaped)

In [424]:
X

array([ 0.57038828, -0.03746423,  0.91142951,  1.27560665, -0.6011417 ,
        1.71059839, -0.21787959,  0.70794957,  0.07537956,  0.54031314])

In [429]:
gabes_preds = newuser_predict(X, item_factors_array)

In [431]:
gabes_preds

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,600,601,602,603,604,605,606,607,608,609
newuser,3.915918,4.202314,2.205367,1.393903,2.968143,4.604808,1.489047,4.889152,4.967544,2.763279,...,3.466685,2.12971,4.106549,3.668647,2.643615,0.663643,3.093345,3.942243,2.26567,3.263689


In [437]:
gabe_sorted_preds = gabes_preds.sort_values(by='newuser', axis=1, ascending=False).rename(inv_alias_dict, axis=1)

In [439]:
list(gabe_sorted_preds.columns)

['goldfinch-tavern-seattle',
 'cherry-street-coffee-house-seattle-4',
 'mi-la-cay-seattle',
 'retro-restaurant-and-lounge-seattle',
 'liana-cafe-house-seattle',
 'red-robin-gourmet-burgers-seattle-7',
 'the-taproom-at-pike-place-seattle',
 'buffalo-wild-wings-seattle-2',
 'blue-c-sushi-seattle-3',
 'taylor-shellfish-oyster-bar-seattle-2',
 'steelhead-diner-seattle',
 'la-vaca-burrito-express-seattle',
 'revolution-pizza-and-music-seattle',
 'great-state-burger-seattle-6',
 'oriental-mart-seattle-5',
 'tsukushinbo-seattle',
 'tandoozy-seattle',
 'taco-del-mar-seattle-15',
 'miners-landing-seattle',
 'the-little-london-plane-seattle-2',
 '4th-ave-espresso-bar-seattle-3',
 'frolik-kitchen-cocktails-seattle-2',
 'noodle-zen-seattle',
 'the-central-saloon-seattle',
 'the-bistro-at-courtyard-seattle',
 'unconventional-pizza-seattle',
 'jimmy-johns-seattle-8',
 'tats-truck-seattle',
 'blue-water-taco-grill-seattle-12',
 'stella-fiore-wood-fired-pizza-seattle-3',
 'king-street-kafe-seattle',
 

## Make some functions that do all this, import it, try it out

In [467]:
from ALS import ALSRecommender

In [468]:
nicole = survey_results[1]

In [469]:
a_recommender = ALSRecommender(uf_df, if_df, inv_alias_dict)

In [473]:
nicole_preds = a_recommender.get_preds_from_survey_results(nicole)

In [474]:
nicole_preds

Unnamed: 0,biscuit-bitch-seattle-5,pikes-pit-bar-b-que-seattle,apizza-seattle-3,cherry-street-coffee-house-seattle-10,amazon-go-seattle-5,james-moore-bar-and-kitchen-seattle,dong-thap-noodles-seattle,umai-sushi-and-teriyaki-seattle,ettas-seattle-2,il-corvo-pasta-seattle,...,luigis-italian-eatery-seattle-2,saffron-spice-seattle,frolik-kitchen-cocktails-seattle-2,oriental-mart-seattle-5,steelhead-diner-seattle,taco-del-mar-seattle-28,i-heart-sushi-seattle,harbor-city-restaurant-seattle,grand-central-bakery-seattle-5,lowells-restaurant-seattle
newuser,32.938301,27.687891,26.281604,25.670297,24.354026,24.007295,23.248938,21.198956,20.854826,20.451822,...,-18.927373,-19.163136,-20.150024,-20.265583,-20.748611,-20.748702,-21.949447,-22.683841,-34.95333,-40.567627


In [459]:
jonny = survey_results[3]

In [477]:
jonny_preds = a_recommender.get_preds_from_survey_results(jonny)
jonny_preds

Unnamed: 0,biscuit-bitch-seattle-5,pikes-pit-bar-b-que-seattle,amazon-go-seattle-5,apizza-seattle-3,cherry-street-coffee-house-seattle-10,james-moore-bar-and-kitchen-seattle,ettas-seattle-2,umai-sushi-and-teriyaki-seattle,zobel-seattle,dong-thap-noodles-seattle,...,ten-sushi-35-seattle-2,dunbar-room-seattle,saffron-spice-seattle,luigis-italian-eatery-seattle-2,pizza-and-pasta-bar-seattle-2,taco-del-mar-seattle-28,harbor-city-restaurant-seattle,i-heart-sushi-seattle,grand-central-bakery-seattle-5,lowells-restaurant-seattle
newuser,16.47292,16.136041,15.737525,15.671637,13.905126,13.497469,12.913731,12.68666,12.259833,12.140751,...,-7.04447,-7.780404,-7.874688,-7.916165,-8.636512,-8.677275,-8.728556,-9.076988,-14.971746,-17.726767


In [476]:
jonny_preds

Unnamed: 0,biscuit-bitch-seattle-5,pikes-pit-bar-b-que-seattle,amazon-go-seattle-5,apizza-seattle-3,cherry-street-coffee-house-seattle-10,james-moore-bar-and-kitchen-seattle,ettas-seattle-2,umai-sushi-and-teriyaki-seattle,zobel-seattle,dong-thap-noodles-seattle,...,ten-sushi-35-seattle-2,dunbar-room-seattle,saffron-spice-seattle,luigis-italian-eatery-seattle-2,pizza-and-pasta-bar-seattle-2,taco-del-mar-seattle-28,harbor-city-restaurant-seattle,i-heart-sushi-seattle,grand-central-bakery-seattle-5,lowells-restaurant-seattle
newuser,16.47292,16.136041,15.737525,15.671637,13.905126,13.497469,12.913731,12.68666,12.259833,12.140751,...,-7.04447,-7.780404,-7.874688,-7.916165,-8.636512,-8.677275,-8.728556,-9.076988,-14.971746,-17.726767


In [462]:
alex = survey_results[2]

In [463]:
alex_preds = a_recommender.get_preds_from_survey_results(alex, inv_alias_dict)
alex_preds

Unnamed: 0,lowells-restaurant-seattle,grand-central-bakery-seattle-5,harbor-city-restaurant-seattle,steelhead-diner-seattle,i-heart-sushi-seattle,taco-del-mar-seattle-28,oriental-mart-seattle-5,frolik-kitchen-cocktails-seattle-2,herbs-thai-to-go-seattle,the-taproom-at-pike-place-seattle,...,aloha-plates-seattle,rub-with-love-shack-seattle,amazon-go-seattle-5,apizza-seattle-3,james-moore-bar-and-kitchen-seattle,cherry-street-coffee-house-seattle-10,pikes-pit-bar-b-que-seattle,dong-thap-noodles-seattle,incredibowls-seattle-2,biscuit-bitch-seattle-5
newuser,67.018514,59.691544,43.130078,42.942915,41.517643,40.120691,39.55299,39.148831,38.275578,37.847721,...,-21.890633,-22.795156,-24.43904,-26.018849,-26.896368,-27.929653,-30.499478,-30.751683,-30.800841,-40.651205


In [496]:
all_users = pd.DataFrame(index=range(len(survey_results)), columns=inv_alias_dict.values())
all_users

Unnamed: 0,amazon-go-seattle-5,the-pink-door-seattle-4,nirmals-seattle,biscuit-bitch-seattle-5,damn-the-weather-seattle,bad-bishop-seattle,pike-place-chowder-seattle,tsukushinbo-seattle,taylor-shellfish-oyster-bar-seattle-2,elliotts-oyster-house-seattle-2,...,mediterranean-cuisine-seattle,ricenroll-seattle-2,union-deli-la-puget-sound-plaza-building-seattle,measure-seattle,subway-seattle-3,u-bank-cafe-seattle,subway-seattle-31,subway-seattle-66,organic-to-go-seattle-4,chez-dave-seattle-2
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [505]:
a = ALSRecommender(uf_df, if_df, inv_alias_dict)
#all_users = pd.DataFrame(index=range(len(survey_results)), columns=inv_alias_dict.values())

for survey in survey_results:
    results = a.get_preds_from_survey_results(survey)
results

Unnamed: 0,lowells-restaurant-seattle,grand-central-bakery-seattle-5,harbor-city-restaurant-seattle,steelhead-diner-seattle,taco-del-mar-seattle-28,i-heart-sushi-seattle,herbs-thai-to-go-seattle,market-cafe-seattle-3,pizza-and-pasta-bar-seattle-2,oriental-mart-seattle-5,...,little-italy-seattle-seattle,rub-with-love-shack-seattle,ettas-seattle-2,james-moore-bar-and-kitchen-seattle,cherry-street-coffee-house-seattle-10,dong-thap-noodles-seattle,apizza-seattle-3,amazon-go-seattle-5,pikes-pit-bar-b-que-seattle,biscuit-bitch-seattle-5
newuser,163.471328,154.734591,97.601715,95.474006,92.668808,90.394956,89.606701,87.416394,85.800498,83.087646,...,-65.404657,-67.408084,-69.774038,-72.914338,-79.815284,-84.853743,-88.394769,-89.658892,-96.692292,-108.992908


#### bring in the alias_dict to translate rest aliases to ids

In [226]:
more_than_one_review = pd.read_pickle('more_than_one_review_df.pkl')

In [227]:
aliases = more_than_one_review['alias'].unique()
alias_ids = zip(aliases, range(len(aliases)))
alias_dict = dict(alias_ids)

In [333]:
with open("alias_dict.txt", "wb") as myFile:
    pickle.dump(alias_dict, myFile)

In [257]:
gabe_df = pd.DataFrame(columns=alias_dict.keys(), index=usernames)

In [258]:
gabe_df

Unnamed: 0,amazon-go-seattle-5,the-pink-door-seattle-4,nirmals-seattle,biscuit-bitch-seattle-5,damn-the-weather-seattle,bad-bishop-seattle,pike-place-chowder-seattle,tsukushinbo-seattle,taylor-shellfish-oyster-bar-seattle-2,elliotts-oyster-house-seattle-2,...,mediterranean-cuisine-seattle,ricenroll-seattle-2,union-deli-la-puget-sound-plaza-building-seattle,measure-seattle,subway-seattle-3,u-bank-cafe-seattle,subway-seattle-31,subway-seattle-66,organic-to-go-seattle-4,chez-dave-seattle-2
gabe,,,,,,,,,,,...,,,,,,,,,,
nicole,,,,,,,,,,,...,,,,,,,,,,
jordan,,,,,,,,,,,...,,,,,,,,,,
jonny,,,,,,,,,,,...,,,,,,,,,,
alex,,,,,,,,,,,...,,,,,,,,,,
grant,,,,,,,,,,,...,,,,,,,,,,


In [259]:
def create_user_df(survey, df):
    for key, value in survey.items():
        df.iloc[0][key] = survey[key]
    return df

In [261]:
gabe = create_user_df(gabe, gabe_df).loc['gabe']