In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

To use Git LFS, you must install the Git LFS client.

See https://git-lfs.github.com for more information.

Run: `git lfs install`

In [28]:
# Read the three Philadelphia extracts (businesses, reviews, users) from
# their Feather files in the working directory.
philly_bus = pd.read_feather('business_philly.feather')
philly_reviews = pd.read_feather('review_philly.feather')
philly_users = pd.read_feather('user_philly.feather')

In [4]:
# Bin each user's 'philly_share_of_reviews' into quarter-wide buckets.
# pd.cut uses right-closed intervals by default, i.e. (lo, hi], which is
# exactly what the label names describe.
# Inputs that fall in no bin (NaN) stay in the list as NaN instead of being
# skipped, so the list always has one entry per row of philly_users.
bucketed_philly_share_of_reviews = (
    pd.cut(
        philly_users['philly_share_of_reviews'],
        bins=[-np.inf, .25, .5, .75, np.inf],
        labels=[
            'less_than_or_equal_to_25_percent',
            'more_than_25_up_to_50_percent',
            'more_than_50_up_to_75_percent',
            'more_than_75_percent',
        ],
    )
    .astype(object)  # plain str labels (and NaN), not a Categorical
    .tolist()
)

In [5]:
# Tally how many users landed in each share-of-reviews bucket
# (np.unique converts the list to an array itself).
np.unique(bucketed_philly_share_of_reviews, return_counts=True)

(array(['less_than_or_equal_to_25_percent',
        'more_than_25_up_to_50_percent', 'more_than_50_up_to_75_percent',
        'more_than_75_percent'], dtype='<U32'),
 array([124651,  56858,  17534,  46368], dtype=int64))

In [6]:
# Attach the share-of-reviews bucket labels as a column of philly_users.
# dtype=object stores each list entry unchanged; a plain np.array() would
# turn any non-string entry (e.g. NaN) into the text 'nan'.
philly_users['bucketed_philly_share_of_reviews'] = np.array(
    bucketed_philly_share_of_reviews, dtype=object
)
philly_users

Unnamed: 0,index,_id,user_id,name,review_count,yelping_since,useful,funny,cool,elite,...,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,gender_score,philly_reviews,philly_share_of_reviews,bucketed_philly_share_of_reviews
0,4,631ea1f7abab926ea88770d7,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...",...,1847,7054,3131,3131,1521,1946,0.995754,13,0.003000,less_than_or_equal_to_25_percent
1,6,631ea1f7abab926ea88770d9,AUi8MPWJ0mLkMfwbui27lg,John,109,2010-01-07 18:32:04,154,20,23,,...,1,6,3,3,0,0,0.995785,2,0.018349,less_than_or_equal_to_25_percent
2,12,631ea1f7abab926ea88770df,1McG5Rn_UDkmlkZOrsdptg,Teresa,7,2009-05-26 16:11:11,18,3,13,,...,1,0,2,2,0,0,0.002806,5,0.714286,more_than_50_up_to_75_percent
3,21,631ea1f7abab926ea88770e8,q_QQ5kBBwlCcbL1s4NVK3g,Jane,1221,2005-03-14 20:26:35,14953,9940,11211,200620072008200920102011201220132014,...,1212,5696,2543,2543,815,323,0.003043,4,0.003276,less_than_or_equal_to_25_percent
4,22,631ea1f7abab926ea88770e9,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,...,232,844,467,467,239,180,0.986824,3,0.005128,less_than_or_equal_to_25_percent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245406,1987805,631ea2f2abab926ea8a5c5b0,Tr2yeddopDzMhSb7jbdNeA,Heather,1,2017-07-25 19:31:30,0,0,0,,...,0,0,0,0,0,0,0.002816,1,1.000000,more_than_75_percent
245407,1987831,631ea2f2abab926ea8a5c5ca,XTWARBzLbiJKQ1JeJB9f3g,Sakina,6,2017-11-24 03:17:11,1,0,0,,...,0,0,0,0,0,0,0.000000,1,0.166667,less_than_or_equal_to_25_percent
245408,1987832,631ea2f2abab926ea8a5c5cb,iZ0puydkFQlaSQAXbGtv1g,Dr. Sylvia,1,2014-07-08 19:54:19,2,0,0,,...,0,0,0,0,0,0,0.500000,1,1.000000,more_than_75_percent
245409,1987859,631ea2f2abab926ea8a5c5e6,K-BNvWUCWEGUuBirCU4mmg,Jodi,2,2015-03-23 17:29:22,0,0,0,,...,0,0,0,0,0,0,0.011541,1,0.500000,more_than_25_up_to_50_percent


In [7]:
# Bin each user's 'gender_score' into five buckets of width 0.2.
# pd.cut uses right-closed intervals by default, i.e. (lo, hi], matching the
# ranges spelled out in the label names.
# Inputs that fall in no bin (NaN) stay in the list as NaN instead of being
# skipped, so the list always has one entry per row of philly_users.
bucketed_gender_scores = (
    pd.cut(
        philly_users['gender_score'],
        bins=[-np.inf, .2, .4, .6, .8, np.inf],
        labels=[
            'less_than_or_equal_to_20_percent',
            'more_than_20_up_to_40_percent',
            'more_than_40_up_to_60_percent',
            'more_than_60_up_to_80_percent',
            'more_than_80_percent',
        ],
    )
    .astype(object)  # plain str labels (and NaN), not a Categorical
    .tolist()
)

In [8]:
# Tally how many users landed in each gender-score bucket
# (np.unique converts the list to an array itself).
np.unique(bucketed_gender_scores, return_counts=True)

(array(['less_than_or_equal_to_20_percent',
        'more_than_20_up_to_40_percent', 'more_than_40_up_to_60_percent',
        'more_than_60_up_to_80_percent', 'more_than_80_percent'],
       dtype='<U32'),
 array([118185,   4682,  29206,   2906,  90432], dtype=int64))

In [9]:
# Attach the gender-score bucket labels as a column of philly_users.
# dtype=object stores each list entry unchanged; a plain np.array() would
# turn any non-string entry (e.g. NaN) into the text 'nan'.
philly_users['bucketed_gender_scores'] = np.array(
    bucketed_gender_scores, dtype=object
)
philly_users

Unnamed: 0,index,_id,user_id,name,review_count,yelping_since,useful,funny,cool,elite,...,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,gender_score,philly_reviews,philly_share_of_reviews,bucketed_philly_share_of_reviews,bucketed_gender_scores
0,4,631ea1f7abab926ea88770d7,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...",...,7054,3131,3131,1521,1946,0.995754,13,0.003000,less_than_or_equal_to_25_percent,more_than_80_percent
1,6,631ea1f7abab926ea88770d9,AUi8MPWJ0mLkMfwbui27lg,John,109,2010-01-07 18:32:04,154,20,23,,...,6,3,3,0,0,0.995785,2,0.018349,less_than_or_equal_to_25_percent,more_than_80_percent
2,12,631ea1f7abab926ea88770df,1McG5Rn_UDkmlkZOrsdptg,Teresa,7,2009-05-26 16:11:11,18,3,13,,...,0,2,2,0,0,0.002806,5,0.714286,more_than_50_up_to_75_percent,less_than_or_equal_to_20_percent
3,21,631ea1f7abab926ea88770e8,q_QQ5kBBwlCcbL1s4NVK3g,Jane,1221,2005-03-14 20:26:35,14953,9940,11211,200620072008200920102011201220132014,...,5696,2543,2543,815,323,0.003043,4,0.003276,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent
4,22,631ea1f7abab926ea88770e9,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,...,844,467,467,239,180,0.986824,3,0.005128,less_than_or_equal_to_25_percent,more_than_80_percent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245406,1987805,631ea2f2abab926ea8a5c5b0,Tr2yeddopDzMhSb7jbdNeA,Heather,1,2017-07-25 19:31:30,0,0,0,,...,0,0,0,0,0,0.002816,1,1.000000,more_than_75_percent,less_than_or_equal_to_20_percent
245407,1987831,631ea2f2abab926ea8a5c5ca,XTWARBzLbiJKQ1JeJB9f3g,Sakina,6,2017-11-24 03:17:11,1,0,0,,...,0,0,0,0,0,0.000000,1,0.166667,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent
245408,1987832,631ea2f2abab926ea8a5c5cb,iZ0puydkFQlaSQAXbGtv1g,Dr. Sylvia,1,2014-07-08 19:54:19,2,0,0,,...,0,0,0,0,0,0.500000,1,1.000000,more_than_75_percent,more_than_40_up_to_60_percent
245409,1987859,631ea2f2abab926ea8a5c5e6,K-BNvWUCWEGUuBirCU4mmg,Jodi,2,2015-03-23 17:29:22,0,0,0,,...,0,0,0,0,0,0.011541,1,0.500000,more_than_25_up_to_50_percent,less_than_or_equal_to_20_percent


In [19]:

# Mean 'stars' per user_id over the rows of philly_reviews.
df = philly_reviews.groupby('user_id')['stars'].mean()

# Left-join the mean onto the user table; users with no matching review rows
# get NaN. A 'stars' column left over from a previous run of this cell is
# dropped first, otherwise the merge would emit 'stars_x' / 'stars_y' and the
# later cells that read philly_users.stars would fail.
philly_users = pd.merge(
    philly_users.drop(columns=['stars'], errors='ignore'),
    df,
    on=['user_id'],
    how='left',
)
philly_users

Unnamed: 0,index,_id,user_id,name,review_count,yelping_since,useful,funny,cool,elite,...,compliment_cool,compliment_funny,compliment_writer,compliment_photos,gender_score,philly_reviews,philly_share_of_reviews,bucketed_philly_share_of_reviews,bucketed_gender_scores,stars
0,4,631ea1f7abab926ea88770d7,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...",...,3131,3131,1521,1946,0.995754,13,0.003000,less_than_or_equal_to_25_percent,more_than_80_percent,4.0
1,6,631ea1f7abab926ea88770d9,AUi8MPWJ0mLkMfwbui27lg,John,109,2010-01-07 18:32:04,154,20,23,,...,3,3,0,0,0.995785,2,0.018349,less_than_or_equal_to_25_percent,more_than_80_percent,2.0
2,12,631ea1f7abab926ea88770df,1McG5Rn_UDkmlkZOrsdptg,Teresa,7,2009-05-26 16:11:11,18,3,13,,...,2,2,0,0,0.002806,5,0.714286,more_than_50_up_to_75_percent,less_than_or_equal_to_20_percent,4.0
3,21,631ea1f7abab926ea88770e8,q_QQ5kBBwlCcbL1s4NVK3g,Jane,1221,2005-03-14 20:26:35,14953,9940,11211,200620072008200920102011201220132014,...,2543,2543,815,323,0.003043,4,0.003276,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,4.0
4,22,631ea1f7abab926ea88770e9,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,...,467,467,239,180,0.986824,3,0.005128,less_than_or_equal_to_25_percent,more_than_80_percent,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245406,1987805,631ea2f2abab926ea8a5c5b0,Tr2yeddopDzMhSb7jbdNeA,Heather,1,2017-07-25 19:31:30,0,0,0,,...,0,0,0,0,0.002816,1,1.000000,more_than_75_percent,less_than_or_equal_to_20_percent,5.0
245407,1987831,631ea2f2abab926ea8a5c5ca,XTWARBzLbiJKQ1JeJB9f3g,Sakina,6,2017-11-24 03:17:11,1,0,0,,...,0,0,0,0,0.000000,1,0.166667,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,4.0
245408,1987832,631ea2f2abab926ea8a5c5cb,iZ0puydkFQlaSQAXbGtv1g,Dr. Sylvia,1,2014-07-08 19:54:19,2,0,0,,...,0,0,0,0,0.500000,1,1.000000,more_than_75_percent,more_than_40_up_to_60_percent,5.0
245409,1987859,631ea2f2abab926ea8a5c5e6,K-BNvWUCWEGUuBirCU4mmg,Jodi,2,2015-03-23 17:29:22,0,0,0,,...,0,0,0,0,0.011541,1,0.500000,more_than_25_up_to_50_percent,less_than_or_equal_to_20_percent,5.0


In [20]:
# Distinct per-user mean star values and how often each occurs (NaN included).
np.unique(philly_users['stars'], return_counts=True)

(array([1.        , 1.08333333, 1.11111111, ..., 4.97435897, 5.        ,
               nan]),
 array([32202,     1,     1, ...,     1, 92732,    48], dtype=int64))

In [21]:
# Peek at the first users whose merged 'stars' value is missing.
philly_users.loc[philly_users['stars'].isna()].head()

Unnamed: 0,index,_id,user_id,name,review_count,yelping_since,useful,funny,cool,elite,...,compliment_cool,compliment_funny,compliment_writer,compliment_photos,gender_score,philly_reviews,philly_share_of_reviews,bucketed_philly_share_of_reviews,bucketed_gender_scores,stars
203,553,631ea1f7abab926ea88772fc,2OLohmwFrl8ru4w1mUXtyw,Connie,371,2006-08-31 19:22:24,952,195,398,200620072008.0,...,20,20,13,9,0.025446,1,0.002695,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,
575,1656,631ea1f7abab926ea887774b,iH7pQFG0QusnW03YZc0CBg,Erin,26,2011-03-03 20:08:31,15,1,4,,...,0,0,1,0,0.028711,1,0.038462,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,
692,2024,631ea1f7abab926ea88778bb,bfDzZWI2cldhOYQlgFKoJw,Kelly,312,2008-01-12 05:36:34,657,265,313,2008200920102011.0,...,51,51,13,5,0.147612,1,0.003205,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,
1406,4124,631ea1f7abab926ea88780ef,TpSmU6Hd-DFPVNq6XxnXVw,Jonathan,60,2012-06-29 16:33:52,101,41,51,,...,1,1,0,0,0.995804,1,0.016667,less_than_or_equal_to_25_percent,more_than_80_percent,
1852,5523,631ea1f7abab926ea8878666,4NBjVeHXBbuDhdHAeD-oEA,Dwi,19,2008-01-12 01:12:19,18,2,9,,...,2,2,0,0,0.5,1,0.052632,less_than_or_equal_to_25_percent,more_than_40_up_to_60_percent,


In [22]:
# Rename the merged 'stars' column to 'average_stars_given'.
# The notebook later writes philly_users back to 'user_philly.feather', so a
# reloaded frame can already contain 'average_stars_given'; that stale copy is
# dropped first to avoid two columns with the same label. The guard makes the
# cell a no-op when it is re-run after the rename has already happened.
if 'stars' in philly_users.columns:
    philly_users = (
        philly_users
        .drop(columns=['average_stars_given'], errors='ignore')
        .rename(columns={'stars': 'average_stars_given'})
    )

In [23]:
# Bin each user's 'average_stars_given' into one-star-wide buckets.
# pd.cut uses right-closed intervals by default, i.e. (lo, hi].
# Missing averages stay in the list as NaN, so the list has one entry per row
# of philly_users.
# NOTE(review): the lowest bucket covers values <= 1 although its label reads
# 'less_than_1'; the label is left unchanged because it is written to the
# saved data.
bucketed_average_stars_given = (
    pd.cut(
        philly_users['average_stars_given'],
        bins=[-np.inf, 1, 2, 3, 4, np.inf],
        labels=[
            'less_than_1',
            'more_than_1_up_to_2',
            'more_than_2_up_to_3',
            'more_than_3_up_to_4',
            'more_than_4',
        ],
    )
    .astype(object)  # plain str labels (and NaN), not a Categorical
    .tolist()
)

In [24]:
# Tally how many users landed in each average-stars-given bucket
# (np.unique converts the list to an array itself).
np.unique(bucketed_average_stars_given, return_counts=True)

(array(['less_than_1', 'more_than_1_up_to_2', 'more_than_2_up_to_3',
        'more_than_3_up_to_4', 'more_than_4', 'nan'], dtype='<U32'),
 array([ 32202,  15359,  24083,  55051, 118668,     48], dtype=int64))

In [25]:
# Attach the average-stars bucket labels as a column of philly_users.
# dtype=object stores each list entry unchanged. Without it np.array() picks
# a string dtype and converts the NaN entries (users with no average) into
# the literal text 'nan', which isna() no longer recognises as missing.
philly_users['bucketed_average_stars_given'] = np.array(
    bucketed_average_stars_given, dtype=object
)
philly_users

Unnamed: 0,index,_id,user_id,name,review_count,yelping_since,useful,funny,cool,elite,...,compliment_funny,compliment_writer,compliment_photos,gender_score,philly_reviews,philly_share_of_reviews,bucketed_philly_share_of_reviews,bucketed_gender_scores,average_stars_given,bucketed_average_stars_given
0,4,631ea1f7abab926ea88770d7,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...",...,3131,1521,1946,0.995754,13,0.003000,less_than_or_equal_to_25_percent,more_than_80_percent,4.0,more_than_3_up_to_4
1,6,631ea1f7abab926ea88770d9,AUi8MPWJ0mLkMfwbui27lg,John,109,2010-01-07 18:32:04,154,20,23,,...,3,0,0,0.995785,2,0.018349,less_than_or_equal_to_25_percent,more_than_80_percent,2.0,more_than_1_up_to_2
2,12,631ea1f7abab926ea88770df,1McG5Rn_UDkmlkZOrsdptg,Teresa,7,2009-05-26 16:11:11,18,3,13,,...,2,0,0,0.002806,5,0.714286,more_than_50_up_to_75_percent,less_than_or_equal_to_20_percent,4.0,more_than_3_up_to_4
3,21,631ea1f7abab926ea88770e8,q_QQ5kBBwlCcbL1s4NVK3g,Jane,1221,2005-03-14 20:26:35,14953,9940,11211,200620072008200920102011201220132014,...,2543,815,323,0.003043,4,0.003276,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,4.0,more_than_3_up_to_4
4,22,631ea1f7abab926ea88770e9,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,...,467,239,180,0.986824,3,0.005128,less_than_or_equal_to_25_percent,more_than_80_percent,5.0,more_than_4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245406,1987805,631ea2f2abab926ea8a5c5b0,Tr2yeddopDzMhSb7jbdNeA,Heather,1,2017-07-25 19:31:30,0,0,0,,...,0,0,0,0.002816,1,1.000000,more_than_75_percent,less_than_or_equal_to_20_percent,5.0,more_than_4
245407,1987831,631ea2f2abab926ea8a5c5ca,XTWARBzLbiJKQ1JeJB9f3g,Sakina,6,2017-11-24 03:17:11,1,0,0,,...,0,0,0,0.000000,1,0.166667,less_than_or_equal_to_25_percent,less_than_or_equal_to_20_percent,4.0,more_than_3_up_to_4
245408,1987832,631ea2f2abab926ea8a5c5cb,iZ0puydkFQlaSQAXbGtv1g,Dr. Sylvia,1,2014-07-08 19:54:19,2,0,0,,...,0,0,0,0.500000,1,1.000000,more_than_75_percent,more_than_40_up_to_60_percent,5.0,more_than_4
245409,1987859,631ea2f2abab926ea8a5c5e6,K-BNvWUCWEGUuBirCU4mmg,Jodi,2,2015-03-23 17:29:22,0,0,0,,...,0,0,0,0.011541,1,0.500000,more_than_25_up_to_50_percent,less_than_or_equal_to_20_percent,5.0,more_than_4


In [26]:
# Save the user table, including the columns added above, as zstd-compressed
# Feather.
# NOTE: this writes to 'user_philly.feather', the same path the load cell
# reads from, so the original input file is replaced by the augmented frame.
philly_users.to_feather('user_philly.feather', compression='zstd')

In [29]:
# Show the business table as loaded, before any bucket columns are added.
philly_bus

Unnamed: 0,index,_id,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,positive_%
0,31,631ea3b2a5cde8cc0d6eec47,-0M0b-XhtFagyLmsBtOe8w,Paris Wine Bar,2303 Fairmount Ave,Philadelphia,PA,19130,39.967439,-75.175452,3.5,18,0,"{'Alcohol': ""u'full_bar'"", 'OutdoorSeating': '...","Bars, Nightlife, Restaurants, French, Wine Bars","{'Thursday': '17:0-0:0', 'Friday': '17:0-0:0',...",0.722222
1,32,631ea3b0a5cde8cc0d6dfa60,-0PN_KFPtbnLQZEeb23XiA,Mr Wong's Chinese Restaurant,1849 Wolf St,Philadelphia,PA,19145,39.923048,-75.178078,3.5,9,0,"{'OutdoorSeating': 'False', 'BusinessAcceptsCr...","Restaurants, Chinese","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",0.636364
2,33,631ea3aea5cde8cc0d6d5a50,-0TffRSXXIlBYVbb5AwfTg,IndeBlue Modern Indian Food & Spirits,205 South 13th St,Philadelphia,PA,19107,39.948508,-75.161969,4.5,1097,1,"{'RestaurantsReservations': 'True', 'NoiseLeve...","Cocktail Bars, Food Delivery Services, Nightli...","{'Monday': '0:0-0:0', 'Tuesday': '16:0-22:0', ...",0.874780
3,37,631ea3b0a5cde8cc0d6e2ef1,-0eUa8TsXFFy0FCxHYmrjg,Waterfront Gourmet Cafe & Deli,3131 Walnut St,Philadelphia,PA,19104,39.952446,-75.187321,4.0,26,0,"{'BikeParking': 'True', 'RestaurantsGoodForGro...","Caterers, Sandwiches, Delis, Restaurants, Cafe...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",0.821429
4,40,631ea3ada5cde8cc0d6d26c0,-0fvhILrC9UsQ6gLNpZlTQ,David's Southern Fried Pies,8601 Frankford Ave,Philadelphia,PA,19136,40.046191,-75.015090,4.5,18,0,"{'BusinessAcceptsBitcoin': 'False', 'Caters': ...","Desserts, Food","{'Monday': '0:0-0:0', 'Tuesday': '12:0-19:0', ...",0.894737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10386,100586,631ea3b3a5cde8cc0d6f417f,zyPABnfcXJagJGwReCsT1A,Jay's Pedal Power Bicycle Shop,512 E Girard Ave,Philadelphia,PA,19125,39.970766,-75.127960,3.0,9,0,"{'BikeParking': 'True', 'BusinessParking': ""{'...","Sporting Goods, Active Life, Shopping, Bike Re...",,0.444444
10387,100591,631ea3ada5cde8cc0d6d0f9e,zyge4T5eSiPHq1-IaJb_Qg,Nancy Le Nails,7419 Stenton Ave,Philadelphia,PA,19150,40.060970,-75.167412,2.0,10,1,"{'BusinessAcceptsCreditCards': 'False', 'ByApp...","Nail Salons, Beauty & Spas",,0.400000
10388,100599,631ea3afa5cde8cc0d6dde3d,zz-fcqurtm77bZ_rVvo2Lw,Yumtown,N 13th and Norris,Philadelphia,PA,19122,39.982937,-75.154732,4.5,24,0,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","Food Stands, Restaurants","{'Monday': '11:0-16:0', 'Tuesday': '11:0-16:0'...",0.840000
10389,100603,631ea3b2a5cde8cc0d6ef422,zz3E7kmJI2r2JseE6LAnrw,Hung Vuong Super Market,1122 Washington Ave,Philadelphia,PA,19147,39.936582,-75.162655,3.5,99,1,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...","Food, Grocery, International Grocery","{'Monday': '8:0-21:30', 'Tuesday': '8:0-21:30'...",0.660194


In [30]:
# Bin each business's 'stars' rating into one-star-wide buckets.
# pd.cut uses right-closed intervals by default, i.e. (lo, hi].
# Missing ratings stay in the list as NaN, so the list has one entry per row
# of philly_bus.
# NOTE(review): the lowest bucket covers values <= 1 although its label reads
# 'less_than_1'; the label is left unchanged because it is written to the
# saved data.
bucketed_average_stars_received = (
    pd.cut(
        philly_bus['stars'],
        bins=[-np.inf, 1, 2, 3, 4, np.inf],
        labels=[
            'less_than_1',
            'more_than_1_up_to_2',
            'more_than_2_up_to_3',
            'more_than_3_up_to_4',
            'more_than_4',
        ],
    )
    .astype(object)  # plain str labels (and NaN), not a Categorical
    .tolist()
)

In [31]:
# Tally how many businesses landed in each average-stars-received bucket
# (np.unique converts the list to an array itself).
np.unique(bucketed_average_stars_received, return_counts=True)

(array(['less_than_1', 'more_than_1_up_to_2', 'more_than_2_up_to_3',
        'more_than_3_up_to_4', 'more_than_4'], dtype='<U19'),
 array([  56,  756, 2216, 4616, 2747], dtype=int64))

In [32]:
# Attach the star-rating bucket labels as a column of philly_bus.
# dtype=object stores each list entry unchanged; a plain np.array() would
# turn any non-string entry (e.g. NaN) into the text 'nan'.
philly_bus['bucketed_average_stars_received'] = np.array(
    bucketed_average_stars_received, dtype=object
)
philly_bus

Unnamed: 0,index,_id,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,positive_%,bucketed_average_stars_received
0,31,631ea3b2a5cde8cc0d6eec47,-0M0b-XhtFagyLmsBtOe8w,Paris Wine Bar,2303 Fairmount Ave,Philadelphia,PA,19130,39.967439,-75.175452,3.5,18,0,"{'Alcohol': ""u'full_bar'"", 'OutdoorSeating': '...","Bars, Nightlife, Restaurants, French, Wine Bars","{'Thursday': '17:0-0:0', 'Friday': '17:0-0:0',...",0.722222,more_than_3_up_to_4
1,32,631ea3b0a5cde8cc0d6dfa60,-0PN_KFPtbnLQZEeb23XiA,Mr Wong's Chinese Restaurant,1849 Wolf St,Philadelphia,PA,19145,39.923048,-75.178078,3.5,9,0,"{'OutdoorSeating': 'False', 'BusinessAcceptsCr...","Restaurants, Chinese","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",0.636364,more_than_3_up_to_4
2,33,631ea3aea5cde8cc0d6d5a50,-0TffRSXXIlBYVbb5AwfTg,IndeBlue Modern Indian Food & Spirits,205 South 13th St,Philadelphia,PA,19107,39.948508,-75.161969,4.5,1097,1,"{'RestaurantsReservations': 'True', 'NoiseLeve...","Cocktail Bars, Food Delivery Services, Nightli...","{'Monday': '0:0-0:0', 'Tuesday': '16:0-22:0', ...",0.874780,more_than_4
3,37,631ea3b0a5cde8cc0d6e2ef1,-0eUa8TsXFFy0FCxHYmrjg,Waterfront Gourmet Cafe & Deli,3131 Walnut St,Philadelphia,PA,19104,39.952446,-75.187321,4.0,26,0,"{'BikeParking': 'True', 'RestaurantsGoodForGro...","Caterers, Sandwiches, Delis, Restaurants, Cafe...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",0.821429,more_than_3_up_to_4
4,40,631ea3ada5cde8cc0d6d26c0,-0fvhILrC9UsQ6gLNpZlTQ,David's Southern Fried Pies,8601 Frankford Ave,Philadelphia,PA,19136,40.046191,-75.015090,4.5,18,0,"{'BusinessAcceptsBitcoin': 'False', 'Caters': ...","Desserts, Food","{'Monday': '0:0-0:0', 'Tuesday': '12:0-19:0', ...",0.894737,more_than_4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10386,100586,631ea3b3a5cde8cc0d6f417f,zyPABnfcXJagJGwReCsT1A,Jay's Pedal Power Bicycle Shop,512 E Girard Ave,Philadelphia,PA,19125,39.970766,-75.127960,3.0,9,0,"{'BikeParking': 'True', 'BusinessParking': ""{'...","Sporting Goods, Active Life, Shopping, Bike Re...",,0.444444,more_than_2_up_to_3
10387,100591,631ea3ada5cde8cc0d6d0f9e,zyge4T5eSiPHq1-IaJb_Qg,Nancy Le Nails,7419 Stenton Ave,Philadelphia,PA,19150,40.060970,-75.167412,2.0,10,1,"{'BusinessAcceptsCreditCards': 'False', 'ByApp...","Nail Salons, Beauty & Spas",,0.400000,more_than_1_up_to_2
10388,100599,631ea3afa5cde8cc0d6dde3d,zz-fcqurtm77bZ_rVvo2Lw,Yumtown,N 13th and Norris,Philadelphia,PA,19122,39.982937,-75.154732,4.5,24,0,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","Food Stands, Restaurants","{'Monday': '11:0-16:0', 'Tuesday': '11:0-16:0'...",0.840000,more_than_4
10389,100603,631ea3b2a5cde8cc0d6ef422,zz3E7kmJI2r2JseE6LAnrw,Hung Vuong Super Market,1122 Washington Ave,Philadelphia,PA,19147,39.936582,-75.162655,3.5,99,1,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...","Food, Grocery, International Grocery","{'Monday': '8:0-21:30', 'Tuesday': '8:0-21:30'...",0.660194,more_than_3_up_to_4


In [39]:
# Distinct 'positive_%' values and how often each occurs (NaN included).
np.unique(philly_bus['positive_%'].to_numpy(), return_counts=True)

(array([0.02173913, 0.03225806, 0.03333333, ..., 0.98064516, 0.98148148,
               nan]),
 array([  1,   1,   1, ...,   1,   1, 541], dtype=int64))

In [40]:
# Bin each business's 'positive_%' value into five buckets of width 0.2.
# pd.cut uses right-closed intervals by default, i.e. (lo, hi], matching the
# ranges spelled out in the label names.
# Businesses with a missing 'positive_%' stay in the list as NaN, so the list
# has one entry per row of philly_bus.
bucketed_sentiment_scores_received = (
    pd.cut(
        philly_bus['positive_%'],
        bins=[-np.inf, .2, .4, .6, .8, np.inf],
        labels=[
            'less_than_or_equal_to_20_percent',
            'more_than_20_up_to_40_percent',
            'more_than_40_up_to_60_percent',
            'more_than_60_up_to_80_percent',
            'more_than_80_percent',
        ],
    )
    .astype(object)  # plain str labels (and NaN), not a Categorical
    .tolist()
)

In [41]:
# Tally how many businesses landed in each sentiment-score bucket
# (np.unique converts the list to an array itself).
np.unique(bucketed_sentiment_scores_received, return_counts=True)

(array(['less_than_or_equal_to_20_percent',
        'more_than_20_up_to_40_percent', 'more_than_40_up_to_60_percent',
        'more_than_60_up_to_80_percent', 'more_than_80_percent', 'nan'],
       dtype='<U32'),
 array([ 402, 1059, 2152, 3892, 2345,  541], dtype=int64))

In [42]:
# Attach the sentiment bucket labels as a column of philly_bus.
# dtype=object stores each list entry unchanged. Without it np.array() picks
# a string dtype and converts the NaN entries (businesses with no
# 'positive_%') into the literal text 'nan', which isna() no longer
# recognises as missing.
philly_bus['bucketed_sentiment_scores_received'] = np.array(
    bucketed_sentiment_scores_received, dtype=object
)
philly_bus

Unnamed: 0,index,_id,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,positive_%,bucketed_average_stars_received,bucketed_sentiment_scores_received
0,31,631ea3b2a5cde8cc0d6eec47,-0M0b-XhtFagyLmsBtOe8w,Paris Wine Bar,2303 Fairmount Ave,Philadelphia,PA,19130,39.967439,-75.175452,3.5,18,0,"{'Alcohol': ""u'full_bar'"", 'OutdoorSeating': '...","Bars, Nightlife, Restaurants, French, Wine Bars","{'Thursday': '17:0-0:0', 'Friday': '17:0-0:0',...",0.722222,more_than_3_up_to_4,more_than_60_up_to_80_percent
1,32,631ea3b0a5cde8cc0d6dfa60,-0PN_KFPtbnLQZEeb23XiA,Mr Wong's Chinese Restaurant,1849 Wolf St,Philadelphia,PA,19145,39.923048,-75.178078,3.5,9,0,"{'OutdoorSeating': 'False', 'BusinessAcceptsCr...","Restaurants, Chinese","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",0.636364,more_than_3_up_to_4,more_than_60_up_to_80_percent
2,33,631ea3aea5cde8cc0d6d5a50,-0TffRSXXIlBYVbb5AwfTg,IndeBlue Modern Indian Food & Spirits,205 South 13th St,Philadelphia,PA,19107,39.948508,-75.161969,4.5,1097,1,"{'RestaurantsReservations': 'True', 'NoiseLeve...","Cocktail Bars, Food Delivery Services, Nightli...","{'Monday': '0:0-0:0', 'Tuesday': '16:0-22:0', ...",0.874780,more_than_4,more_than_80_percent
3,37,631ea3b0a5cde8cc0d6e2ef1,-0eUa8TsXFFy0FCxHYmrjg,Waterfront Gourmet Cafe & Deli,3131 Walnut St,Philadelphia,PA,19104,39.952446,-75.187321,4.0,26,0,"{'BikeParking': 'True', 'RestaurantsGoodForGro...","Caterers, Sandwiches, Delis, Restaurants, Cafe...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",0.821429,more_than_3_up_to_4,more_than_80_percent
4,40,631ea3ada5cde8cc0d6d26c0,-0fvhILrC9UsQ6gLNpZlTQ,David's Southern Fried Pies,8601 Frankford Ave,Philadelphia,PA,19136,40.046191,-75.015090,4.5,18,0,"{'BusinessAcceptsBitcoin': 'False', 'Caters': ...","Desserts, Food","{'Monday': '0:0-0:0', 'Tuesday': '12:0-19:0', ...",0.894737,more_than_4,more_than_80_percent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10386,100586,631ea3b3a5cde8cc0d6f417f,zyPABnfcXJagJGwReCsT1A,Jay's Pedal Power Bicycle Shop,512 E Girard Ave,Philadelphia,PA,19125,39.970766,-75.127960,3.0,9,0,"{'BikeParking': 'True', 'BusinessParking': ""{'...","Sporting Goods, Active Life, Shopping, Bike Re...",,0.444444,more_than_2_up_to_3,more_than_40_up_to_60_percent
10387,100591,631ea3ada5cde8cc0d6d0f9e,zyge4T5eSiPHq1-IaJb_Qg,Nancy Le Nails,7419 Stenton Ave,Philadelphia,PA,19150,40.060970,-75.167412,2.0,10,1,"{'BusinessAcceptsCreditCards': 'False', 'ByApp...","Nail Salons, Beauty & Spas",,0.400000,more_than_1_up_to_2,more_than_20_up_to_40_percent
10388,100599,631ea3afa5cde8cc0d6dde3d,zz-fcqurtm77bZ_rVvo2Lw,Yumtown,N 13th and Norris,Philadelphia,PA,19122,39.982937,-75.154732,4.5,24,0,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","Food Stands, Restaurants","{'Monday': '11:0-16:0', 'Tuesday': '11:0-16:0'...",0.840000,more_than_4,more_than_80_percent
10389,100603,631ea3b2a5cde8cc0d6ef422,zz3E7kmJI2r2JseE6LAnrw,Hung Vuong Super Market,1122 Washington Ave,Philadelphia,PA,19147,39.936582,-75.162655,3.5,99,1,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...","Food, Grocery, International Grocery","{'Monday': '8:0-21:30', 'Tuesday': '8:0-21:30'...",0.660194,more_than_3_up_to_4,more_than_60_up_to_80_percent


In [43]:
# Save the business table, including the bucket columns added above, as
# zstd-compressed Feather.
# NOTE: this writes to 'business_philly.feather', the same path the load cell
# reads from, so the original input file is replaced by the augmented frame.
philly_bus.to_feather('business_philly.feather', compression='zstd')