---
# Predictive Modeling 
# Author: Adefemi Abimbola
---


In [1]:
#import necessary libraries 
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
# Directory that holds the cleaned Yelp CSV exports. It is unique to each
# system: set the YELP_DATA_DIR environment variable to point at your own copy
# instead of editing this cell. The fallback is the original author's folder,
# so the resulting paths are unchanged when the variable is not set.
DATA_DIR = os.environ.get(
    'YELP_DATA_DIR',
    '/Users/Defemi/Library/Mobile Documents/com~apple~CloudDocs/Data Sc II/Yelp JSON',
)

# path to each cleaned data set (plain strings, as before)
review_path = os.path.join(DATA_DIR, 'reviews.csv')
business_path = os.path.join(DATA_DIR, 'business.csv')
user_path = os.path.join(DATA_DIR, 'user.csv')
checkin_path = os.path.join(DATA_DIR, 'checkin.csv')
tip_path = os.path.join(DATA_DIR, 'tip.csv')

In [3]:
# Load every cleaned CSV into its own DataFrame, in the same order as the
# path variables defined in the configuration cell.
review, business, user, checkin, tip = (
    pd.read_csv(csv_path)
    for csv_path in (review_path, business_path, user_path, checkin_path, tip_path)
)

In [32]:
# Report the (rows, columns) shape of each loaded dataset, one line per table.
for label, frame in (
    ("Review shape:", review),
    ("Business shape:", business),
    ("User shape:", user),
    ("Checkin shape:", checkin),
    ("Tip shape:", tip),
):
    print(label, frame.shape)

Review shape: (1987897, 9)
Business shape: (150346, 12)
User shape: (1987897, 7)
Checkin shape: (131930, 2)
Tip shape: (908915, 5)


---
- Baseline Models (Linear Regression & Decision Tree)
    - Goal: Predict each business's star rating using simple features
---

In [23]:
# Print a label, then preview the first rows of the review table
# (the bare last expression is rendered as the cell output).
print("review data")
review.head()

review data


Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is...",2018-07-07 22:09:11
1,BiTunyQ73aT9WBnpR9DZGw,OyoGAe7OKpv6SyGZT5g77Q,7ATYjTIgM3jUlt4UM3IypQ,5.0,1,0,1,I've taken a lot of spin classes over the year...,2012-01-03 15:28:18
2,saUsX_uimxRlCVr67Z4Jig,8g_iMtfSiwikVnbP2etR0A,YjUWPpI6HXG530lwP-fb2A,3.0,0,0,0,Family diner. Had the buffet. Eclectic assortm...,2014-02-05 20:30:30
3,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favo...",2015-01-04 00:01:03
4,Sx8TMOWLNuJBWer-0pcmoA,bcjbaE6dDog4jkNY91ncLQ,e4Vwtrqf-wpJfwesgvdgxQ,4.0,1,0,1,Cute interior and owner (?) gave us tour of up...,2017-01-14 20:54:15


In [30]:
# Print a label, then preview the first rows of the business table
# (the bare last expression is rendered as the cell output).
print("business data")
business.head()

business data


Unnamed: 0,business_id,name,address,city,state,postal_code,stars,review_count,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."


In [33]:
# Print a label, then preview the first rows of the user table
# (the bare last expression is rendered as the cell output).
print("user data")
user.head()

user data


Unnamed: 0,user_id,name,review_count,elite,fans,average_stars,compliments_total
0,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007,267,3.91,2873
1,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...",3138,3.74,20631
2,2WnXYQFK0hXEoTxPtV2zvg,Steph,665,20092010201120122013,52,3.32,585
3,SZDeASXq7o05mMNLshsdIA,Gwen,224,200920102011,28,4.27,136
4,hA5lMy-EnncsH4JoR-hFGQ,Karen,79,,1,3.54,4


In [28]:
# Aggregate the review-level data to one row per business_id.
#
# The review text length is computed once with the vectorised `.str.len()`
# accessor and then averaged per business, instead of calling a Python lambda
# for every group. Only the columns needed for the aggregation are carried
# along, so the large `text` column is not copied.
#
# Resulting columns (index: business_id):
#   avg_review_rating - mean of `stars` over the business's reviews
#   avg_review_length - mean character length of `text` (missing text is skipped)
#   num_reviews       - count of non-null `review_id` values
review_agg = (
    review[['business_id', 'stars', 'review_id']]
    .assign(review_length=review['text'].str.len())
    .groupby('business_id')
    .agg(
        avg_review_rating=('stars', 'mean'),
        avg_review_length=('review_length', 'mean'),
        num_reviews=('review_id', 'count'),
    )
)


In [29]:

# Display the per-business review aggregates (indexed by business_id).
review_agg

Unnamed: 0_level_0,avg_review_rating,avg_review_length,num_reviews
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
---kPU91CF4Lq2-WlRu9Lw,4.500000,295.583333,24
--FWWsIwxRwuw9vIMImcQg,3.250000,521.000000,8
--SJXpAa0E-GCp2smaHf0A,2.400000,779.700000,10
--ZVrH2X2QXBFdCilbirsw,4.722222,378.000000,36
--_9CAxgfXZmoFdNIRrhHA,3.250000,546.500000,12
...,...,...,...
zziDpuuJw-Km1J4BaGpBKA,3.333333,607.666667,6
zzjFdJwXuxBOGe9JeY_EMw,4.104167,375.812500,48
zznJox6-nmXlGYNWgTDwQQ,1.633333,402.266667,30
zztOG2cKm87I6Iw_tleZsQ,4.833333,474.166667,6


In [34]:
# Attach the per-business review aggregates to the business table. The left
# join keeps every business row; businesses with no matching reviews end up
# with NaN in the aggregate columns.
baseline_df = pd.merge(business, review_agg, how='left', on='business_id')

In [35]:
# Display the merged frame; rows for businesses without any matching reviews
# show NaN in the review-derived columns because the merge was a left join.
baseline_df

Unnamed: 0,business_id,name,address,city,state,postal_code,stars,review_count,is_open,attributes,categories,hours,avg_review_rating,avg_review_length,num_reviews
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",,4.857143,551.142857,7.0
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ...",3.133333,455.666667,15.0
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ...",3.500000,457.291667,24.0
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",4.057471,558.919540,87.0
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",4.692308,501.538462,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150341,IUQopTMmYQG-qRtBk-8QnA,Binh's Nails,3388 Gateway Blvd,Edmonton,AB,T6J 5H2,3.0,13,1,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Nail Salons, Beauty & Spas","{'Monday': '10:0-19:30', 'Tuesday': '10:0-19:3...",,,
150342,c8GjPIOTGVmIemT7j5_SyQ,Wild Birds Unlimited,2813 Bransford Ave,Nashville,TN,37204,4.0,5,1,"{'BusinessAcceptsCreditCards': 'True', 'Restau...","Pets, Nurseries & Gardening, Pet Stores, Hobby...","{'Monday': '9:30-17:30', 'Tuesday': '9:30-17:3...",,,
150343,_QAMST-NrQobXduilWEqSw,Claire's Boutique,"6020 E 82nd St, Ste 46",Indianapolis,IN,46250,3.5,8,1,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Shopping, Jewelry, Piercing, Toy Stores, Beaut...",,,,
150344,mtGm22y5c2UHNXDFAjaPNw,Cyclery & Fitness Center,2472 Troy Rd,Edwardsville,IL,62025,4.0,24,1,"{'BusinessParking': ""{'garage': False, 'street...","Fitness/Exercise Equipment, Eyewear & Optician...","{'Monday': '9:0-20:0', 'Tuesday': '9:0-20:0', ...",,,


In [36]:
# cleaning the baseline_df: keep only the businesses for which every
# review-derived feature is present (rows missing any of them are dropped).
review_feature_cols = ['avg_review_rating', 'avg_review_length', 'num_reviews']
baseline_df = baseline_df.dropna(axis=0, how='any', subset=review_feature_cols)

In [37]:
# Display the frame after rows lacking review aggregates were dropped.
# The original row labels are kept (the index is not reset by dropna).
baseline_df

Unnamed: 0,business_id,name,address,city,state,postal_code,stars,review_count,is_open,attributes,categories,hours,avg_review_rating,avg_review_length,num_reviews
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",,4.857143,551.142857,7.0
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ...",3.133333,455.666667,15.0
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ...",3.500000,457.291667,24.0
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",4.057471,558.919540,87.0
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",4.692308,501.538462,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45100,oUnABk-zJQbwztdwONKSPA,Positive Changes Hypnosis Centers,"8030 W Emerald St, Ste 165",Boise,ID,83704,3.5,16,0,{'ByAppointmentOnly': 'False'},"Health & Medical, Weight Loss Centers, Hypnosi...","{'Monday': '9:0-20:0', 'Tuesday': '9:0-20:0', ...",3.142857,1238.000000,14.0
45101,RYDHmT31aJusVTmSp2WcBA,Twin Palms Community,14300 66th St N,Clearwater,FL,33764,1.5,6,1,,"Local Services, Community Service/Non-Profit",,1.000000,136.000000,1.0
45102,SEBtAgVuiZQtnqd__OifaA,Comfort Inn Nashville West,412 White Bridge Pl,Nashville,TN,37209,2.5,27,1,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...","Hotels & Travel, Event Planning & Services, Ho...","{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'W...",2.842105,459.789474,19.0
45103,Sfp_eCU4nmwneTngLyVsVw,LookAfter Hair Company,"1393 Big Bend Rd, Ste B",Ballwin,MO,63021,4.0,9,1,"{'ByAppointmentOnly': 'False', 'BikeParking': ...","Hair Salons, Beauty & Spas","{'Monday': '9:0-20:0', 'Tuesday': '9:0-20:0', ...",3.777778,422.111111,9.0


In [38]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the cleaned baseline frame.
baseline_df.describe()

Unnamed: 0,stars,review_count,is_open,avg_review_rating,avg_review_length,num_reviews
count,45031.0,45031.0,45031.0,45031.0,45031.0,45031.0
mean,3.599676,45.602829,0.797784,3.607687,570.517218,44.145078
std,0.973669,117.949683,0.401657,0.973103,219.628086,115.450293
min,1.0,5.0,0.0,1.0,91.0,1.0
25%,3.0,8.0,1.0,3.0,425.718254,7.0
50%,3.5,15.0,1.0,3.75,528.395349,14.0
75%,4.5,38.0,1.0,4.362038,669.035875,36.0
max,5.0,5070.0,1.0,5.0,3453.5,4662.0
