# yelp_business

#### Find the top business categories based on the total number of reviews. 

#### Output the category along with the total number of reviews.

#### Order by total reviews in descending order.

In [76]:
import pandas as pd

In [77]:
# Read the Yelp business listing from the CSV file into a DataFrame.
yelp_business = pd.read_csv(filepath_or_buffer='yelp_business.csv')

In [78]:
# Preview the first five rows of the loaded table.
yelp_business.head(n=5)

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,G5ERFWvPfHy7IDAUYlWL2A,All Colors Mobile Bumper Repair,,7137 N 28th Ave,Phoenix,AZ,85051,33.448,-112.074,1.0,4,1,Auto Detailing;Automotive
1,0jDvRJS-z9zdMgOUXgr6rA,Sunfare,,811 W Deer Valley Rd,Phoenix,AZ,85027,33.683,-112.085,5.0,27,1,Personal Chefs;Food;Gluten-Free;Food Delivery ...
2,6HmDqeNNZtHMK0t2glF_gg,Dry Clean Vegas,Southeast,"2550 Windmill Ln, Ste 100",Las Vegas,NV,89123,36.042,-115.118,1.0,4,1,Dry Cleaning & Laundry;Laundry Services;Local ...
3,pbt3SBcEmxCfZPdnmU9tNA,The Cuyahoga Room,,740 Munroe Falls Ave,Cuyahoga Falls,OH,44221,41.14,-81.472,1.0,3,0,Wedding Planning;Caterers;Event Planning & Ser...
4,CX8pfLn7Bk9o2-8yDMp_2w,The UPS Store,,"4815 E Carefree Hwy, Ste 108",Cave Creek,AZ,85331,33.798,-111.977,1.5,5,1,Notaries;Printing Services;Local Services;Ship...


In [79]:
# Show the dtype of every column.
yelp_business.dtypes

business_id      object
name             object
neighborhood     object
address          object
city             object
state            object
postal_code      object
latitude        float64
longitude       float64
stars           float64
review_count      int64
is_open           int64
categories       object
dtype: object

### Reassign the 'categories' column to a list of categories, split on semicolons

In [80]:
# Turn each semicolon-delimited category string into a list of category names.
# Only str values are split; anything else (a missing value, or a list left by an
# earlier run of this cell) is passed through unchanged, so re-running the cell
# does not replace the already-split lists with NaN the way `.str.split` would.
yelp_business['categories'] = yelp_business['categories'].map(
    lambda value: value.split(';') if isinstance(value, str) else value
)

In [81]:
# Show the first five rows again to inspect the result of the split.
yelp_business.head(5)

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,G5ERFWvPfHy7IDAUYlWL2A,All Colors Mobile Bumper Repair,,7137 N 28th Ave,Phoenix,AZ,85051,33.448,-112.074,1.0,4,1,"[Auto Detailing, Automotive]"
1,0jDvRJS-z9zdMgOUXgr6rA,Sunfare,,811 W Deer Valley Rd,Phoenix,AZ,85027,33.683,-112.085,5.0,27,1,"[Personal Chefs, Food, Gluten-Free, Food Deliv..."
2,6HmDqeNNZtHMK0t2glF_gg,Dry Clean Vegas,Southeast,"2550 Windmill Ln, Ste 100",Las Vegas,NV,89123,36.042,-115.118,1.0,4,1,"[Dry Cleaning & Laundry, Laundry Services, Loc..."
3,pbt3SBcEmxCfZPdnmU9tNA,The Cuyahoga Room,,740 Munroe Falls Ave,Cuyahoga Falls,OH,44221,41.14,-81.472,1.0,3,0,"[Wedding Planning, Caterers, Event Planning & ..."
4,CX8pfLn7Bk9o2-8yDMp_2w,The UPS Store,,"4815 E Carefree Hwy, Ste 108",Cave Creek,AZ,85331,33.798,-111.977,1.5,5,1,"[Notaries, Printing Services, Local Services, ..."


### Drop the columns we won't need

In [82]:
# Keep only the category lists and the per-business review counts.
yelp_business = yelp_business.loc[:, ['categories', 'review_count']]

In [83]:
# Preview the first five rows of the two-column frame.
yelp_business.head(n=5)

Unnamed: 0,categories,review_count
0,"[Auto Detailing, Automotive]",4
1,"[Personal Chefs, Food, Gluten-Free, Food Deliv...",27
2,"[Dry Cleaning & Laundry, Laundry Services, Loc...",4
3,"[Wedding Planning, Caterers, Event Planning & ...",3
4,"[Notaries, Printing Services, Local Services, ...",5


### Use explode to move each element of the category list onto its own row
### Each new row keeps the review count of the business it came from

In [84]:
# Give every element of each 'categories' list its own row; the row's index
# label and review_count are repeated for each element.
yelp_business = yelp_business.explode(column='categories')

In [85]:
# Display the exploded frame (one row per business/category pair).
yelp_business

Unnamed: 0,categories,review_count
0,Auto Detailing,4
0,Automotive,4
1,Personal Chefs,27
1,Food,27
1,Gluten-Free,27
...,...,...
99,Property Management,5
99,Condominiums,5
99,Apartments,5
99,Home Services,5


### Group by 'categories' and sum the review counts

In [86]:
# Total the review counts per category; as_index=False keeps 'categories'
# as an ordinary column instead of moving it into the index.
yelp_business = yelp_business.groupby('categories', as_index=False).sum()

In [87]:
# Display the per-category review totals.
yelp_business

Unnamed: 0,categories,review_count
0,Active Life,21
1,Acupuncture,25
2,American (New),242
3,American (Traditional),120
4,Apartments,5
...,...,...
178,Wholesalers,20
179,Windows Installation,3
180,Wine & Spirits,120
181,Wine Bars,8


### Sort the values in descending order

In [88]:
# Rank categories from most to fewest total reviews and renumber the rows
# from 0. The result is only displayed; yelp_business is not reassigned.
(
    yelp_business
    .sort_values(by='review_count', ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,categories,review_count
0,Restaurants,1703
1,Food,508
2,Pizza,456
3,Chinese,417
4,Japanese,350
...,...,...
178,Pilates,3
179,Food Stands,3
180,Sporting Goods,3
181,Curry Sausage,3
