### The objective of this notebook is to combine the previously prepared dataframes (businesses, reviews, check-ins, median income) into a single dataframe keyed on `business_id`.

In [1]:
import pandas as pd
import numpy as np
import utils, plot_help
import matplotlib.pyplot as plt

# Explicitly register pandas' formatters/converters with matplotlib so that
# plotting does not trigger a warning.
# NOTE(review): presumably this targets the implicit-registration warning for
# datetime plotting — confirm it is still needed with the pinned pandas version.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

%matplotlib inline

List of dataframes to merge:
- df_bus_conc.csv
- review_year_change.csv
- checkin_feat.csv
- business_median_income.csv

# Businesses

In [2]:
# Load the cleaned business table through the project helper and preview it.
# NOTE(review): utils.chunk_loader is defined outside this notebook; presumably
# it reads the CSV in chunks and returns one DataFrame — confirm.
df_bus_conc = utils.chunk_loader('data/cleaned/df_bus_conc.csv')
df_bus_conc.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,...,Wednesday_open,Wednesday_close,Thursday_open,Thursday_close,Friday_open,Friday_close,Saturday_open,Saturday_close,Sunday_open,Sunday_close
0,2818 E Camino Acequia Drive,{'GoodForKids': 'False'},1SWheh84yJXfytovILXOAQ,"Golf, Active Life",Phoenix,,0,33.522143,-112.018481,Arizona Biltmore Golf Club,...,,,,,,,,,,
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,...,9:0,0:0,9:0,0:0,9:0,1:0,9:0,1:0,9:0,0:0
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': ""u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1,35.092564,-80.859132,Musashi Japanese Restaurant,...,17:30,21:30,17:30,21:30,17:30,22:0,17:30,22:0,17:30,21:0
3,"15655 W Roosevelt St, Ste 237",,xvX2CttrVhyG2z1dFg_0xw,"Insurance, Financial Services",Goodyear,"{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",1,33.455613,-112.395596,Farmers Insurance - Paul Lorenz,...,8:0,17:0,8:0,17:0,8:0,17:0,,,,
4,"4209 Stuart Andrew Blvd, Ste F","{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...",HhyxOkGAM07SRYtlQ4wMFQ,"Plumbing, Shopping, Local Services, Home Servi...",Charlotte,"{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",1,35.190012,-80.887223,Queen City Plumbing,...,7:0,23:0,7:0,23:0,7:0,23:0,7:0,23:0,7:0,23:0


In [3]:
# (rows, columns) of the business table, for comparison with the other tables.
df_bus_conc.shape

(192609, 120)

# Reviews

In [4]:
# Load the per-business review change features. The identifier column is
# stored under the name 'index', so it is renamed to 'business_id' to line up
# with the key used by the other tables.
review_year_change = (
    utils.chunk_loader('data/cleaned/review_year_change.csv')
    .rename(columns={'index': 'business_id'})
)
review_year_change.head()

Unnamed: 0,business_id,cool_change,funny_change,stars_change,useful
0,--1UhMGODdWsrMastO9DZw,-0.5,0.0,-0.284091,-0.681818
1,--6MefnULPED_I942VcFNA,-0.736111,-0.847222,-0.194444,-0.763889
2,--7zmmkVg-IMGaXbuVd0SQ,0.272727,0.136364,0.0,0.181818
3,--8LPVSo5i0Oo61X01sV9A,0.0,0.0,-1.333333,-1.333333
4,--9QQLMTbFzLJ_oT-ON3Xw,-0.2,0.0,-0.3,-0.1


In [5]:
# (rows, columns) of the review change features.
review_year_change.shape

(192606, 5)

# Checkins

In [6]:
# Load the check-in features (one business_id column plus check-in statistics)
# through the project helper and preview the first rows.
checkin_feat = utils.chunk_loader('data/cleaned/checkin_feat.csv')
checkin_feat.head()

Unnamed: 0,business_id,avg_month_checkin,first,last,span_checkin
0,--1UhMGODdWsrMastO9DZw,1.2,2016-04-26 19:49:16,2017-05-03 17:58:02,79726
1,--6MefnULPED_I942VcFNA,12.166667,2011-06-04 18:22:23,2018-10-21 22:58:14,16551
2,--7zmmkVg-IMGaXbuVd0SQ,12.5,2015-01-17 01:49:14,2018-11-03 17:22:03,55969
3,--8LPVSo5i0Oo61X01sV9A,1.0,2016-07-08 16:43:30,2016-07-08 16:43:30,0
4,--9QQLMTbFzLJ_oT-ON3Xw,2.583333,2010-06-26 17:39:07,2018-06-16 18:44:45,3938


In [7]:
# (rows, columns) of the check-in features.
# NOTE(review): the recorded output has fewer rows here than in the business
# table, so presumably not every business has check-in data — confirm.
checkin_feat.shape

(161160, 5)

# Median Income

In [8]:
# Load the per-business median income feature through the project helper.
df_median = utils.chunk_loader('data/cleaned/business_median_income.csv')

In [9]:
# Preview the median income table (business_id, median_income).
df_median.head()

Unnamed: 0,business_id,median_income
0,1SWheh84yJXfytovILXOAQ,3.5
1,QXAEGFB4oINsVuTFxEYKFQ,3.0
2,gnKjwL_1w79qoiV3IC_xQQ,3.5
3,xvX2CttrVhyG2z1dFg_0xw,3.5
4,HhyxOkGAM07SRYtlQ4wMFQ,3.5


# Combine dataframes into a single dataframe

In [10]:
# Tables to merge, in order; the business table comes first, followed by the
# review, check-in and median income features.
df_list = [df_bus_conc, review_year_change, checkin_feat, df_median]

In [11]:
# Merge every table in df_list on the shared 'business_id' key and preview.
# NOTE(review): utils.reduce_merge is defined outside this notebook, so the join
# type is not visible here. In the recorded output, row 4 of the merged frame is
# a different business than row 4 of df_bus_conc, which suggests rows without a
# match are dropped (inner join?) — confirm, and consider checking
# df_merge.shape against df_bus_conc.shape.
df_merge = utils.reduce_merge(df_list, 'business_id')
df_merge.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,...,Sunday_close,cool_change,funny_change,stars_change,useful,avg_month_checkin,first,last,span_checkin,median_income
0,2818 E Camino Acequia Drive,{'GoodForKids': 'False'},1SWheh84yJXfytovILXOAQ,"Golf, Active Life",Phoenix,,0,33.522143,-112.018481,Arizona Biltmore Golf Club,...,,0.0,0.0,0.0,0.0,1.777778,2012-06-16 14:53:33,2016-06-18 16:22:16,5323,3.5
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,...,0:0,-0.042484,-0.04902,-0.075163,-0.156863,36.083333,2010-07-05 16:56:31,2018-11-09 21:08:54,15143,3.0
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': ""u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1,35.092564,-80.859132,Musashi Japanese Restaurant,...,21:0,-0.11,-0.19,0.055,-0.215,57.083333,2010-11-13 01:00:36,2018-11-10 17:15:54,58518,3.5
3,"15655 W Roosevelt St, Ste 237",,xvX2CttrVhyG2z1dFg_0xw,"Insurance, Financial Services",Goodyear,"{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",1,33.455613,-112.395596,Farmers Insurance - Paul Lorenz,...,,0.0,0.0,0.0,-0.5,1.222222,2012-10-29 22:36:50,2016-04-27 00:57:54,8464,3.5
4,"Credit Valley Town Plaza, F2 - 6045 Creditview Rd","{'BusinessParking': ""{'garage': False, 'street...",68dUKd8_8liJ7in4aWOSEA,"Shipping Centers, Couriers & Delivery Services...",Mississauga,"{'Monday': '9:0-19:0', 'Tuesday': '9:0-20:0', ...",1,43.599475,-79.711584,The UPS Store,...,,0.0,0.0,0.0,0.0,1.0,2014-03-31 21:46:17,2018-05-22 22:35:48,2971,3.0


In [12]:
# Reorder the columns so that `is_open` ends up in the last position.
cols = list(df_merge.columns)
# Drop the first occurrence of 'is_open' (raises ValueError if it is missing).
cols.remove('is_open')
# Re-select with every other column first and 'is_open' appended at the end.
df_merge = df_merge[[*cols, 'is_open']]

In [13]:
# Show every column name of the merged frame on one comma-separated line.
print(',  '.join(map(str, df_merge.columns)))

address,  attributes,  business_id,  categories,  city,  hours,  latitude,  longitude,  name,  postal_code,  review_count,  stars,  state,  road_type,  GoodForKids,  RestaurantsReservations,  Caters,  RestaurantsTableService,  RestaurantsTakeOut,  RestaurantsPriceRange2,  OutdoorSeating,  BikeParking,  HasTV,  RestaurantsGoodForGroups,  RestaurantsDelivery,  BusinessAcceptsCreditCards,  BusinessAcceptsBitcoin,  ByAppointmentOnly,  AcceptsInsurance,  GoodForDancing,  CoatCheck,  HappyHour,  WheelchairAccessible,  DogsAllowed,  DriveThru,  Corkage,  BYOB,  Open24Hours,  RestaurantsCounterService,  dessert,  latenight,  lunch,  dinner,  brunch,  breakfast,  garage,  street,  validated,  lot,  valet,  romantic,  intimate,  classy,  hipster,  divey,  touristy,  trendy,  upscale,  casual,  dj,  background_music,  no_music,  jukebox,  live,  video,  karaoke,  monday,  tuesday,  friday,  wednesday,  thursday,  sunday,  saturday,  straightperms,  coloring,  extensions,  africanamerican,  curly,

In [14]:
# Persist the merged feature table for downstream notebooks.
# Note: to_csv writes the row index as an extra leading column by default
# (index=True); readers of this file need to account for that column.
df_merge.to_csv('data/cleaned/business_merge_feats.csv')