In [2]:
import numpy as np
import pandas as pd

In [3]:
import os

### Reading Restaurant profile dataset

In [5]:
Restaurants = pd.read_csv('geoplaces2.csv')

In [6]:
## Display the data
Restaurants.head()

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,...,alcohol,smoking_area,dress_code,accessibility,price,url,Rambience,franchise,area,other_services
0,134999,18.915421,-99.184871,0101000020957F000088568DE356715AC138C0A525FC46...,Kiku Cuernavaca,Revolucion,Cuernavaca,Morelos,Mexico,?,...,No_Alcohol_Served,none,informal,no_accessibility,medium,kikucuernavaca.com.mx,familiar,f,closed,none
1,132825,22.147392,-100.983092,0101000020957F00001AD016568C4858C1243261274BA5...,puesto de tacos,esquina santos degollado y leon guzman,s.l.p.,s.l.p.,mexico,?,...,No_Alcohol_Served,none,informal,completely,low,?,familiar,f,open,none
2,135106,22.149709,-100.976093,0101000020957F0000649D6F21634858C119AE9BF528A3...,El Rinc�n de San Francisco,Universidad 169,San Luis Potosi,San Luis Potosi,Mexico,?,...,Wine-Beer,only at bar,informal,partially,medium,?,familiar,f,open,none
3,132667,23.752697,-99.163359,0101000020957F00005D67BCDDED8157C1222A2DC8D84D...,little pizza Emilio Portes Gil,calle emilio portes gil,victoria,tamaulipas,?,?,...,No_Alcohol_Served,none,informal,completely,low,?,familiar,t,closed,none
4,132613,23.752903,-99.165076,0101000020957F00008EBA2D06DC8157C194E03B7B504E...,carnitas_mata,lic. Emilio portes gil,victoria,Tamaulipas,Mexico,?,...,No_Alcohol_Served,permitted,informal,completely,medium,?,familiar,t,closed,none


In [7]:
print(Restaurants.shape[0])
print(Restaurants.shape[1])

130
21


In [8]:
Restaurants.isnull().sum()

placeID           0
latitude          0
longitude         0
the_geom_meter    0
name              0
address           0
city              0
state             0
country           0
fax               0
zip               0
alcohol           0
smoking_area      0
dress_code        0
accessibility     0
price             0
url               0
Rambience         0
franchise         0
area              0
other_services    0
dtype: int64

#### Basic Data Cleaning

In [9]:
columns_to_drop = ['latitude','longitude','the_geom_meter','address','city','state','country','fax','zip','url',
                  'franchise']

In [10]:
Restaurants = Restaurants.drop(columns = columns_to_drop)

In [11]:
Restaurants.head()

Unnamed: 0,placeID,name,alcohol,smoking_area,dress_code,accessibility,price,Rambience,area,other_services
0,134999,Kiku Cuernavaca,No_Alcohol_Served,none,informal,no_accessibility,medium,familiar,closed,none
1,132825,puesto de tacos,No_Alcohol_Served,none,informal,completely,low,familiar,open,none
2,135106,El Rinc�n de San Francisco,Wine-Beer,only at bar,informal,partially,medium,familiar,open,none
3,132667,little pizza Emilio Portes Gil,No_Alcohol_Served,none,informal,completely,low,familiar,closed,none
4,132613,carnitas_mata,No_Alcohol_Served,permitted,informal,completely,medium,familiar,closed,none


In [12]:
Restaurants['placeID'].nunique()

130

In [13]:
Restaurants.rename (columns = { 'placeID': 'ID', 'Rambience' : 'ambience' }, inplace = True)

In [14]:
Restaurants.head()

Unnamed: 0,ID,name,alcohol,smoking_area,dress_code,accessibility,price,ambience,area,other_services
0,134999,Kiku Cuernavaca,No_Alcohol_Served,none,informal,no_accessibility,medium,familiar,closed,none
1,132825,puesto de tacos,No_Alcohol_Served,none,informal,completely,low,familiar,open,none
2,135106,El Rinc�n de San Francisco,Wine-Beer,only at bar,informal,partially,medium,familiar,open,none
3,132667,little pizza Emilio Portes Gil,No_Alcohol_Served,none,informal,completely,low,familiar,closed,none
4,132613,carnitas_mata,No_Alcohol_Served,permitted,informal,completely,medium,familiar,closed,none


In [15]:
Restaurants.to_csv('Restaurants.csv')

### Reading Cuisine dataset

In [16]:
Cuisine = pd.read_csv('chefmozcuisine.csv')

In [17]:
Cuisine.head()

Unnamed: 0,placeID,Rcuisine
0,135110,Spanish
1,135109,Italian
2,135107,Latin_American
3,135106,Mexican
4,135105,Fast_Food


In [18]:
print(Cuisine.shape[0])
print(Cuisine.shape[1])

916
2


In [19]:
Cuisine.isnull().sum()

placeID     0
Rcuisine    0
dtype: int64

In [20]:
Cuisine.rename ( columns = {'placeID' : 'ID', 'Rcuisine' : 'Cuisine'}, inplace = True)

In [21]:
Cuisine.to_csv('Cuisine.csv')

### Hours dataset

In [8]:
Hours = pd.read_csv('chefmozhours4.csv')

In [9]:
Hours.head()

Unnamed: 0,placeID,hours,days
0,135111,00:00-23:30;,Mon;Tue;Wed;Thu;Fri;
1,135111,00:00-23:30;,Sat;
2,135111,00:00-23:30;,Sun;
3,135110,08:00-19:00;,Mon;Tue;Wed;Thu;Fri;
4,135110,00:00-00:00;,Sat;


In [10]:
print(Hours.shape[0])
print(Hours.shape[1])

2339
3


In [11]:
Hours.isnull().sum()

placeID    0
hours      0
days       0
dtype: int64

In [12]:
## Mapping Days as 'Weekdays' and 'Weekend' as category column in dataframe
mapping = {'Mon;Tue;Wed;Thu;Fri;': 'Weekdays','Sat;': 'Weekend', 'Sun;': 'Weekend'}

# Apply the mapping to the 'days' column
Hours['category'] = Hours['days'].apply(lambda x: mapping.get(x, x))

In [18]:
# Remove the semicolon ';' from the 'hours' column
Hours['hours'] = Hours['hours'].str.replace(';', '')

In [19]:
Hours.head()

Unnamed: 0,placeID,hours,days,category
0,135111,00:00-23:30,Mon;Tue;Wed;Thu;Fri;,Weekdays
1,135111,00:00-23:30,Sat;,Weekend
2,135111,00:00-23:30,Sun;,Weekend
3,135110,08:00-19:00,Mon;Tue;Wed;Thu;Fri;,Weekdays
4,135110,00:00-00:00,Sat;,Weekend


In [28]:
Hours.rename ( columns = { 'placeID': 'ID'}, inplace = True )

In [29]:
Hours.head()

Unnamed: 0,ID,hours,days,category
0,135111,00:00-23:30;,Mon;Tue;Wed;Thu;Fri;,Weekdays
1,135111,00:00-23:30;,Sat;,Weekend
2,135111,00:00-23:30;,Sun;,Weekend
3,135110,08:00-19:00;,Mon;Tue;Wed;Thu;Fri;,Weekdays
4,135110,00:00-00:00;,Sat;,Weekend


In [30]:
Hours.to_csv('hours.csv')

### Payment Method dataset

In [31]:
Payment = pd.read_csv('chefmozaccepts.csv')

In [32]:
Payment.head()

Unnamed: 0,placeID,Rpayment
0,135110,cash
1,135110,VISA
2,135110,MasterCard-Eurocard
3,135110,American_Express
4,135110,bank_debit_cards


In [33]:
print(Payment.shape[0])
print(Payment.shape[1])

1314
2


In [34]:
Payment.isnull().sum()

placeID     0
Rpayment    0
dtype: int64

In [35]:
Payment.rename (columns = {'placeID': 'ID', 'Rpayment': 'Payment_method'}, inplace = True)

In [36]:
Payment.head()

Unnamed: 0,ID,Payment_method
0,135110,cash
1,135110,VISA
2,135110,MasterCard-Eurocard
3,135110,American_Express
4,135110,bank_debit_cards


In [37]:
Payment.to_csv('Payment.csv')

### Parking dataset

In [38]:
Parking = pd.read_csv('chefmozparking.csv')

In [39]:
Parking.head()

Unnamed: 0,placeID,parking_lot
0,135111,public
1,135110,none
2,135109,none
3,135108,none
4,135107,none


In [40]:
print(Parking.shape[0])
print(Parking.shape[1])

702
2


In [41]:
Parking.isnull().sum()

placeID        0
parking_lot    0
dtype: int64

In [42]:
Parking.rename (columns = {'placeID': 'ID'}, inplace = True)

In [43]:
Parking.to_csv('Parking.csv')

### Ratings dataset

In [44]:
Ratings = pd.read_csv('rating_final.csv')

In [45]:
Ratings.head()

Unnamed: 0,userID,placeID,rating,food_rating,service_rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2
3,U1077,135060,1,2,2
4,U1068,135104,1,1,2


In [46]:
Ratings.isnull().sum()

userID            0
placeID           0
rating            0
food_rating       0
service_rating    0
dtype: int64

In [47]:
columns_to_drop = ['userID']

In [48]:
Ratings = Ratings.drop( columns = columns_to_drop)

In [49]:
Ratings.rename (columns = {'placeID' : 'ID', 'rating' : 'overall_rating'}, inplace = True)

In [50]:
Ratings.head()

Unnamed: 0,ID,overall_rating,food_rating,service_rating
0,135085,2,2,2
1,135038,2,2,1
2,132825,2,2,2
3,135060,1,2,2
4,135104,1,1,2


In [51]:
Ratings.to_csv('Ratings.csv')