# Importación de librerías

In [29]:
from google.cloud import bigquery

import pandas as pd
import numpy as np

In [30]:
# Create the BigQuery client used by the rest of the notebook.
# No credentials are passed explicitly — presumably the environment's default
# GCP credentials are picked up; confirm for the runtime where this is executed.
client = bigquery.Client()

# Carga de datos

## Tablas BigQuery

In [31]:
# BigQuery destinations.
# Every table id is derived from `dataset_id`, so pointing the notebook at a
# different project/dataset is a one-line change (the original repeated the
# full "project.dataset" prefix in each literal and never used `dataset_id`).
dataset_id = "proyecto-nuevo-423502.Data_Automatizada_Final"

# Attributes
table_id_atributos = f"{dataset_id}.atributos"

# Categories
table_id_categorias = f"{dataset_id}.categorias"

# Restaurants
table_id_restaurantes = f"{dataset_id}.restaurantes"

# Reviews
table_id_reviews = f"{dataset_id}.reviews"

# Users
table_id_usuarios = f"{dataset_id}.usuarios"

## Datasets

### Únicos

In [32]:
# Load the datasets

# Unique list of venues from both datasets (read from a parquet file in GCS).
# Later cells use its `business_id` column to filter the Yelp frames.
data_dfgy_rest_uniques = "gs://archivos-preprocesados-henry/dfgy_rst_final.parquet"
dfgy_rest_uniques = pd.read_parquet(data_dfgy_rest_uniques)

### Yelp

In [33]:
# Yelp
# Locations of the preprocessed Yelp parquet files.
data_yelp_rest = "gs://archivos-preprocesados-henry/dfy_rest.parquet"  # restaurants
data_yelp_user = "gs://archivos-preprocesados-henry/dfyusr.parquet"  # users
data_yelp_reviews = "gs://archivos-preprocesados-henry/dfyrev.parquet"  # reviews
data_yelp_site_categories = "gs://archivos-preprocesados-henry/dfy_rest_categories.parquet"  # categories
data_yelp_site_attributes = "gs://archivos-preprocesados-henry/dfy_attributes.parquet"  # attributes

# Read each file into its own raw DataFrame.
yelp_rest = pd.read_parquet(data_yelp_rest)
yelp_user = pd.read_parquet(data_yelp_user)
yelp_reviews = pd.read_parquet(data_yelp_reviews)
yelp_site_categories = pd.read_parquet(data_yelp_site_categories)
yelp_site_attributes = pd.read_parquet(data_yelp_site_attributes)

### Google

In [34]:
# Google
# Locations of the preprocessed Google parquet files.
data_google_rest = "gs://archivos-preprocesados-henry/dfg_rest.parquet"  # restaurants
data_google_reviews = "gs://archivos-preprocesados-henry/dfg_reviews.parquet"  # reviews
data_google_site_categories = "gs://archivos-preprocesados-henry/dfg_categories.parquet"  # categories
data_atributos_google = "gs://archivos-preprocesados-henry/dfg_attributes.parquet"  # attributes

# Read each file into its own raw DataFrame.
google_rest = pd.read_parquet(data_google_rest)
google_reviews = pd.read_parquet(data_google_reviews)
google_site_categories = pd.read_parquet(data_google_site_categories)
atributos_google = pd.read_parquet(data_atributos_google)

# Procesamiento de la data

## Yelp

### Reviews

In [35]:
### Reviews
# Tag the Yelp reviews with their source, keep the columns shared with the
# Google reviews and rename them to the common schema.
dfy_reviews = (
    yelp_reviews
    .assign(source='yelp')
    .loc[:, ['source', 'business_id', 'user_id', 'date', 'month', 'year', 'stars', 'polarity', 'sentiment']]
    .rename(columns={'business_id': 'site_id', 'date': 'datetime', 'stars': 'rating'})
)

### Restaurantes

In [36]:
### Restaurants
# Build a derived frame instead of aliasing `yelp_rest`: the original bound
# `dfy_rest` to the very same object, so adding `source` and renaming in place
# silently modified the raw Yelp data.
dfy_rest = (
    yelp_rest
    .assign(source='yelp')
    .rename(columns={"name_x": "name", "city_x": "city", "state_x": "state"})
    .loc[:, ['source', 'business_id', 'name', 'state', 'city', 'postal_code', 'price', 'stars', 'review_count']]
    .rename(columns={'business_id': 'site_id', 'stars': 'rating_avg', 'review_count': 'reviews_count'})
    # Venues without a price are discarded.
    .dropna(subset=['price'])
)

# Activity start date = date of the earliest review of each venue.
# Venues without reviews keep a missing `date_start` (left merge).
dfy_rest_date_start = dfy_reviews.groupby('site_id')['datetime'].min().reset_index()
dfy_rest = (
    pd.merge(dfy_rest, dfy_rest_date_start, how='left', on=['site_id'])
    .rename(columns={'datetime': 'date_start'})
)

dfy_rest['year'] = dfy_rest['date_start'].dt.year
dfy_rest['month'] = dfy_rest['date_start'].dt.month

# Composite location keys, joined with ' - '.
dfy_rest['state_city'] = dfy_rest['state'].str.cat(dfy_rest['city'], sep=' - ')
dfy_rest['city_postalcode'] = dfy_rest['city'].str.cat(dfy_rest['postal_code'], sep=' - ')
dfy_rest['state_city_postalcode'] = dfy_rest['state_city'].str.cat(dfy_rest['postal_code'], sep=' - ')

### Usuarios

In [37]:
### Users
# Keep the user columns of interest, rename them to the common schema and parse
# the sign-up date (unparseable values become NaT because of errors='coerce').
dfy_user = (
    yelp_user[['user_id', 'review_count', 'yelping_since', 'average_stars']]
    .rename(columns={'review_count': 'reviews_count', 'yelping_since': 'date_start', 'average_stars': 'rating_avg'})
    .assign(date_start=lambda users: pd.to_datetime(users['date_start'], errors='coerce'))
    .reset_index(drop=True)
)

# Calendar parts of the sign-up date.
dfy_user = dfy_user.assign(
    year=dfy_user['date_start'].dt.year,
    month=dfy_user['date_start'].dt.month,
)

### Categorías

In [38]:
### Categories
# `assign` returns a new frame, so the `source` tag is never written onto a
# slice of `yelp_site_categories` (the original column assignment on the slice
# is a chained assignment and can raise SettingWithCopyWarning).
dfy_categories = (
    yelp_site_categories[['site_id', 'categories']]
    .assign(source='yelp')
    .loc[:, ['source', 'site_id', 'categories']]
)

## Google

### Reviews

In [39]:
### Reviews
# Derive a new frame instead of aliasing `google_reviews`: the original added
# `source`, `month` and `year` directly onto the raw Google data.
dfg_reviews = (
    google_reviews
    .assign(
        source='google',
        month=lambda reviews: reviews['datetime'].dt.month,
        year=lambda reviews: reviews['datetime'].dt.year,
    )
    .loc[:, ['source', 'gmap_id', 'user_id', 'datetime', 'month', 'year', 'rating', 'vader_polarity', 'vader_sentiment']]
    # Rename to the schema shared with the Yelp reviews.
    .rename(columns={'gmap_id': 'site_id', 'vader_polarity': 'polarity', 'vader_sentiment': 'sentiment'})
)

### Restaurantes

In [40]:
### Restaurants
# Derive a new frame instead of aliasing `google_rest`, so tagging the source
# does not modify the raw Google data.
dfg_rest = (
    google_rest
    .assign(source='google')
    .loc[:, ['source', 'gmap_id', 'name', 'state', 'city', 'postal_code', 'price', 'avg_rating', 'num_of_reviews']]
    .rename(columns={'gmap_id': 'site_id', 'avg_rating': 'rating_avg', 'num_of_reviews': 'reviews_count'})
)

# Activity start date = date of the earliest review of each venue.
# Venues without reviews keep a missing `date_start` (left merge).
dfg_rest_date_start = dfg_reviews.groupby('site_id')['datetime'].min().reset_index()
dfg_rest = (
    pd.merge(dfg_rest, dfg_rest_date_start, how='left', on=['site_id'])
    .rename(columns={'datetime': 'date_start'})
)

dfg_rest['year'] = dfg_rest['date_start'].dt.year
dfg_rest['month'] = dfg_rest['date_start'].dt.month

# Composite location keys, joined with ' - '.
dfg_rest['state_city'] = dfg_rest['state'].str.cat(dfg_rest['city'], sep=' - ')
dfg_rest['city_postalcode'] = dfg_rest['city'].str.cat(dfg_rest['postal_code'], sep=' - ')
# Fix: the original concatenated `dfy_rest['city']` here, i.e. the *Yelp* cities
# aligned by row index, which produced wrong keys for the Google venues.
dfg_rest['state_city_postalcode'] = dfg_rest['state_city'].str.cat(dfg_rest['postal_code'], sep=' - ')

### Usuarios

In [41]:
### Users (Google has no user table, so it is built from the reviews)
# One row per user: number of reviews, date of the first review, mean rating.
dfg_user = (
    dfg_reviews
    .groupby(['user_id'])
    .agg(
        reviews_count=('site_id', 'count'),
        date_start=('datetime', 'min'),
        rating_avg=('rating', 'mean'),
    )
    .reset_index()
)

# Calendar parts of the first-review date.
dfg_user = dfg_user.assign(
    year=dfg_user['date_start'].dt.year,
    month=dfg_user['date_start'].dt.month,
)

### Categorías

In [42]:
### Categories
# Align the Google categories with the Yelp layout: same column name for the
# label, a `source` tag and the same column order.
dfg_categories = (
    google_site_categories
    .rename(columns={'category': 'categories'})
    .assign(source='google')
    .loc[:, ['source', 'site_id', 'categories']]
)

### Atributos

In [43]:
### Attributes

# Copy of the Google attributes tagged with their source.
dfg_attributes = atributos_google.assign(source="google")

# Combinación de los datasets

## Restaurantes

In [44]:
# Combining the datasets

## Restaurants
# Keep only the Yelp venues present in the deduplicated listing.
_yelp_listed = dfy_rest['site_id'].isin(dfgy_rest_uniques['business_id'])
dfy_rest = dfy_rest[_yelp_listed]

# Stack Yelp on top of Google; on a repeated `site_id` the first row wins.
dfgy_rest = (
    pd.concat([dfy_rest, dfg_rest])
    .drop_duplicates(subset=['site_id'], keep='first')
)

# Data types: identifiers and location keys as text, postal code and calendar
# parts as integers (missing values become 0).
_text_cols = ["source", 'site_id', "name", "state", "city", "state_city", "city_postalcode", "state_city_postalcode"]
_int_cols = ["postal_code", "year", "month"]
dfgy_rest[_text_cols] = dfgy_rest[_text_cols].astype(str)
dfgy_rest[_int_cols] = dfgy_rest[_int_cols].astype(float).fillna(0).astype(int)

## Usuarios

In [45]:
## Users
# Stack Yelp and Google users, force `user_id` to text and keep the first row
# for every repeated id.
dfgy_user = (
    pd.concat([dfy_user, dfg_user])
    .assign(user_id=lambda users: users['user_id'].astype(str))
    .drop_duplicates(subset=['user_id'], keep='first')
)

## Categorías

In [46]:
## Categories
# Keep only the Yelp categories of venues present in the deduplicated listing.
_yelp_listed = dfy_categories['site_id'].isin(dfgy_rest_uniques['business_id'])
dfy_categories = dfy_categories[_yelp_listed]

# Stack Yelp and Google categories.
dfgy_categories = pd.concat([dfy_categories, dfg_categories])

# Data types: all three columns as text.
_text_cols = ['categories', "site_id", "source"]
dfgy_categories[_text_cols] = dfgy_categories[_text_cols].astype(str)

In [74]:
# Category clean-up: whitelist of the category labels to keep.
# The cell that follows drops every row whose `categories` value is not in this
# list. Matching is done with `isin`, i.e. verbatim and case-sensitive, so
# "Takeout restaurant" and "Takeout Restaurant" are two distinct entries.
mantener = ["Steamboat restaurant", "Madrilian restaurant", "Sundanese restaurant", "Chinese bakery", "Indian sweets shop", "Anhui restaurant",
            "Shandong restaurant", "Comic cafe", "Icelandic restaurant", "Yakisoba Restaurant", "Champon noodle restaurant", "Welsh restaurant",
            "South Indian restaurant", "Jiangsu restaurant", "Valencian restaurant", "Sukiyaki restaurant", "Scottish restaurant", "Provence restaurant", "Pizza takeaway", "Takeout restaurant", "Satay restaurant", "Chettinad restaurant", "Sfiha restaurant", "Mutton barbecue restaurant","Modern Indian restaurant", "Norwegian restaurant", "Traditional restaurant", "Polish restaurant",
            "Hong Kong style fast food restaurant", "Canadian restaurant", "Haitian restaurant","Swedish restaurant",
            "Japanese curry restaurant", "Brewpub", "Eastern European restaurant", "Russian restaurant", "Ukrainian restaurant", "Art cafe", "Northern Italian restaurant", "Shabu-shabu restaurant", "Meat dish restaurant", "Chophouse restaurant", "Western restaurant",
            "Wok restaurant", "Conveyor belt sushi restaurant", "Pueblan restaurant", "Pozole restaurant", "Nepalese restaurant", "South American restaurant", "Seafood donburi restaurant", "Cape Verdean restaurant", "Continental restaurant", "Biryani restaurant","Country food restaurant", "Sundae restaurant", "Po’ boys restaurant", "Fondue restaurant", "Tonkatsu restaurant",
            "Non vegetarian restaurant", "Berry restaurant", "Venezuelan restaurant", "Sushi takeaway",
            "Sichuan restaurant", "Sweets and dessert buffet", "Japanese food", "Cambodian restaurant", "Izakaya restaurant",
            "Sukiyaki and Shabu Shabu restaurant", "Indian Muslim restaurant", "Pacific Northwest restaurant (US)",
            "Chocolate cafe", "British restaurant", "Armenian restaurant", "Kebab shop", "Uzbeki restaurant", "Egyptian restaurant",
            "Georgian restaurant", "French steakhouse restaurant", "Haute French restaurant", "Pan-Latin restaurant", "Nicaraguan restaurant",
            "Mid-Atlantic restaurant (US)", "Israeli restaurant", "Wine", "Modern European restaurant", "Romanian restaurant", "Takoyaki restaurant", "Afghani restaurant", "Chinese food", "BBQ area", "Fish and seafood restaurant", "Yucatan restaurant", "Floridian restaurant", "Eritrean restaurant", "Burmese restaurant", "Dutch restaurant", "Momo restaurant", "Barbecue area", "Shanghainese restaurant", "Coffee", "Basque restaurant", "Porridge restaurant", "Kerala restaurant", "Syrian restaurant", "Tuscan restaurant", "Raclette restaurant", "Native American restaurant", "Self service restaurant", "Australian restaurant", "Chilean restaurant", "Japanese regional restaurant", "Croatian restaurant", "Chinese tea house", "Paraguayan restaurant", "Lithuanian restaurant",
            "Kyoto style Japanese restaurant", "Japanese sweets restaurant", "Tongue restaurant", "Polynesian restaurant", "Bar tabac",
            "Angler fish restaurant", "Japanized western restaurant", "Yakitori restaurant", "Tofu restaurant", "South African restaurant",
            "Uruguayan restaurant", "Yemenite restaurant", "Sri Lankan restaurant", "Indian sizzler restaurant", "Korean grocery store",
            "English restaurant", "Costa Rican restaurant", "New England restaurant", "Hakka restaurant", "Korean beef restaurant", "Korean rib restaurant", "Bulgarian restaurant", "Kushiyaki restaurant", "Serbian restaurant", "Fugu restaurant",
            "Hungarian restaurant", "Obanzai restaurant", "Unagi restaurant", "North African restaurant", "Modern izakaya restaurants",
            "Pacific Rim restaurant", "Asian", "Pennsylvania Dutch restaurant", "Austrian restaurant", "Swiss restaurant", "Japanese delicatessen", "Czech restaurant", "Macrobiotic restaurant", "Catalonian restaurant", "Tunisian restaurant", "Soondae restaurant",
            "Syokudo and Teishoku restaurant", "Goan restaurant", "Couscous restaurant", "New Zealand restaurant", "Roman restaurant",
            "Tibetan restaurant", "Tempura restaurant", "Offal barbecue restaurant", "Cosplay cafe", "Chesapeake restaurant", "Asturian restaurant", "Kazakhstani restaurant", "Danish restaurant", "Japanese cheap sweets shop", "Anago restaurant", "Modern British restaurant", "Turkmen restaurant", "Bubble Tea", "Coffee & Tea", "Bakeries", "Brewpubs", "Breweries", "Burgers", "Sandwiches", "Ice Cream & Frozen Yogurt", "Italian", "American (Traditional)", "Greek", "Vietnamese", "Food Trucks", "Diners", "Breakfast & Brunch", "Delis", "Sushi Bars", "Japanese", "Korean", "Hot Dogs", "Seafood", "Cocktail Bars", "Pizza", "Chicken Wings", "Specialty Food", "Steakhouses", "Pasta Shops", "Chinese", "Wine Bars", "Cafes", "Juice Bars & Smoothies", "American (New)", "Soup",
            "Sports Bars", "Chocolatiers & Shops", "Candy Stores", "Salad", "Beer Bar", "Lounges", "Wraps", "Beer", "Wine & Spirits", "Gastropubs", "Mexican", "French", "Moroccan", "Mediterranean", "Beer Gardens", "Desserts", "Cupcakes", "Patisserie/Cake Shop",
            "Live/Raw Food", "Filipino", "Barbeque", "Chicken Shop", "Southern", "Donuts", "Hawaiian", "Irish", "Hookah Bars", "Vegan", "Soul Food", "Shanghainese", "Szechuan", "Asian Fusion", "Thai", "Irish Pub", "Coffee Roasteries", "Caribbean", "Trinidadian",  "Gluten-Free", "Latin American", "Comfort Food", "Acai Bowls", "Vegetarian", "Pakistani", "Indian", "Halal", "Empanadas",
            "Middle Eastern", "Tacos", "Puerto Rican", "Whiskey Bars", "Ramen", "Spanish", "Cheesesteaks", "Pretzels", "African", "Kebab", "Turkish", "Tea Rooms", "Lebanese", "Gelato", "Creperies", "Falafel", "Internet Cafes", "Tapas Bars", "Noodles", "Dive Bars",
            "Peruvian", "Cuban", "Themed Cafes", "Gay Bars", "Tapas/Small Plates", "Sardinian", "Laotian", "Teppanyaki", "Ethiopian", "Persian/Iranian", "Hong Kong Style Cafe","Taiwanese","Modern European","Pan Asian","New Mexican Cuisine","Oriental", "Dominican",
            "Bed & Breakfast", "Arabic", "Tiki Bars", "Argentine", "Portuguese", "Dim Sum", "Hot Pot", "German", "Fondue", "Kosher", "Cantonese",
            "Brasseries", "Distilleries", "Salvadoran", "Mongolian", "British", "Waffles", "Piano Bars", "Speakeasies", "Syrian", 
            "Popcorn Shops", "Armenian", "Honduran", "Cigar Bars", "Venezuelan", "Colombian", "Himalayan/Nepalese", "Pop-Up Restaurants",
            "Polish", "Hainan", "Kombucha", "Russian", "Cambodian", "Afghan", "Somali", "Indonesian", "Brazilian", "Champagne Bars",
            "Senegalese", "Ethical Grocery","Drive-Thru Bars", "Malaysian", "Macarons", "Austrian", "Basque", "Calabrian",  "Australian","Iberian",  "Japanese Curry", "Haitian", "Dinner Theater", "Izakaya", "Pancakes", "Egyptian", "Scandinavian","Israeli", "Beer Hall","Delicatessen", "Uzbek", "Georgian", "South African", "Tuscan", "Czech", "Scottish", "Roman", "Ukrainian", "Hungarian",
            "Guamanian", "Poutineries", "Cucina campana", "Serbo Croatian", "Korean restaurant", "Bakery", "Health food restaurant",
            "Buffet restaurant", "Cocktail bar", "Family restaurant", "Seafood restaurant", "Dominican restaurant", "Taco restaurant","Mexican restaurant","Restaurant or cafe","Bar & grill","Breakfast restaurant", "Lunch restaurant", "Fried chicken takeaway", "Asian restaurant", "Hot pot restaurant", "Chinese restaurant", "Mediterranean restaurant", "Coffee shop", "Bagel shop", "Donut shop","Takeout Restaurant", "Italian restaurant", "Wine bar", "Ramen restaurant", "American restaurant","Modern French restaurant",
            "Pizza restaurant", "Juice shop", "Vegetarian restaurant", "Delivery Restaurant","Pizza delivery", "Pizza Takeout", "Sushi restaurant", "Japanese restaurant", "Creole restaurant", "Caribbean restaurant", "Barbecue restaurant", "Coffee store",
            "Snack bar", "Soft drinks shop", "Cafe", "Ice cream shop", "Hamburger restaurant", "Sandwich shop", "Soul food restaurant","Burrito restaurant","Gay bar","Latin American restaurant","Spanish restaurant","Cheesesteak restaurant","Jamaican restaurant",
            "Indian restaurant", "Dessert shop","Vegan restaurant", "Bistro", "Kosher restaurant", "Southwestern restaurant (US)",
            "Chicken restaurant", "Deli", "Thai restaurant", "Asian fusion restaurant", "Chinese noodle restaurant","Dumpling restaurant",
            "Soup restaurant", "Espresso bar", "Guatemalan restaurant","Peruvian restaurant","Grill", "Fusion restaurant",
            "Middle Eastern restaurant", "Turkish restaurant", "Vietnamese restaurant","New American restaurant", "Rice restaurant", "Chicken wings restaurant", "Hookah bar", "Food and drink", "Hot dog restaurant", "Vineyard", "Ethiopian restaurant", "Gluten-free restaurant", "Karaoke bar", "Dessert restaurant", "Southeast Asian restaurant", "Brazilian restaurant", "Indonesian restaurant","Cake shop", "Cupcake shop", "Cajun restaurant", "French restaurant", "Crêperie", "Tea house", "Steak house", "Salad shop",
            "Pho restaurant","Filipino restaurant", "Pretzel store", "Tapas restaurant", "Tex-Mex restaurant","African restaurant","West African restaurant","Brunch restaurant", "Organic restaurant", "Frozen yogurt shop", "Eclectic restaurant", "Lounge",
            "German restaurant", "European restaurant", "Bubble tea store", "Singaporean restaurant", "Hunan restaurant",
            "Chinese takeaway", "Delivery Chinese restaurant", "Pan-Asian restaurant", "Coffee roasters", "Salvadoran restaurant",
            "Halal restaurant", "Puerto Rican restaurant", "Nuevo Latino restaurant", "Oyster bar restaurant",
            "East African restaurant", "Poke bar", "Soup shop", "Californian restaurant", "Cold noodle restaurant",
            "Dan Dan noodle restaurant", "Udon noodle restaurant", "Popcorn store", "Cuban restaurant", "Colombian restaurant",
            "Tea store", "South Asian restaurant", "Taiwanese restaurant", "Tapas bar", "Vegetarian cafe and deli", "Pakistani restaurant",
            "Greek restaurant", "Mandarin restaurant", "Central American restaurant", "Ecuadorian restaurant", "Small plates restaurant",
            "Hot dog stand", "Gyro restaurant", "Shawarma restaurant", "Fine dining restaurant", "Hoagie restaurant",
            "Lebanese restaurant", "Gastropub", "Neapolitan restaurant", "Southern restaurant (US)", "Honduran restaurant", 
            "Laotian restaurant", "Piano bar", "Fish & chips restaurant", "Contemporary Louisiana restaurant", "Traditional American restaurant",
            "Mexican torta restaurant", "Portuguese restaurant","Irish restaurant", "Irish pub", "Argentinian restaurant",
            "Authentic Japanese restaurant", "Pancake restaurant", "Jewish restaurant", "Teppanyaki restaurant", "Cantonese restaurant","Malaysian restaurant", "Korean barbecue restaurant", "Cookie shop", "Dim sum restaurant", "Brewery", "Raw food restaurant",
            "Yakiniku restaurant", "Moroccan restaurant", "Belgian restaurant", "Dance restaurant", "Falafel restaurant", "Hawaiian restaurant",
            "Scandinavian restaurant","Japanese steakhouse", "Mongolian barbecue restaurant", "Persian restaurant", 
            "Southern Italian restaurant"]

In [75]:
# Drop every category that is not part of the `mantener` whitelist.
_is_kept = dfgy_categories["categories"].isin(mantener)
dfgy_categories = dfgy_categories.loc[_is_kept]

In [79]:
# Classification dictionary: generalized group -> raw category labels.
# Fixes applied to the original grouping (labels moved, none added or removed):
#   - "Beer Bar": Sweets -> Themed Bars.
#   - "Anhui restaurant" (a Chinese regional cuisine): Japonese Food -> Chinese Food.
#   - "Catalonian restaurant": Chinese Food -> European & Mediterranean Food.
#   - "Caribbean", "Trinidadian", "Puerto Rican": European & Mediterranean Food
#     -> Latin-American & Caribbean Food (where "Caribbean restaurant" and
#     "Puerto Rican restaurant" already were).
#   - "Ethiopian" -> African Food, "Pakistani" and "Uzbek" -> Asian food,
#     "Hawaiian" -> American Food, so each label shares the group of its
#     "... restaurant" counterpart.
#   - duplicated "Seafood" entry removed.
# The group names themselves (including the "Japonese Food" spelling) are left
# untouched because they end up as values of `Category_generalized`.
# NOTE(review): the same cuisine is still split across groups for "Turkish",
# "Lebanese", "Falafel", "Armenian", "Persian/Iranian" and "Fondue" versus their
# "... restaurant" counterparts — decide the intended group for each.
categories_c = {
    "Fast Food": ["Pizza takeaway","Burgers", "Sandwiches","Food Trucks","Pizza", "Chicken Wings","Wraps","Chicken Shop","Tacos","Hot Dogs","Cheesesteaks","Taco restaurant","Fried chicken takeaway","Pizza restaurant","Pizza delivery","Pizza Takeout","Hamburger restaurant","Sandwich shop","Burrito restaurant","Cheesesteak restaurant","Chicken restaurant","Chicken wings restaurant","Hot dog restaurant","Hot dog stand","Hoagie restaurant"],
    "Sweets": ["Sundae restaurant","Indian sweets shop","Chinese bakery","Berry restaurant","Sweets and dessert buffet","Japanese sweets restaurant","Japanese cheap sweets shop","Ice Cream & Frozen Yogurt","Desserts", "Bakeries","Chocolatiers & Shops", "Candy Stores","Cupcakes","Patisserie/Cake Shop","Donuts","Gelato", "Creperies","Waffles","Popcorn Shops","Macarons","Pancakes","Bakery","Donut shop","Ice cream shop","Dessert shop","Dessert restaurant","Cake shop","Cupcake shop","Crêperie","Pretzel store","Frozen yogurt shop","Cookie shop","Pancake restaurant","Popcorn store"],
    "Themed Bars": ["Wine","Bar tabac","Lounges","Gastropubs","Brewpub","Brewpubs", "Breweries","Cocktail Bars","Beer", "Wine Bars","Wine & Spirits","Beer Gardens","Sports Bars","Sushi Bars","Hookah Bars","Irish Pub","Whiskey Bars","Dive Bars","Gay Bars","Tiki Bars","Distilleries","Piano Bars","Speakeasies","Cigar Bars","Champagne Bars","Drive-Thru Bars","Beer Hall","Beer Bar","Cocktail bar","Wine bar","Snack bar","Soft drinks shop","Gay bar","Hookah bar","Food and drink","Vineyard","Karaoke bar","Lounge","Gastropub", "Brewery","Irish pub","Piano bar","Poke bar","Tapas bar"],
    "Coffee & Tea":["Comic cafe","Art cafe","Chocolate cafe","Coffee","Chinese tea house","Cosplay cafe","Juice Bars & Smoothies","Cafes","Coffee Roasteries","Bubble Tea", "Coffee & Tea","Tea Rooms","Internet Cafes","Themed Cafes","Hong Kong Style Cafe","Kombucha","Restaurant or cafe","Bar & grill","Coffee shop","Juice shop","Coffee store","Cafe","Espresso bar","Tea house","Coffee roasters", "Tea store","Bubble tea store"],
    "Sea food":["Seafood","Seafood donburi restaurant","Fish and seafood restaurant","Oyster bar restaurant","Seafood restaurant","Angler fish restaurant"],
    "American Food":["Mutton barbecue restaurant","Western restaurant","Chophouse restaurant","Country food restaurant","Po’ boys restaurant","Pacific Northwest restaurant (US)","Mid-Atlantic restaurant (US)","Floridian restaurant","Native American restaurant","American (Traditional)","Southern","Soul Food","American restaurant","Creole restaurant","Soul food restaurant","American (New)","Southwestern restaurant (US)","New American restaurant","Cajun restaurant","Californian restaurant","Southern restaurant (US)","Hawaiian","Hawaiian restaurant","Traditional American restaurant","Contemporary Louisiana restaurant"],
    "Mexican food":["Pozole restaurant","Pueblan restaurant","Yucatan restaurant","Mexican","New Mexican Cuisine","Mexican restaurant","Mexican torta restaurant","Tex-Mex restaurant"],
    "French Food":["Provence restaurant","French","Fondue","Brasseries","Modern French restaurant","French restaurant","French steakhouse restaurant", "Haute French restaurant"],
    "Latin-American & Caribbean Food":["Haitian restaurant","South American restaurant","Venezuelan restaurant","Nicaraguan restaurant","Pan-Latin restaurant","Chilean restaurant","Paraguayan restaurant","Uruguayan restaurant","Costa Rican restaurant","Latin American","Peruvian","Cuban","Dominican","Argentine","Salvadoran","Honduran","Venezuelan", "Colombian","Brazilian","Haitian","Caribbean","Trinidadian","Puerto Rican","Dominican restaurant","Caribbean restaurant","Latin American restaurant","Jamaican restaurant","Guatemalan restaurant","Peruvian restaurant","Brazilian restaurant","Salvadoran restaurant","Puerto Rican restaurant","Nuevo Latino restaurant","Cuban restaurant","Colombian restaurant", "Central American restaurant", "Ecuadorian restaurant","Argentinian restaurant","Honduran restaurant"],
    "European & Mediterranean Food":["Madrilian restaurant","Welsh restaurant","Scottish restaurant","Valencian restaurant","Sfiha restaurant","Norwegian restaurant","Polish restaurant","Swedish restaurant","Eastern European restaurant","Ukrainian restaurant","Northern Italian restaurant","Fondue restaurant","British restaurant","Georgian restaurant","Modern European restaurant","Romanian restaurant","Dutch restaurant","Basque restaurant","Raclette restaurant","Tuscan restaurant","Croatian restaurant","Lithuanian restaurant","English restaurant","New England restaurant","Bulgarian restaurant","Serbian restaurant","Hungarian restaurant","Swiss restaurant","Austrian restaurant","Pennsylvania Dutch restaurant","Czech restaurant","Roman restaurant","Asturian restaurant","Catalonian restaurant","Danish restaurant","Modern British restaurant","Italian","Greek","Mediterranean","Irish","Spanish","Tapas Bars","Tapas/Small Plates","Sardinian","Modern European","Portuguese","German","British","Armenian","Polish","Austrian","Basque","Calabrian","Iberian","Scandinavian","Georgian","Czech", "Scottish", "Roman", "Ukrainian", "Hungarian","Tuscan","Cucina campana","Serbo Croatian","Mediterranean restaurant","Italian restaurant","Spanish restaurant","Bistro","Turkish restaurant","Tapas restaurant","German restaurant","European restaurant","Greek restaurant","Gyro restaurant","Neapolitan restaurant","Fish & chips restaurant","Belgian restaurant","Scandinavian restaurant", "Southern Italian restaurant","Persian restaurant","Irish restaurant","Portuguese restaurant"],
    "Chinese Food":["Shandong restaurant","Jiangsu restaurant","Anhui restaurant","Hong Kong style fast food restaurant","Sichuan restaurant","Chinese food","Shanghainese restaurant","Shanghainese","Szechuan","Hakka restaurant","Chinese","Dim Sum", "Hot Pot","Cantonese","Hainan","Hot pot restaurant","Chinese restaurant","Chinese noodle restaurant","Hunan restaurant","Delivery Chinese restaurant","Chinese takeaway","Dan Dan noodle restaurant","Mandarin restaurant","Dim sum restaurant","Cantonese restaurant",],
    "Japonese Food":["Yakisoba Restaurant", 'Sukiyaki restaurant',"Conveyor belt sushi restaurant","Shabu-shabu restaurant","Japanese curry restaurant","Tonkatsu restaurant","Sushi takeaway","Japanese food","Izakaya restaurant","Sukiyaki and Shabu Shabu restaurant","Takoyaki restaurant","Japanese regional restaurant","Kyoto style Japanese restaurant","Yakitori restaurant","Japanized western restaurant","Kushiyaki restaurant","Fugu restaurant","Obanzai restaurant", "Unagi restaurant","Modern izakaya restaurants","Japanese delicatessen","Syokudo and Teishoku restaurant","Anago restaurant","Ramen","Teppanyaki","Japanese Curry","Izakaya","Ramen restaurant","Sushi restaurant","Japanese","Japanese restaurant","Udon noodle restaurant","Japanese steakhouse","Yakiniku restaurant","Teppanyaki restaurant","Authentic Japanese restaurant"],
    "Asian food":["Steamboat restaurant","Sundanese restaurant","South Indian restaurant","Satay restaurant","Chettinad restaurant","Russian restaurant","Wok restaurant","Modern Indian restaurant","Nepalese restaurant","Biryani restaurant","Cambodian restaurant","Indian Muslim restaurant","Kebab shop","Uzbeki restaurant","Uzbek","Burmese restaurant","Momo restaurant","Kerala restaurant","Polynesian restaurant","Yemenite restaurant","Indian sizzler restaurant","Sri Lankan restaurant","Korean grocery store","Korean rib restaurant","Korean beef restaurant","Pacific Rim restaurant","Asian","Soondae restaurant","Goan restaurant","Tibetan restaurant","Kazakhstani restaurant","Turkmen restaurant","Vietnamese","Filipino","Asian Fusion","Thai","Indian","Kebab","Turkish","Laotian","Korean","Persian/Iranian","Taiwanese","Pan Asian","Oriental","Arabic","Mongolian","Himalayan/Nepalese","Russian","Cambodian","Indonesian","Malaysian","Guamanian","Korean restaurant","Asian restaurant","Indian restaurant","Thai restaurant","Asian fusion restaurant","Dumpling restaurant","Vietnamese restaurant","Southeast Asian restaurant","Indonesian restaurant","Pho restaurant","Filipino restaurant","Singaporean restaurant","Pan-Asian restaurant","Cold noodle restaurant","South Asian restaurant","Taiwanese restaurant","Pakistani","Pakistani restaurant","Shawarma restaurant","Lebanese restaurant","Laotian restaurant","Falafel restaurant","Mongolian barbecue restaurant","Korean barbecue restaurant","Malaysian restaurant"],
    "African Food":['Moroccan',"Cape Verdean restaurant","Armenian restaurant","Egyptian restaurant","Israeli restaurant","Afghani restaurant","Eritrean restaurant","Syrian restaurant","South African restaurant","North African restaurant","Couscous restaurant","Tunisian restaurant","Middle Eastern","African","Lebanese","Falafel","Syrian","Afghan","Senegalese","Somali","Egyptian","Israeli","South African","Middle Eastern restaurant","Ethiopian","Ethiopian restaurant","African restaurant","West African restaurant","Moroccan restaurant","East African restaurant"],
    "Vegetarian & Vegan":["Macrobiotic restaurant","Vegan","Vegetarian","Vegetarian cafe and deli","Vegan restaurant","Vegetarian restaurant"],
    "Other specialized food":["Icelandic restaurant","Champon noodle restaurant","Takeout restaurant","Traditional restaurant","Canadian restaurant","Meat dish restaurant","Continental restaurant","Non vegetarian restaurant","BBQ area","Barbecue area","Porridge restaurant","Australian restaurant","Self service restaurant","Tongue restaurant","Tofu restaurant","New Zealand restaurant","Tempura restaurant","Offal barbecue restaurant","Chesapeake restaurant","Breakfast & Brunch","Diners","Steakhouses","Specialty Food","Pasta Shops","Salad","Live/Raw Food","Barbeque","Gluten-Free","Empanadas","Delis","Halal","Soup","Comfort Food", "Acai Bowls","Pretzels","Noodles","Bed & Breakfast","Kosher","Pop-Up Restaurants","Ethical Grocery","Australian","Dinner Theater","Delicatessen","Poutineries","Health food restaurant","Buffet restaurant","Family restaurant","Breakfast restaurant","Lunch restaurant","Bagel shop","Takeout Restaurant","Delivery Restaurant","Barbecue restaurant","Kosher restaurant","Deli","Soup restaurant","Grill","Fusion restaurant","Rice restaurant","Gluten-free restaurant","Steak house","Salad shop","Organic restaurant","Brunch restaurant","Eclectic restaurant","Halal restaurant","Soup shop","Fine dining restaurant","Small plates restaurant","Dance restaurant","Raw food restaurant","Jewish restaurant"]
}

In [80]:
# New column holding the generalized group of each category; rows that match no
# group keep None.
dfgy_categories['Category_generalized'] = None

# Walk the groups of `categories_c` and stamp the group name on every row whose
# category belongs to that group.
for group_name in categories_c:
    in_group = dfgy_categories['categories'].isin(categories_c[group_name])
    dfgy_categories.loc[in_group, 'Category_generalized'] = group_name

## Reviews

In [48]:
## Reviews
# Keep only the Yelp reviews of venues present in the deduplicated listing.
_yelp_listed = dfy_reviews['site_id'].isin(dfgy_rest_uniques['business_id'])
dfy_reviews = dfy_reviews[_yelp_listed]

# Stack Yelp and Google reviews.
dfgy_reviews = pd.concat([dfy_reviews, dfg_reviews])

# Data types: identifiers and source as text.
_text_cols = ['user_id', "site_id", "source"]
dfgy_reviews[_text_cols] = dfgy_reviews[_text_cols].astype(str)

## Atributos

In [56]:
## Attributes
# Keep only the Yelp attributes of venues present in the deduplicated listing.
# The Yelp frame deliberately keeps its `business_id` key: the original called
# `.rename(columns={"business_id": "site_id"})` here without using the result
# (a no-op), and the steps below rely on `business_id` being present.
yelp_site_attributes = yelp_site_attributes[yelp_site_attributes['business_id'].isin(dfgy_rest_uniques['business_id'])]

# Stack Yelp and Google attributes.
# NOTE(review): `dfg_attributes` (the Google attributes tagged with `source`) is
# built in an earlier cell, but the raw `atributos_google` is what gets
# concatenated here — confirm which one is intended.
dfgy_attributes = pd.concat([yelp_site_attributes, atributos_google], ignore_index=True)

# Turn the literal string "NaN" into a real missing value.
dfgy_attributes = dfgy_attributes.replace('NaN', pd.NA)

# Unify the key: rows lacking `business_id` take their id from `site_id`,
# after which `site_id` is redundant and dropped.
dfgy_attributes['business_id'] = dfgy_attributes['business_id'].fillna(dfgy_attributes['site_id'])
dfgy_attributes = dfgy_attributes.drop(columns=['site_id'])

In [59]:
# Mapping used to unify or correct attribute names:
# unified name -> raw names that are replaced by it.
reemplazo_atributos = {
    "Accessible entrance": ["Wheelchair accessible entrance", "Wheelchair-accessible entrance", "WheelchairAccessible"],
    "Accessible elevator": ["Wheelchair accessible elevator", "Wheelchair-accessible lift"],
    "Accessible seating": ["Wheelchair accessible seating", "Wheelchair-accessible seating"],
    "Accessible restroom": ["Wheelchair accessible restroom", "Wheelchair-accessible toilet"],
    "Accessible parking": ["Wheelchair accessible parking lot", "Wheelchair-accessible car park"],
    "Wifi": ["WiFi", "Wi-Fi"],
    "Delivery&TakeOut": ["RestaurantsDelivery", "RestaurantsTakeOut"],
    "Outdoor Seating": ["OutdoorSeating"],
    "Accepts Cards": ["BusinessAcceptsCreditCards", "BusinessAcceptsBitcoin"],
    "Parking": ["BusinessParking", "BikeParking"],
    "Appointment Only": ["ByAppointmentOnly"],
    "Reservations": ["RestaurantsReservations"],
    "HappyHour": ["HappyHour", "BestNights"],
    "BYOB": ["BYOB", "Corkage", "BYOBCorkage"],
    "Good for kids": ["Good for kids", "GoodForKids"],
    "Sells Alcohol": ["Alcohol", "Bar onsite", "Bar on site"],
    "Restroom": [
        "Restroom", "Gender-neutral restroom", "Public restroom",
        "Toilets", "Gender-neutral toilets", "Public toilet",
    ],
    "Baggage storage": ["Baggage storage", "CoatCheck"],
}

# Attributes considered not useful for the analysis; rows carrying them are
# removed later.
eliminar_atributos = [
    "Caters", "RestaurantsPriceRange2", "Ambience", "RestaurantsTableService",
    "NoiseLevel", "Music", "AcceptsInsurance", "AgesAllowed",
    "HairSpecializesIn", "High chairs", "Restaurant", "Golf course",
    "Mechanic", "Swimming pool", "All-inclusive", "Stadium seating",
]

In [60]:
# Invert the replacement mapping so it can be passed to `replace`:
# each raw attribute name points to its unified label.
reverse_mapping = {
    raw_name: unified_name
    for unified_name, raw_names in reemplazo_atributos.items()
    for raw_name in raw_names
}

# Replace raw attribute names in the 'attributes' column with their unified
# label. Series.replace with a dict matches whole values, not substrings.
dfgy_attributes['attributes'] = dfgy_attributes['attributes'].replace(reverse_mapping)

# Drop the rows whose attribute is exactly one of the names in eliminar_atributos.
# This used to be `str.contains('|'.join(eliminar_atributos))`, an unanchored
# regex search: 'Restaurant' also removed every attribute that merely contains
# that text (e.g. 'RestaurantsGoodForGroups') and 'Music' removed 'Live music'.
# It also failed on missing values, because `~` was applied to a mask holding NA.
# `isin` compares whole values, consistent with the replace above, and returns
# False for missing values, so those rows are kept.
is_discarded = dfgy_attributes['attributes'].isin(eliminar_atributos)
dfgy_attributes = dfgy_attributes[~is_discarded]

# Subida de los datos a BigQuery

In [87]:
# Two lookups that share the same keys: the first gives the BigQuery table ID
# for each processed DataFrame, the second gives the DataFrame object itself.

# Key -> ID of the destination table in BigQuery.
# The insertion order here is the order in which the upload loop visits them.
dataframe_to_table_map = dict(
    dataframe_restaurantes=table_id_restaurantes,
    dataframe_usuarios=table_id_usuarios,
    dataframe_atributos=table_id_atributos,
    dataframe_reviews=table_id_reviews,
    dataframe_categorias=table_id_categorias,
)

# Key -> final processed DataFrame.
dataframes = dict(
    dataframe_restaurantes=dfgy_rest,
    dataframe_usuarios=dfgy_user,
    dataframe_reviews=dfgy_reviews,
    dataframe_categorias=dfgy_categories,
    dataframe_atributos=dfgy_attributes,
)

In [88]:
# Upload each processed DataFrame to its corresponding BigQuery table.

for dataframe_name in dataframe_to_table_map:
    # Destination table and source DataFrame registered under the same key.
    table_id = dataframe_to_table_map[dataframe_name]
    dataframe = dataframes[dataframe_name]

    # Load-job configuration: WRITE_TRUNCATE overwrites the table if it already exists.
    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
    )

    # Start the load job and block until it has finished.
    load_job = client.load_table_from_dataframe(dataframe, table_id, job_config=job_config)
    load_job.result()

    print(f"Successfully loaded {dataframe_name} into table {table_id}")


Successfully loaded dataframe_restaurantes into table proyecto-nuevo-423502.Data_Automatizada_Final.restaurantes
Successfully loaded dataframe_usuarios into table proyecto-nuevo-423502.Data_Automatizada_Final.usuarios
Successfully loaded dataframe_atributos into table proyecto-nuevo-423502.Data_Automatizada_Final.atributos
Successfully loaded dataframe_reviews into table proyecto-nuevo-423502.Data_Automatizada_Final.reviews
Successfully loaded dataframe_categorias into table proyecto-nuevo-423502.Data_Automatizada_Final.categorias
