In [1]:
import pandas as pd
import numpy as np

from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import KNNImputer

# >> Visualization
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
from scipy.stats import chi2_contingency
from IPython.display import display
from itertools import combinations
from scipy.stats import kstest, spearmanr, pearsonr
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)


In [2]:
data = pd.read_csv("spain.csv", index_col=0)
data.head()

Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_detailed,popularity_generic,top_tags,price_level,price_range,meals,cuisines,special_diets,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,default_language,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value,atmosphere,keywords
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 5 Restaurants in Aznalcollar,#4 of 6 places to eat in Aznalcollar,Spanish,,,,Spanish,,,N,N,N,,,,,3.0,1.0,English,1.0,0.0,0.0,1.0,0.0,0.0,,,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 5 Restaurants in Aznalcollar,#3 of 6 places to eat in Aznalcollar,Spanish,,,,Spanish,,,N,N,N,,,,,5.0,2.0,All languages,2.0,2.0,0.0,0.0,0.0,0.0,,,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 5 Restaurants in Aznalcollar,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,,N,N,N,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,All languages,2.0,1.0,0.0,0.0,0.0,1.0,,,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 5 Restaurants in Aznalcollar,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,,N,N,N,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,All languages,18.0,17.0,1.0,0.0,0.0,0.0,,,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,,,,,,,,,Reservations,N,N,N,,,,,,,,,,,,,,,,,,


In [3]:
def change_null_None(df,column_list): # rellena con None
     
        for column in column_list:
            if column in df.columns:

                df[column] = df[column].fillna("None")
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()


def change_null_cuisine(df,column_list): # rellena con spanish (categoria dominante) para variable CUISINE.
     
        for column in column_list:
            if column in df.columns:

                df[column] = df[column].fillna("Spanish")
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()

def change_null_for_standar_number(df, column_list): # rellena con -1 las columnas numéricas para los nulos.
     
        for column in column_list:
            if column in df.columns:

                df[column] = df[column].fillna(-1)
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()

def change_null_for_unknown(df,column_list): # Cuando no existe una categoría dominante en una variable categórica
     
        for column in column_list:
            if column in df.columns:

                df[column] = df[column].fillna("Unknown")
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()
    
def change_null_for_mode(df,column_list): # Cuando hay una categoría dominante en variables categóricas
        for column in column_list:
            if column in df.columns:
                mode = df[column].mode()[0]
          
                df[column] = df[column].fillna(mode)
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()
    
def change_null_for_mean(df, column_list):  # Cuando tenemos un 0-10% de nulos en una categoría numérica y la distribución es normal
        for column in column_list:
            if column in df.columns:
                    mean= df[column].mean()
              
                    df[column] = df[column].fillna(mean)
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()
    
def change_null_for_median(df, column_list):  # Cuando tenemos un 0-10% de nulos en una categoría numérica y la distribución es atípica
        for column in column_list:
            if column in df.columns:
                    median= df[column].median()
                
                    df[column] = df[column].fillna(median)
            else:
                print(f"Warning: The column '{column}' does not exist in the DataFrame.")
        return df.head()

def impute_with_knn(df, column_list, n_neighbors=5): # Cuando tenemos variables numéricas con más 10% de nulos
   
        imputer_knn = KNNImputer(n_neighbors=n_neighbors)

        imputed_data = imputer_knn.fit_transform(df[column_list])

        imputed_df = pd.DataFrame(imputed_data, columns=column_list)

        for column in column_list:
            df[f"{column}_knn"] = imputed_df[column]

        return df.head()

def transform_to_integer(df, column_names):
    """Transform floats to int for a list of columns."""
    for column in column_names:
        df[column] = df[column].astype(int)
    return df.head()

def drop_redundant_columns(df, columns):
        """Drops redundant columns . Param columns is a list of columns to de dropped"""
        df.drop(columns=columns, inplace=True, errors='ignore')


def map_columns_yes_or_no(df, columns):
    dicc = {"N": "No", "Y": "Yes"}

    for column in columns:
        df[column] = df[column].map(dicc)
    
    return df.head()

def quick_check(df, column_names):
    """This function is for testing purposes, to quickly check data type and unique values of a column."""
    
    for column in column_names:
        print(f"Column name: {column}")
        print(f"Data type: {df[column].dtype}")
        print(f"Null count: {df[column].isnull().sum()}")
        print("---------------------------------------------------------------------------------")  
    return df.head()

def convert_price_range(df):

        df["price_level"] = df["price_level"].fillna("€€-€€€")

        conversion_dictionary = {
        "€": "0 - 30€",
        "€€-€€€" : "30 - 80€",
        "€€€€" : "más de 80€"
         }

        # Aplicar la conversión usando .map() para reemplazar todos los valores de la columna
        df["price_range"] = df["price_level"].map(conversion_dictionary)

        return df.head()

       

# COMPROBACIONES

In [4]:
# eliminar columnas innecesarias
columns_to_drop = ["popularity_detailed", "special_diets","default_language", "atmosphere", "keywords" ]
drop_redundant_columns(data,columns_to_drop)
data.shape


(157479, 37)

In [5]:
# cambiar nulos por none
columns_none = ["awards", "top_tags", "features"]
change_null_None(data,columns_none)
quick_check(data,columns_none)

Column name: awards
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: top_tags
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: features
Data type: object
Null count: 0
---------------------------------------------------------------------------------


Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_generic,top_tags,price_level,price_range,meals,cuisines,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 6 places to eat in Aznalcollar,Spanish,,,,Spanish,,N,N,N,,,,,3.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 6 places to eat in Aznalcollar,Spanish,,,,Spanish,,N,N,N,,,,,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,N,N,N,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,N,N,N,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,18.0,17.0,1.0,0.0,0.0,0.0,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,,,,,,,Reservations,N,N,N,,,,,,,,,,,,,,,


In [6]:
# cambiar nulos por Unknown
columns_unknown = ["latitude", "longitude", "claimed", "popularity_generic", "meals", "original_open_hours"]
change_null_for_unknown(data,columns_unknown)
quick_check(data, columns_unknown)

Column name: latitude
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: longitude
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: claimed
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: popularity_generic
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: meals
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: original_open_hours
Data type: object
Null count: 0
---------------------------------------------------------------------------------


Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_generic,top_tags,price_level,price_range,meals,cuisines,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,N,N,N,Unknown,,,,3.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,N,N,N,Unknown,,,,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,N,N,N,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,N,N,N,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,18.0,17.0,1.0,0.0,0.0,0.0,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,Unknown,,,,Unknown,,Reservations,N,N,N,Unknown,,,,,,,,,,,,,,


In [7]:
# cambiar nulos por -1
columns_number = ["open_days_per_week", "open_hours_per_week","working_shifts_per_week"]
change_null_for_standar_number(data,columns_number)
quick_check(data, columns_number)

Column name: open_days_per_week
Data type: float64
Null count: 0
---------------------------------------------------------------------------------
Column name: open_hours_per_week
Data type: float64
Null count: 0
---------------------------------------------------------------------------------
Column name: working_shifts_per_week
Data type: float64
Null count: 0
---------------------------------------------------------------------------------


Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_generic,top_tags,price_level,price_range,meals,cuisines,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,N,N,N,Unknown,-1.0,-1.0,-1.0,3.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,N,N,N,Unknown,-1.0,-1.0,-1.0,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,N,N,N,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,N,N,N,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,18.0,17.0,1.0,0.0,0.0,0.0,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,Unknown,,,,Unknown,,Reservations,N,N,N,Unknown,-1.0,-1.0,-1.0,,,,,,,,,,,


In [8]:
# cambiar columna cuisine por Spanish - categoria dominante
change_null_cuisine(data,["cuisines"])
quick_check(data,["cuisines"])


Column name: cuisines
Data type: object
Null count: 0
---------------------------------------------------------------------------------


Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_generic,top_tags,price_level,price_range,meals,cuisines,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,N,N,N,Unknown,-1.0,-1.0,-1.0,3.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,N,N,N,Unknown,-1.0,-1.0,-1.0,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,N,N,N,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,N,N,N,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,18.0,17.0,1.0,0.0,0.0,0.0,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,Unknown,,,,Unknown,Spanish,Reservations,N,N,N,Unknown,-1.0,-1.0,-1.0,,,,,,,,,,,


In [9]:
# Cambiar columnas a YES o NO
columns_yes_no = ["vegetarian_friendly", "vegan_options", "gluten_free"]
map_columns_yes_or_no(data, columns_yes_no)
quick_check(data, columns_yes_no)

Column name: vegetarian_friendly
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: vegan_options
Data type: object
Null count: 0
---------------------------------------------------------------------------------
Column name: gluten_free
Data type: object
Null count: 0
---------------------------------------------------------------------------------


Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_generic,top_tags,price_level,price_range,meals,cuisines,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,No,No,No,Unknown,-1.0,-1.0,-1.0,3.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 6 places to eat in Aznalcollar,Spanish,,,Unknown,Spanish,,No,No,No,Unknown,-1.0,-1.0,-1.0,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,No,No,No,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,No,No,No,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,18.0,17.0,1.0,0.0,0.0,0.0,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,Unknown,,,,Unknown,Spanish,Reservations,No,No,No,Unknown,-1.0,-1.0,-1.0,,,,,,,,,,,


In [10]:

# cambia los rangos de precios de columnas price_level y price_range
convert_price_range(data)

Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_generic,top_tags,price_level,price_range,meals,cuisines,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 6 places to eat in Aznalcollar,Spanish,€€-€€€,30 - 80€,Unknown,Spanish,,No,No,No,Unknown,-1.0,-1.0,-1.0,3.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 6 places to eat in Aznalcollar,Spanish,€€-€€€,30 - 80€,Unknown,Spanish,,No,No,No,Unknown,-1.0,-1.0,-1.0,5.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",€€-€€€,30 - 80€,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,No,No,No,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,30 - 80€,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,No,No,No,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,18.0,17.0,1.0,0.0,0.0,0.0,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,Unknown,,€€-€€€,30 - 80€,Unknown,Spanish,Reservations,No,No,No,Unknown,-1.0,-1.0,-1.0,,,,,,,,,,,


In [11]:
columns_modify = [
    "avg_rating", 
    "total_reviews_count", 
    "reviews_count_in_default_language",
    "excellent",
    "very_good",
    "average",
    "poor",
    "terrible",
    "food",
    "service",
    "value"
]

for col in columns_modify:
    # Calcula la media y mediana de la columna en el DataFrame
    mean_value = data[col].mean()
    median_value = data[col].median()
    
    # Imprime los resultados
    print(f"Column: {col}")
    print(f"Mean: {mean_value}")
    print(f"Median: {median_value}")
    print("----------------------------------------------------------------------------------------")

Column: avg_rating
Mean: 3.959885888916926
Median: 4.0
----------------------------------------------------------------------------------------
Column: total_reviews_count
Mean: 98.69622706259968
Median: 24.0
----------------------------------------------------------------------------------------
Column: reviews_count_in_default_language
Mean: 29.36164573250902
Median: 6.0
----------------------------------------------------------------------------------------
Column: excellent
Mean: 16.810711388368254
Median: 3.0
----------------------------------------------------------------------------------------
Column: very_good
Mean: 6.86500265756567
Median: 1.0
----------------------------------------------------------------------------------------
Column: average
Mean: 2.497622178084874
Median: 0.0
----------------------------------------------------------------------------------------
Column: poor
Mean: 1.27316977648474
Median: 0.0
------------------------------------------------------------

In [12]:
# imputar nulos por media
columns_mean = ["avg_rating", "average","poor","terrible"]
change_null_for_mean(data, columns_mean)

# print for tasting
for column in columns_mean:
    quick_check(data, columns_mean)
    print("----------------------------------------------")

Column name: a


KeyError: 'a'

In [None]:
# imputar nulos por mediana
columns_median = ["total_reviews_count", "reviews_count_in_default_language", "excellent","very_good"]
change_null_for_mean(data, columns_median)

# print for tasting
for column in columns_median:
    quick_check(data, columns_median)
    print("----------------------------------------------")



In [None]:
# imputar nulos por KNN
columns_knn = ["food","service","value"]
impute_with_knn(data,columns_knn)
quick_check(data, columns_knn)

In [None]:
# Cambiar columnas de float a int , lo hacemos lo último porque sino dan errores los nulos.
columnns_int = ["open_days_per_week", "open_hours_per_week","working_shifts_per_week", "avg_rating", "total_reviews_count", "reviews_count_in_default_language",
    "excellent",
    "very_good",
    "average",
    "poor",
    "terrible",
    "food",
    "service",
    "value"]

transform_to_integer(data,columnns_int)