In [33]:
# Bibliotecas
import pandas as pd
import inflection

In [34]:
# Load the raw Zomato dataset; `df_root` keeps the frame exactly as read from disk
df_root = pd.read_csv('dataset/zomato.csv')

# Work on an independent (deep) copy so the loaded frame stays available unchanged
df = df_root.copy(deep=True)
df.head(n=5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,...,Botswana Pula(P),1,0,0,0,3,4.6,3F7E00,Excellent,619
1,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,...,Botswana Pula(P),1,0,0,0,3,4.6,3F7E00,Excellent,619
2,6314542,Blackbird,162,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,"European, Asian",...,Botswana Pula(P),0,0,0,0,4,4.7,3F7E00,Excellent,469
3,6301293,Banapple,162,Makati City,"Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.023171,14.556196,"Filipino, American, Italian, Bakery",...,Botswana Pula(P),0,0,0,0,3,4.4,5BA829,Very Good,867
4,6315689,Bad Bird,162,Makati City,"Hole In The Wall, Floor 4, Century City Mall, ...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027708,14.565899,American,...,Botswana Pula(P),0,0,0,0,3,4.4,5BA829,Very Good,858


In [35]:
# Funções
# Numeric country codes used in the dataset -> country names.
# Kept at module level so the table is built once instead of on every call
# (the function is applied row by row).
_COUNTRY_NAMES = {
    1: "India",
    14: "Australia",
    30: "Brazil",
    37: "Canada",
    94: "Indonesia",
    148: "New Zealand",
    162: "Philippines",
    166: "Qatar",
    184: "Singapore",
    189: "South Africa",
    191: "Sri Lanka",
    208: "Turkey",
    214: "United Arab Emirates",
    215: "England",
    216: "United States of America",
}


def country_name(country_id):
    """Return the country name for a numeric country code.

    Args:
        country_id: One of the integer codes listed in ``_COUNTRY_NAMES``.

    Returns:
        The country name as a string.

    Raises:
        KeyError: If ``country_id`` is not a known code.
    """
    try:
        return _COUNTRY_NAMES[country_id]
    except KeyError:
        # Same exception type as a plain dict lookup, with a clearer message.
        raise KeyError(f"Unknown country code: {country_id!r}") from None

def create_price_type(price_range):
    """Return the price-category label for a numeric price range.

    1 maps to "cheap", 2 to "normal" and 3 to "expensive"; any other value
    is labelled "gourmet".
    """
    labelled_levels = ((1, "cheap"), (2, "normal"), (3, "expensive"))
    for level, label in labelled_levels:
        if price_range == level:
            return label
    # Fallback for every value that is not 1, 2 or 3
    return "gourmet"

def color_name(color_code):
    """Return the colour name mapped to a rating colour code.

    ``color_code`` is one of the hex strings listed below; two of them
    ("CBCBC8" and "FF7800") share the name "darkred". An unknown code
    raises ``KeyError``.
    """
    code_name_pairs = [
        ("3F7E00", "darkgreen"),
        ("5BA829", "green"),
        ("9ACD32", "lightgreen"),
        ("CDD614", "orange"),
        ("FFBA00", "red"),
        ("CBCBC8", "darkred"),
        ("FF7800", "darkred"),
    ]
    palette = dict(code_name_pairs)
    return palette[color_code]

def rename_columns(dataframe):
    """Return a copy of ``dataframe`` with its column names rewritten.

    Each column name is passed through ``inflection.titleize``, stripped of
    spaces, and then through ``inflection.underscore``. The input frame is
    left untouched; the renamed copy is returned.
    """
    renamed = dataframe.copy()
    renamed.columns = [
        inflection.underscore(inflection.titleize(name).replace(" ", ""))
        for name in renamed.columns
    ]
    return renamed

In [36]:
# Build the cleaned frame from the untouched raw data (`df_root`) rather than
# from `df` itself, so this cell can be re-run without failing on columns that
# a previous run already dropped or renamed.
df = df_root.drop(columns=['Switch to order menu', 'Locality Verbose'])

# Replace the numeric country codes with country names, then rename the column to match
df['Country Code'] = df['Country Code'].apply(country_name)
df = df.rename(columns={'Country Code': 'Country Name'})

# Replace the numeric price range with its price-category label
df['Price range'] = df['Price range'].apply(create_price_type)

# Replace the rating colour codes with colour names
df['Rating color'] = df['Rating color'].apply(color_name)

# Rewrite the column names (e.g. 'Restaurant ID' -> 'restaurant_id')
df = rename_columns(df)

df = (
    df
    # Drop rows with a missing cuisine directly, instead of stringifying NaN
    # and comparing against the text 'nan'
    .dropna(subset=['cuisines'])
    # Categorise each restaurant by the first cuisine in its comma-separated list
    .assign(cuisines=lambda frame: frame['cuisines'].astype(str).str.split(',').str[0])
    # Exclude the 'Mineira' and 'Drinks Only' cuisine categories
    .loc[lambda frame: ~frame['cuisines'].isin(['Mineira', 'Drinks Only']), :]
    # Remove fully duplicated rows
    .drop_duplicates()
)

In [37]:
# Preview the first five rows of the cleaned frame
df.head(n=5)

Unnamed: 0,restaurant_id,restaurant_name,country_name,city,address,locality,longitude,latitude,cuisines,average_cost_for_two,currency,has_table_booking,has_online_delivery,is_delivering_now,price_range,aggregate_rating,rating_color,rating_text,votes
0,6310675,Mama Lou's Italian Kitchen,Philippines,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,expensive,4.6,darkgreen,Excellent,619
2,6314542,Blackbird,Philippines,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,European,3100,Botswana Pula(P),0,0,0,gourmet,4.7,darkgreen,Excellent,469
3,6301293,Banapple,Philippines,Makati City,"Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.023171,14.556196,Filipino,800,Botswana Pula(P),0,0,0,expensive,4.4,green,Very Good,867
4,6315689,Bad Bird,Philippines,Makati City,"Hole In The Wall, Floor 4, Century City Mall, ...","Century City Mall, Poblacion, Makati City",121.027708,14.565899,American,700,Botswana Pula(P),0,0,0,expensive,4.4,green,Very Good,858
5,6304833,Manam,Philippines,Makati City,"Level 1, Greenbelt 2, Ayala Center, Greenbelt,...","Greenbelt 2, San Lorenzo, Makati City",121.02038,14.552351,Filipino,700,Botswana Pula(P),0,0,0,expensive,4.7,darkgreen,Excellent,930


In [38]:
# Write the cleaned frame to a comma-separated UTF-8 file, leaving out the index column
df.to_csv(
    'clean_zomato.csv',
    index=False,
    sep=',',
    encoding='utf-8',
)