# <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift Condensed', sans-serif; font-size: 2.5em;">Connecting to MySQL Workbench
</h1>


In [1260]:
# 📚 Basic libraries
import pandas as pd # data manipulation
import numpy as np # numerical operations
import warnings # warning messages management
import matplotlib.pyplot as plt # plotting
import seaborn as sns # plotting
colors = ['#FF7F7F', '#FFF9C4', '#A5D6A7', '#81D4FA', '#F1F1F1']

# 📊 Statistical libraries
from scipy.stats import chi2_contingency # 🧮 Chi-squared test for independence
from scipy.stats.contingency import association # 📈 Measures association strength between categorical variables
import scipy.stats as st # ⚙️ General statistical functions
import statsmodels.api as sm # 📐 Advanced statistical models and testing
import statsmodels.formula.api as smf # 📝 Formula-based interface for statistical models

# Set the color palette for all plots by default
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=colors)
# ⚙️ Settings
pd.set_option('display.max_columns', None) # display all columns
warnings.filterwarnings('ignore') # ignore warnings

# 🌐 New Libraries
from getpass import getpass # get password without showing it
import pymysql # MySQL connection
import requests # HTTP requests
import time # time operations

# ⚙️ Settings
pd.set_option('display.max_columns', None) # display all columns
warnings.filterwarnings('ignore') # ignore warnings

def create_connection(): # create a connection to a MySQL database
    """
    Create a connection to a MySQL database.
    Prompts the user for the database password and database name.
    
    Returns:
        cnx: pymysql.connections.Connection object
            The connection object to the MySQL database.
    """
    password = getpass("Please, kindly insert your password: ")
    database = input("Please, kindly insert your database name: ")
    cnx = pymysql.connect(user='root', password=password,
                          host='localhost', database=database)
    if cnx.open:
        print("Connection open")
    else:
        print("Connection is not successfully open")
    return cnx

In [1261]:
# We enter the password and the database 
connection = create_connection()

Connection open


## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Testing the connection
</h1>


In [1262]:
query = "SELECT * FROM location l JOIN rest_cui rc USING (placeID) JOIN if_parking USING (placeID) JOIN rest_pay USING (placeID) JOIN opening USING(placeID);"
df_location = pd.read_sql(query, connection)
df_location.head()

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,zip,alcohol,smoking_area,dress_code,accessibility,price,url,Rambience,franchise,area,other_services,Rcuisine,parking_lot,Rpayment,hours,days
0,135109,18.921785,-99.23535,0101000020957F0000A6BF695F136F5AC1DADF87B20556...,Paniroles,?,?,?,?,?,?,Wine-Beer,not permitted,informal,no_accessibility,medium,?,quiet,f,closed,Internet,Italian,none,cash,08:00-21:00;,Mon;Tue;Wed;Thu;Fri;
1,135109,18.921785,-99.23535,0101000020957F0000A6BF695F136F5AC1DADF87B20556...,Paniroles,?,?,?,?,?,?,Wine-Beer,not permitted,informal,no_accessibility,medium,?,quiet,f,closed,Internet,Italian,none,cash,08:00-21:00;,Sat;
2,135109,18.921785,-99.23535,0101000020957F0000A6BF695F136F5AC1DADF87B20556...,Paniroles,?,?,?,?,?,?,Wine-Beer,not permitted,informal,no_accessibility,medium,?,quiet,f,closed,Internet,Italian,none,cash,08:00-21:00;,Sun;
3,135106,22.149709,-100.976093,0101000020957F0000649D6F21634858C119AE9BF528A3...,El Rinc�n de San Francisco,Universidad 169,San Luis Potosi,San Luis Potosi,Mexico,?,78000,Wine-Beer,only at bar,informal,partially,medium,?,familiar,f,open,none,Mexican,none,MasterCard-Eurocard,18:00-23:30;,Mon;Tue;Wed;Thu;Fri;
4,135106,22.149709,-100.976093,0101000020957F0000649D6F21634858C119AE9BF528A3...,El Rinc�n de San Francisco,Universidad 169,San Luis Potosi,San Luis Potosi,Mexico,?,78000,Wine-Beer,only at bar,informal,partially,medium,?,familiar,f,open,none,Mexican,none,VISA,18:00-23:30;,Mon;Tue;Wed;Thu;Fri;


As with the customer dataframe, we will remove duplicates based on the restaurant ID to avoid overrepresentation of those that have multiple types of parking, payment methods, or more than one business style.

In [1263]:
df_location.drop_duplicates(subset="placeID", inplace=True)

# <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift Condensed', sans-serif; font-size: 2.5em;">Cleaning column by column
</h1>


## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">City</h1>


In [1264]:
df_location.loc[df_location["city"] == "San Luis Potosi.", "city"] = "San Luis Potosi"

In [1265]:
df_location.city.value_counts()

city
San Luis Potosi     46
?                   11
Cuernavaca           6
san luis potosi      5
victoria             5
Ciudad Victoria      2
Soledad              1
san luis potos       1
san luis potosi      1
s.l.p                1
cuernavaca           1
s.l.p.               1
Jiutepec             1
Cd Victoria          1
Cd. Victoria         1
victoria             1
Name: count, dtype: int64

In [1266]:
df_location['city'] = df_location['city'].str.replace(r'\b.*San Luis Potosi.    .*\b', 'San Luis Potosi', regex=True)


In [1267]:
df_location['city'] = df_location['city'].str.replace(r'\b.*San Luis Potosi .*\b', 'San Luis Potosi', regex=True)


In [1268]:
df_location['city'] = df_location['city'].str.replace(r'\b.*Potos.*\b', 'San Luis Potosi', regex=True)


In [1269]:
df_location['city'] = df_location['city'].str.replace(r'\b.*s.l.p.*\b', 'San Luis Potosi', regex=True)


In [1270]:
df_location['city'] = df_location['city'].str.replace(r'\b.*San Luis Potosi .*\b', 'San Luis Potosi', regex=True)


In [1271]:
df_location['city'] = df_location['city'].str.replace(r'\bSan Luis Potosi\.\b', 'San Luis Potosi', regex=True)


In [1272]:
df_location['city'] = df_location['city'].str.strip()

In [1273]:
df_location['city'] = df_location['city'].str.replace(r'\b.*ictoria.*\b', 'Ciudad Victoria', regex=True)


In [1274]:

df_location['city'] = df_location['city'].str.strip()  # Eliminar espacios al inicio y final
df_location['city'] = df_location['city'].str.lower()  # Convertir a minúsculas


df_location['city'] = df_location['city'].str.replace(r'\bsan luis potosi[\.\s]*\b', 'san luis potosi', regex=True)


df_location['city'] = df_location['city'].str.title()

In [1275]:
df_location['city'] = df_location['city'].str.replace(r'\b.*Slp.*\b', 'San Luis Potosi', regex=True)


In [1276]:
df_location.city = df_location.city.replace("?", None).ffill()

In [1277]:
df_location.loc[df_location["city"] == "San Luis Potosi.", "city"] = "San Luis Potosi"

In [1278]:
df_location['city'] = df_location['city'].str.replace(r'.*Potos.*', 'San Luis Potosi', regex=True)

In [1279]:
df_location.city.value_counts()

city
San Luis Potosi    60
Ciudad Victoria    15
Cuernavaca          7
Soledad             1
Jiutepec            1
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">State</h1>


In [1280]:
df_location.state.value_counts()

state
SLP                32
San Luis Potosi    14
?                  10
Morelos             7
Tamaulipas          7
san luis potosi     4
tamaulipas          3
S.L.P.              2
mexico              2
san luis potos      1
slp                 1
s.l.p.              1
morelos             1
Name: count, dtype: int64

In [1281]:
df_location.state = df_location.state.replace("?", None).ffill()

In [1282]:
df_location.state.value_counts()

state
SLP                32
San Luis Potosi    18
Tamaulipas          9
Morelos             7
san luis potosi     7
tamaulipas          3
S.L.P.              2
mexico              2
san luis potos      1
slp                 1
s.l.p.              1
morelos             1
Name: count, dtype: int64

In [1283]:
df_location['state'] = df_location['state'].str.replace(r'\b.*S.L.*\b', 'San Luis Potosi', regex=True)


In [1284]:
df_location['state'] = df_location['state'].str.replace(r'\b.*SLP.*\b', 'San Luis Potosi', regex=True)


In [1285]:
df_location['state'] = df_location['state'].str.replace(r'\b.*slp.*\b', 'San Luis Potosi', regex=True)


In [1286]:
df_location['state'] = df_location['state'].str.replace(r'\b.*s.l.p.*\b', 'San Luis Potosi', regex=True)


In [1287]:
df_location['state'] = df_location['state'].str.replace(r'\b.*uis*\b', 'San Luis Potosi', regex=True)


In [1288]:
df_location["state"] = df_location["state"].str.replace(
    r"^(San Luis Potosi) \w+\b", r"\1", regex=True
)

In [1289]:
df_location['state'] = df_location['state'].str.replace(r'\b.*amaulipas.*\b', 'Tamaulipas', regex=True)


In [1290]:
df_location['state'] = df_location['state'].str.replace(r'\b.*orelos.*\b', 'Morelos', regex=True)


In [1291]:
df_location[df_location["state"] == "mexico"]

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,zip,alcohol,smoking_area,dress_code,accessibility,price,url,Rambience,franchise,area,other_services,Rcuisine,parking_lot,Rpayment,hours,days
399,132955,22.147622,-101.010275,0101000020957F000068BE7C87C24758C1920A360A08AD...,emilianos,venustiano carranza,San Luis Potosi,mexico,?,?,?,Wine-Beer,none,informal,completely,low,?,familiar,t,closed,variety,Bar_Pub_Brewery,none,Carte_Blanche,00:00-23:30;,Mon;Tue;Wed;Thu;Fri;
459,132872,22.173596,-100.994603,0101000020957F0000942C3434384458C1C2D3499804A7...,Pizzeria Julios,?,San Luis Potosi,mexico,?,?,?,No_Alcohol_Served,not permitted,informal,partially,medium,?,familiar,f,closed,none,American,public,bank_debit_cards,09:00-21:00;,Mon;Tue;Wed;Thu;Fri;


In [1292]:
df_location.state = df_location.state.apply(lambda x: "San Luis Potosi" if x == "mexico" else x)

In [1293]:
df_location['state'] = df_location['state'].str.replace(r'.*Potos.*', 'San Luis Potosi', regex=True)

In [1294]:
df_location.state.value_counts()

state
San Luis Potosi    64
Tamaulipas         12
Morelos             8
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Country</h1>


In [1295]:
df_location['country'] = 'Mexico'

In [1296]:
df_location.country.value_counts()

country
Mexico    85
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Drink level</h1>


In [1297]:
df_location.alcohol.value_counts()

alcohol
No_Alcohol_Served    56
Wine-Beer            22
Full_Bar              7
Name: count, dtype: int64

In [1298]:
df_location['alcohol'] = df_location['alcohol'].str.replace(r'\b.*No_Alcohol_Served.*\b', 'No alcohol', regex=True)


In [1299]:
df_location['alcohol'] = df_location['alcohol'].str.replace(r'\b.*Wine-Beer.*\b', 'Wine/ Beer', regex=True)


In [1300]:
df_location['alcohol'] = df_location['alcohol'].str.replace(r'\b.*Full_Bar.*\b', 'Full Bar', regex=True)


In [1301]:
df_location.alcohol.value_counts()

alcohol
No alcohol    56
Wine/ Beer    22
Full Bar       7
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Smoking Area</h1>


In [1302]:
df_location['smoking_area'] = df_location['smoking_area'].str.capitalize()

In [1303]:
df_location.smoking_area.value_counts()

smoking_area
None             36
Not permitted    24
Section          15
Permitted         9
Only at bar       1
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Dress code</h1>


In [1304]:
df_location['dress_code'] = df_location['dress_code'].str.capitalize()

In [1305]:
df_location.dress_code.value_counts()

dress_code
Informal    76
Casual       8
Formal       1
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Accesibility</h1>


In [1306]:
df_location['accessibility'] = df_location['accessibility'].str.capitalize()

In [1307]:
df_location['accessibility'] = df_location['accessibility'].str.replace(r'\b.*No_accessibility.*\b', 'No accessibility', regex=True)


In [1308]:
df_location.accessibility.value_counts()

accessibility
No accessibility    47
Completely          30
Partially            8
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Price</h1>


In [1309]:
df_location['price'] = df_location['price'].str.capitalize()

In [1310]:
df_location = df_location.applymap(lambda x: str(x).replace('_', ' ').title())

In [1311]:
df_location.price.value_counts()

price
Medium    46
Low       27
High      12
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Rambience</h1>


In [1312]:
df_location.Rambience.value_counts()

Rambience
Familiar    76
Quiet        9
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Franchise</h1>


In [1313]:
df_location.franchise.value_counts()

franchise
F    67
T    18
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Area</h1>


In [1314]:
df_location.area.value_counts()

area
Closed    71
Open      14
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Other services</h1>


In [1315]:
df_location.other_services.value_counts()

other_services
None        76
Variety      6
Internet     3
Name: count, dtype: int64

In [1316]:
df_location.head()

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,zip,alcohol,smoking_area,dress_code,accessibility,price,url,Rambience,franchise,area,other_services,Rcuisine,parking_lot,Rpayment,hours,days
0,135109,18.9217848,-99.2353499,0101000020957F0000A6Bf695F136F5Ac1Dadf87B20556...,Paniroles,?,,,Mexico,?,?,Wine/ Beer,Not Permitted,Informal,No Accessibility,Medium,?,Quiet,F,Closed,Internet,Italian,,Cash,08:00-21:00;,Mon;Tue;Wed;Thu;Fri;
3,135106,22.1497088,-100.9760928,0101000020957F0000649D6F21634858C119Ae9Bf528A3...,El Rinc�N De San Francisco,Universidad 169,San Luis Potosi,San Luis Potosi,Mexico,?,78000,Wine/ Beer,Only At Bar,Informal,Partially,Medium,?,Familiar,F,Open,,Mexican,,Mastercard-Eurocard,18:00-23:30;,Mon;Tue;Wed;Thu;Fri;
12,135104,23.7529821,-99.1684341,0101000020957F00007Cdf5Eafc58157C1645743B23E4F...,Vips,?,San Luis Potosi,San Luis Potosi,Mexico,?,?,Full Bar,Not Permitted,Informal,Completely,Medium,?,Familiar,T,Closed,Variety,Mexican,Yes,Mastercard-Eurocard,00:00-23:30;,Mon;Tue;Wed;Thu;Fri;
21,135088,18.8760113,-99.2198896,0101000020957F0000E14Ad4Dbc7765Ac1F7B33C85B153...,Cafeteria Cenidet,Interior Internado Palmira Sn,Cuernavaca,Morelos,Mexico,?,?,No Alcohol,Not Permitted,Informal,No Accessibility,Low,Www.Cenidet.Edu.Mx,Quiet,F,Closed,,Cafeteria,Public,Cash,09:00-16:00;,Mon;Tue;Wed;Thu;Fri;
24,135086,22.141421,-101.013955,0101000020957F0000Fa1A0E5A9B4858C17C884C4173Ae...,Mcdonalds Parque Tangamanga,Lateral Salvador Nava Martinez 3145,San Luis Potosi,San Luis Potosi,Mexico,?,78290,No Alcohol,Not Permitted,Informal,No Accessibility,Medium,?,Familiar,T,Closed,,Burgers,Yes,Mastercard-Eurocard,08:00-23:00;,Mon;Tue;Wed;Thu;Fri;


In [1317]:
df_location.head()

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,zip,alcohol,smoking_area,dress_code,accessibility,price,url,Rambience,franchise,area,other_services,Rcuisine,parking_lot,Rpayment,hours,days
0,135109,18.9217848,-99.2353499,0101000020957F0000A6Bf695F136F5Ac1Dadf87B20556...,Paniroles,?,,,Mexico,?,?,Wine/ Beer,Not Permitted,Informal,No Accessibility,Medium,?,Quiet,F,Closed,Internet,Italian,,Cash,08:00-21:00;,Mon;Tue;Wed;Thu;Fri;
3,135106,22.1497088,-100.9760928,0101000020957F0000649D6F21634858C119Ae9Bf528A3...,El Rinc�N De San Francisco,Universidad 169,San Luis Potosi,San Luis Potosi,Mexico,?,78000,Wine/ Beer,Only At Bar,Informal,Partially,Medium,?,Familiar,F,Open,,Mexican,,Mastercard-Eurocard,18:00-23:30;,Mon;Tue;Wed;Thu;Fri;
12,135104,23.7529821,-99.1684341,0101000020957F00007Cdf5Eafc58157C1645743B23E4F...,Vips,?,San Luis Potosi,San Luis Potosi,Mexico,?,?,Full Bar,Not Permitted,Informal,Completely,Medium,?,Familiar,T,Closed,Variety,Mexican,Yes,Mastercard-Eurocard,00:00-23:30;,Mon;Tue;Wed;Thu;Fri;
21,135088,18.8760113,-99.2198896,0101000020957F0000E14Ad4Dbc7765Ac1F7B33C85B153...,Cafeteria Cenidet,Interior Internado Palmira Sn,Cuernavaca,Morelos,Mexico,?,?,No Alcohol,Not Permitted,Informal,No Accessibility,Low,Www.Cenidet.Edu.Mx,Quiet,F,Closed,,Cafeteria,Public,Cash,09:00-16:00;,Mon;Tue;Wed;Thu;Fri;
24,135086,22.141421,-101.013955,0101000020957F0000Fa1A0E5A9B4858C17C884C4173Ae...,Mcdonalds Parque Tangamanga,Lateral Salvador Nava Martinez 3145,San Luis Potosi,San Luis Potosi,Mexico,?,78290,No Alcohol,Not Permitted,Informal,No Accessibility,Medium,?,Familiar,T,Closed,,Burgers,Yes,Mastercard-Eurocard,08:00-23:00;,Mon;Tue;Wed;Thu;Fri;


## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Rcuisine</h1>


In [1318]:
df_location.Rcuisine.value_counts()

Rcuisine
Mexican             22
Bar                 11
Fast Food            6
Cafeteria            6
Seafood              5
Burgers              4
Italian              4
American             4
Japanese             4
Pizzeria             3
Chinese              3
International        3
Family               2
Cafe-Coffee Shop     1
Contemporary         1
Breakfast-Brunch     1
Bar Pub Brewery      1
Bakery               1
Vietnamese           1
Armenian             1
Regional             1
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Parking lot</h1>


In [1319]:
df_location.parking_lot.value_counts()

parking_lot
None             35
Yes              32
Public           16
Valet Parking     2
Name: count, dtype: int64

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Rpayment</h1>


In [1320]:
df_location.Rpayment.value_counts()

Rpayment
Cash                   46
American Express       14
Mastercard-Eurocard    11
Bank Debit Cards       11
Visa                    2
Carte Blanche           1
Name: count, dtype: int64

# <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift Condensed', sans-serif; font-size: 2.5em;">Exploratory Análisis
</h1>


## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Type of food</h1>


In [1321]:
## We deactivate this cell when we want to run the code  
## to create `df_location_filter` and `df_location_filter_collapsed`,  
## to avoid reducing the variety of cuisine types in the filter.  
## However, we deactivate it for `df_location` as it is intended  
## for visualization, and to make it more user-friendly,  
## we group the less represented categories under "Others".  

""" percentages = df_location['Rcuisine'].value_counts(normalize=True)
categories_to_replace = percentages[percentages < 0.03].index
df_location['Rcuisine'] = df_location['Rcuisine'].replace(categories_to_replace, 'Others')
freq_rcuisine_replaced = df_location.Rcuisine.value_counts().sort_values(ascending=False)
freq_rcuisine_replaced

freq_rcuisine=df_location["Rcuisine"].value_counts()
freq_rcuisine.plot.pie(autopct='%1.0f%%' ,startangle=90)
plt.show() """


' percentages = df_location[\'Rcuisine\'].value_counts(normalize=True)\ncategories_to_replace = percentages[percentages < 0.03].index\ndf_location[\'Rcuisine\'] = df_location[\'Rcuisine\'].replace(categories_to_replace, \'Others\')\nfreq_rcuisine_replaced = df_location.Rcuisine.value_counts().sort_values(ascending=False)\nfreq_rcuisine_replaced\n\nfreq_rcuisine=df_location["Rcuisine"].value_counts()\nfreq_rcuisine.plot.pie(autopct=\'%1.0f%%\' ,startangle=90)\nplt.show() '

# <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift Condensed', sans-serif; font-size: 2.5em;">Exporting the datasets
</h1>


In [1322]:
## df_location.to_csv("../data/df_location.csv",index=False)

## <h1 style="text-align: left; color: #FFB347; font-family: 'Bahnschrift', sans-serif; font-size: 1.5em;">Renaming and dropping columns for Dataframe filter oriented</h1>


In [1323]:
rename_dict = {
    "placeID": "Place ID",
    "latitude": "R Latitude",
    "longitude": "R Longitude",
    "the_geom_meter": "Geometrical Meter",
    "name": "Restaurant Name",
    "address": "Address",
    "city": "City",
    "state": "State",
    "country": "Country",
    "fax": "Fax",
    "zip": "ZIP Code",
    "alcohol": "Alcohol Availability",
    "smoking_area": "Smoking Area",
    "dress_code": "Dress Code",
    "accessibility": "Accessibility",
    "price": "Price Range",
    "url": "Website",
    "Rambience": "Ambience",
    "franchise": "Franchise",
    "area": "Area",
    "other_services": "Other Services",
    "Rcuisine": "Cuisine Type",
    "parking_lot": "Parking Availability",
    "Rpayment": "Payment Methods",
    "hours": "Opening Hours",
    "days": "Operating Days"
}

# Renombrar las columnas del DataFrame
df_location = df_location.rename(columns=rename_dict)


In [1324]:
df_location=df_location.drop(columns=["Geometrical Meter","Fax","Website","Franchise"])

In [1325]:
## This cell will just be activated in case we haven't
## dropped the duplicates 
## df_location.to_csv("../data/df_location_filter.csv",index=False)

In [None]:
## This cell will just be activated in case we have dropped
## the duplicates at the beginning of the notebook.
## df_location.to_csv("../data/df_location_filter_collapsed.csv",index=False)