In [153]:
# Librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [154]:
df = pd.read_csv("cdmx.csv",  encoding='latin1')
df.shape

(26401, 79)

In [155]:
print(df.isnull().sum().to_frame(name='nulos').to_string())

                                              nulos
id                                                0
listing_url                                       0
scrape_id                                         0
last_scraped                                      0
source                                            0
name                                              0
description                                     768
neighborhood_overview                         12431
picture_url                                       0
host_id                                           0
host_url                                          0
host_name                                       857
host_since                                      914
host_location                                  6237
host_about                                    11237
host_response_time                             4686
host_response_rate                             4686
host_acceptance_rate                           3610
host_is_supe

In [156]:
# Borrar primero las columnas que no tengas ningún registro 
df = df.drop(['neighbourhood_group_cleansed','calendar_updated', 'license'], axis=1)

In [157]:
df.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
       'description', 'neighborhood_overview', 'picture_url', 'host_id',
       'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'latitude', 'longitude', 'property_type',
       'room_type', 'accommodates', 'bathrooms', 'bathrooms_text', 'bedrooms',
       'beds', 'amenities', 'price', 'minimum_nights', 'maximum_nights',
       'minimum_minimum_nights', 'maximum_minimum_nights',
       'minimum_maximum_nights', 'maximum_maximum_nights',
       'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm', 'has_availability',
       'availability_3

In [158]:
# Separar las columnas numéricas y categóricas
df_obj = df.select_dtypes(include='object')
df_num = df.select_dtypes(include=['float64', 'int64'])

**Tratar los valores nulos en las columnas categóricas**

In [159]:
df_obj.isnull().sum()

listing_url                   0
last_scraped                  0
source                        0
name                          0
description                 768
neighborhood_overview     12431
picture_url                   0
host_url                      0
host_name                   857
host_since                  914
host_location              6237
host_about                11237
host_response_time         4686
host_response_rate         4686
host_acceptance_rate       3610
host_is_superhost          1359
host_thumbnail_url          914
host_picture_url            914
host_neighbourhood        12831
host_verifications          914
host_has_profile_pic        914
host_identity_verified      914
neighbourhood             12431
neighbourhood_cleansed        0
property_type                 0
room_type                     0
bathrooms_text               17
amenities                     0
price                      3274
has_availability           1000
calendar_last_scraped         0
first_re

In [160]:
# Rellenar los valores nulos en las columnas categóricas con un valor específico
# En estas columnas que cuentan con la información descriptiva de las propiedades, se rellena con un valor espcifico que nos permita idenfitificar que no se cuenta con esa información, ya que es la descripción de la propiedad y el vecindario, así como el nombre del host, también la columna de "host_about"
df_obj['description'] = df_obj['description'].fillna('No description provided') 
df_obj['neighborhood_overview'] = df_obj['neighborhood_overview'].fillna('No neighborhood overview provided')
df_obj['host_name'] = df_obj['host_name'].fillna('No host name provided')
df_obj['host_about'] = df_obj['host_about'].fillna('No host about provided')

In [161]:
# Formato de fecha
df_obj['host_since'] = pd.to_datetime(df_obj['host_since'], errors='coerce') # Convertir en formato fecha
df_obj['host_since'].dtype
mediana_fecha = df_obj['host_since'].median()
df_obj['host_since'] = df_obj['host_since'].fillna(mediana_fecha)

df_obj['first_review'] = pd.to_datetime(df_obj['first_review'], errors='coerce') # Convertir en formato fecha
df_obj['first_review'].dtype
mediana_1 = df_obj['first_review'].median()
df_obj['first_review'] = df_obj['first_review'].fillna(mediana_1)

df_obj['last_review'] = pd.to_datetime(df_obj['last_review'], errors='coerce') # Convertir en formato fecha
df_obj['last_review'].dtype

mediana_2 = df_obj['last_review'].median()
df_obj['last_review'] = df_obj['last_review'].fillna(mediana_2)

  df_obj['host_since'] = pd.to_datetime(df_obj['host_since'], errors='coerce') # Convertir en formato fecha


In [162]:
# Para la columna de "host_location", se rellena con el valor más frecuente, ya que es importante conocer la ubicación del anfitrión
most_frequent_location = df_obj['host_location'].mode()[0]
print(f"Most frequent location: {most_frequent_location}") # Como Mexico City es el valor más frecuente, se rellena con ese valor
df_obj['host_location'] = df_obj['host_location'].fillna(most_frequent_location)

Most frequent location: Mexico City, Mexico


In [163]:
# Para la columna categórica "Host_response_time", se rellena con el valor más frecuente, ya que es importante conocer el tiempo de respuesta del anfitrión
most_frequent_response_time = df_obj['host_response_time'].mode()[0]    
print(f"Most frequent response time: {most_frequent_response_time}") # Como "within a few hours" es el valor más frecuente, se rellena con ese valor
df_obj['host_response_time'] = df_obj['host_response_time'].fillna(most_frequent_response_time)

Most frequent response time: within an hour


In [164]:
# Columnas de %
# Conversión de porcentajes a numérico
for col in ['host_response_rate', 'host_acceptance_rate']:
    df_obj[col] = pd.to_numeric(
        df_obj[col].astype(str).str.replace('%', '', regex=True),
        errors='coerce'
    )
# Imputación cruzada: si falta host_response_rate, usar host_acceptance_rate
df_obj['host_response_rate'] = df_obj['host_response_rate'].fillna(df_obj['host_acceptance_rate'])

# Imputación final con marcadores artificiales para trazabilidad
df_obj['host_response_rate'] = df_obj['host_response_rate'].fillna(88)
df_obj['host_acceptance_rate'] = df_obj['host_acceptance_rate'].fillna(89)

In [165]:
# Para la columna de "host_is_superhost", se rellena con el valor más frecuente, ya que es importante conocer si el anfitrión es superhost o no
most_frequent_superhost = df_obj['host_is_superhost'].mode()[0]
print(f"Most frequent superhost: {most_frequent_superhost}") # Como "f" es el valor más frecuente, se rellena con ese valor
df_obj['host_is_superhost'] = df_obj['host_is_superhost'].fillna(most_frequent_superhost)

Most frequent superhost: f


In [166]:
# Para las columnas en URL se rellena con un URL especifico que nos permita identificar que no se cuenta con esa información
df_obj['host_thumbnail_url'] = df_obj['host_thumbnail_url'].fillna('https://a0.muscache.com/im/pictures/user/00000000-0000-0000-0000-000000000000.jpg?aki_policy=profile_small')
df_obj['host_picture_url']= df_obj['host_picture_url'].fillna('https://a0.muscache.com/im/users/000000/profile_pic/0000000000/original.jpg?aki_policy=profile_x_medium')

In [167]:
# Para los NAs de la columna  "host_neighbourhood", se rellena con "No host neighbourhood provided" ya que es más del 40% de los datos, y así no eliminamos tantos registros o la columna completa.
df_obj['host_neighbourhood'] = df_obj['host_neighbourhood'].fillna('No host neighbourhood provided')

In [168]:
# Para la columna "host_verifications", se utiliza el valor más frecuente, ya que es importante conocer si el anfitrión tiene verificación o no.
most_frequent_verification = df_obj['host_verifications'].mode()[0]
print(f"Most frequent verification: {most_frequent_verification}") # Como "email, phone" es el valor más frecuente, se rellena con ese valor
df_obj['host_verifications'] = df_obj['host_verifications'].fillna(most_frequent_verification)

Most frequent verification: ['email', 'phone']


In [169]:
# Para las columnas categóricas de f y t, se rellena con el valor más frecuente, ya que es importante conocer si el host tiene esas características o no.
df_obj['host_has_profile_pic'] = df_obj['host_has_profile_pic'].fillna('f')
df_obj['host_identity_verified'] = df_obj['host_identity_verified'].fillna('f')
df_obj['has_availability'] = df_obj['has_availability'].fillna('t')

In [170]:
# Al igual que la columna "host_neighbourhood", la columna "neighbourhood" tiene más del 40% de los datos nulos, por lo que se rellena con "No neighbourhood provided"
df_obj['neighbourhood'] = df_obj['neighbourhood'].fillna('No neighbourhood provided')

In [171]:
# Convertir la columna 'price' a tipo numérico, eliminando los símbolos de dólar y comas (IA consultada)
df_obj['price'] = df_obj['price'].astype(str).str.replace(r'[\$,]', '', regex=True)
df_obj['price'] = pd.to_numeric(df_obj['price'], errors='coerce')  # convierte a float, los errores se vuelven NaN
print("Valores nulos en price antes de imputar:", df_obj['price'].isna().sum())

Valores nulos en price antes de imputar: 3274


In [172]:
# Imputar con mediana por grupo (room_type y neighbourhood)
def imputar_seguro(x):
    mediana = x.median(skipna=True)
    return x.fillna(mediana)

df_obj['price'] = df_obj.groupby(['room_type', 'neighbourhood'])['price'].transform(imputar_seguro)

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

In [173]:
print("Valores nulos en price después de imputar:", df_obj['price'].isna().sum())

Valores nulos en price después de imputar: 45


In [174]:
# Imputar los valores nulos restantes con la mediana global de la columna "price"
mediana_global = df_obj['price'].median(skipna=True)
df_obj['price'] = df_obj['price'].fillna(mediana_global)

In [175]:
df_obj.isnull().sum()

listing_url                0
last_scraped               0
source                     0
name                       0
description                0
neighborhood_overview      0
picture_url                0
host_url                   0
host_name                  0
host_since                 0
host_location              0
host_about                 0
host_response_time         0
host_response_rate         0
host_acceptance_rate       0
host_is_superhost          0
host_thumbnail_url         0
host_picture_url           0
host_neighbourhood         0
host_verifications         0
host_has_profile_pic       0
host_identity_verified     0
neighbourhood              0
neighbourhood_cleansed     0
property_type              0
room_type                  0
bathrooms_text            17
amenities                  0
price                      0
has_availability           0
calendar_last_scraped      0
first_review               0
last_review                0
instant_bookable           0
dtype: int64

**Tratar los valores nulos en las columnas numéricas**

In [176]:
df_num.isnull().sum()

id                                                 0
scrape_id                                          0
host_id                                            0
host_listings_count                              914
host_total_listings_count                        914
latitude                                           0
longitude                                          0
accommodates                                       0
bathrooms                                       3281
bedrooms                                         924
beds                                            3297
minimum_nights                                     0
maximum_nights                                     0
minimum_minimum_nights                            20
maximum_minimum_nights                            20
minimum_maximum_nights                            20
maximum_maximum_nights                            20
minimum_nights_avg_ntm                             0
maximum_nights_avg_ntm                        

In [177]:
df_num.describe()

Unnamed: 0,id,scrape_id,host_id,host_listings_count,host_total_listings_count,latitude,longitude,accommodates,bathrooms,bedrooms,...,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
count,26401.0,26401.0,26401.0,25487.0,25487.0,26401.0,26401.0,26401.0,23120.0,25477.0,...,23026.0,23026.0,23026.0,23026.0,23026.0,26401.0,26401.0,26401.0,26401.0,23028.0
mean,6.488522e+17,20250600000000.0,235411200.0,24.467022,33.189822,19.405311,-99.165638,3.333434,1.450389,1.555089,...,4.751745,4.836819,4.823578,4.840647,4.713399,14.686906,11.626113,2.806636,0.212984,1.806801
std,5.483558e+17,0.0,200584400.0,82.394384,115.57014,0.042498,0.033679,2.338818,1.033995,1.272157,...,0.378038,0.326614,0.359979,0.298201,0.395388,33.417154,32.908658,7.745238,2.059543,2.125861
min,35797.0,20250600000000.0,7365.0,1.0,1.0,19.177848,-99.33963,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.01
25%,42559920.0,20250600000000.0,54364770.0,1.0,2.0,19.39188,-99.178614,2.0,1.0,1.0,...,4.7,4.82,4.81,4.8,4.66,1.0,0.0,0.0,0.0,0.44
50%,7.821415e+17,20250600000000.0,170583300.0,4.0,5.0,19.41512,-99.16726,2.0,1.0,1.0,...,4.85,4.92,4.92,4.92,4.8,3.0,1.0,0.0,0.0,1.23
75%,1.148084e+18,20250600000000.0,416156000.0,14.0,17.0,19.431892,-99.153975,4.0,2.0,2.0,...,4.95,5.0,5.0,5.0,4.91,11.0,6.0,2.0,0.0,2.56
max,1.450438e+18,20250600000000.0,703177300.0,896.0,946.0,19.56101,-98.96336,16.0,49.0,50.0,...,5.0,5.0,5.0,5.0,5.0,235.0,235.0,70.0,30.0,80.38


In [178]:
# Columnas numéricas con valores decimales 
df_num['review_scores_rating'] = df_num['review_scores_rating'].fillna(round(df_num['review_scores_rating'].mean(), 2))
df_num['review_scores_accuracy'] = df_num['review_scores_accuracy'].fillna(round(df_num['review_scores_accuracy'].mean(), 2))
df_num['review_scores_cleanliness'] = df_num['review_scores_cleanliness'].fillna(round(df_num['review_scores_cleanliness'].mean(), 2))
df_num['review_scores_checkin'] = df_num['review_scores_checkin'].fillna(round(df_num['review_scores_checkin'].mean(), 2))
df_num['review_scores_communication'] = df_num['review_scores_communication'].fillna(round(df_num['review_scores_communication'].mean(), 2))
df_num['review_scores_location'] = df_num['review_scores_location'].fillna(round(df_num['review_scores_location'].mean(), 2))
df_num['review_scores_value'] = df_num['review_scores_value'].fillna(round(df_num['review_scores_value'].mean(), 2))
df_num['reviews_per_month'] = df_num['reviews_per_month'].fillna(round(df_num['reviews_per_month'].mean(), 2))

In [179]:
# Las columnas que no tienen decimales tratarlas con la mediana
df_num['host_listings_count'] = df_num['host_listings_count'].fillna(df_num['host_listings_count'].median())
df_num['host_total_listings_count'] = df_num['host_total_listings_count'].fillna(df_num['host_total_listings_count'].median())
df_num['bedrooms'] = df_num['bedrooms'].fillna(df_num['bedrooms'].median())
df_num['beds'] = df_num['beds'].fillna(df_num['beds'].median())
df_num['minimum_minimum_nights'] = df_num['minimum_minimum_nights'].fillna(df_num['minimum_minimum_nights'].median())
df_num['maximum_minimum_nights'] = df_num['maximum_minimum_nights'].fillna(df_num['maximum_minimum_nights'].median())
df_num['minimum_maximum_nights'] = df_num['minimum_maximum_nights'].fillna(df_num['minimum_maximum_nights'].median())
df_num['maximum_maximum_nights'] = df_num['maximum_maximum_nights'].fillna(df_num['maximum_maximum_nights'].median())
df_num['estimated_revenue_l365d'] = df_num['estimated_revenue_l365d'].fillna(df_num['estimated_revenue_l365d'].median())

In [180]:
df_num.isnull().sum()

id                                                 0
scrape_id                                          0
host_id                                            0
host_listings_count                                0
host_total_listings_count                          0
latitude                                           0
longitude                                          0
accommodates                                       0
bathrooms                                       3281
bedrooms                                           0
beds                                               0
minimum_nights                                     0
maximum_nights                                     0
minimum_minimum_nights                             0
maximum_minimum_nights                             0
minimum_maximum_nights                             0
maximum_maximum_nights                             0
minimum_nights_avg_ntm                             0
maximum_nights_avg_ntm                        

In [181]:
# Juntar los dos data frames en uno
data = pd.concat([df_obj, df_num], axis=1) 
data.describe()

Unnamed: 0,host_since,host_response_rate,host_acceptance_rate,price,first_review,last_review,id,scrape_id,host_id,host_listings_count,...,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
count,26401,26401.0,26401.0,26401.0,26401,26401,26401.0,26401.0,26401.0,26401.0,...,26401.0,26401.0,26401.0,26401.0,26401.0,26401.0,26401.0,26401.0,26401.0,26401.0
mean,2018-08-18 14:40:13.272224512,91.930609,88.873603,1860.831124,2022-09-08 18:18:07.322449920,2025-02-26 22:42:52.538918656,6.488522e+17,20250600000000.0,235411200.0,23.758456,...,4.751522,4.837226,4.823121,4.840564,4.712965,14.686906,11.626113,2.806636,0.212984,1.80721
min,2009-02-03 00:00:00,0.0,0.0,66.0,2011-01-11 00:00:00,2014-01-08 00:00:00,35797.0,20250600000000.0,7365.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.01
25%,2016-03-01 00:00:00,91.0,89.0,582.0,2022-12-04 00:00:00,2025-04-06 00:00:00,42559920.0,20250600000000.0,54364770.0,1.0,...,4.73,4.83,4.82,4.83,4.68,1.0,0.0,0.0,0.0,0.54
50%,2018-01-31 00:00:00,100.0,99.0,1043.0,2022-12-04 00:00:00,2025-04-06 00:00:00,7.821415e+17,20250600000000.0,170583300.0,4.0,...,4.81,4.89,4.9,4.9,4.77,3.0,1.0,0.0,0.0,1.57
75%,2021-06-15 00:00:00,100.0,100.0,1522.0,2022-12-04 00:00:00,2025-04-06 00:00:00,1.148084e+18,20250600000000.0,416156000.0,13.0,...,4.94,4.98,4.99,4.98,4.89,11.0,6.0,2.0,0.0,2.31
max,2025-06-24 00:00:00,100.0,100.0,900000.0,2025-12-06 00:00:00,2025-12-06 00:00:00,1.450438e+18,20250600000000.0,703177300.0,896.0,...,5.0,5.0,5.0,5.0,5.0,235.0,235.0,70.0,30.0,80.38
std,,21.186621,23.015447,17319.675925,,,5.483558e+17,0.0,200584400.0,81.04195,...,0.353048,0.305025,0.336184,0.278488,0.369253,33.417154,32.908658,7.745238,2.059543,1.985417


In [182]:
print(data.isnull().sum().to_frame(name='nulos').to_string())

                                              nulos
listing_url                                       0
last_scraped                                      0
source                                            0
name                                              0
description                                       0
neighborhood_overview                             0
picture_url                                       0
host_url                                          0
host_name                                         0
host_since                                        0
host_location                                     0
host_about                                        0
host_response_time                                0
host_response_rate                                0
host_acceptance_rate                              0
host_is_superhost                                 0
host_thumbnail_url                                0
host_picture_url                                  0
host_neighbo

In [183]:
# Para la columna "bathrooms" se rellenara con el número que este en la columna "bathrooms_text" ya que es la misma información pero en el número con el texto (IA consultada)
# Extraer el número (puede ser decimal) usando expresión regular
data['bathrooms_text_clean'] = data['bathrooms_text'].str.extract(r'(\d+\.?\d*)')

# Convertir a tipo float
data['bathrooms_text_clean'] = pd.to_numeric(data['bathrooms_text_clean'], errors='coerce')

In [184]:
# Sustituir los NAs en bathrooms con bathrooms_text_clean
data['bathrooms'] = data['bathrooms'].fillna(data['bathrooms_text_clean'])

In [185]:
data.drop(columns=['bathrooms_text_clean'], inplace=True)

In [187]:
print("NAs restantes en bathrooms:", data['bathrooms'].isna().sum())


NAs restantes en bathrooms: 26


In [188]:
# Sustituir los demas NAs en bathrooms y bathrooms_text con 0 y 0 baths
data['bathrooms'] = data['bathrooms'].fillna(0)
data['bathrooms_text'] = data['bathrooms_text'].fillna('0 baths')

In [189]:
print(data.isnull().sum().to_frame(name='nulos').to_string())

                                              nulos
listing_url                                       0
last_scraped                                      0
source                                            0
name                                              0
description                                       0
neighborhood_overview                             0
picture_url                                       0
host_url                                          0
host_name                                         0
host_since                                        0
host_location                                     0
host_about                                        0
host_response_time                                0
host_response_rate                                0
host_acceptance_rate                              0
host_is_superhost                                 0
host_thumbnail_url                                0
host_picture_url                                  0
host_neighbo

In [190]:
# Ordenar las columnas como el df original
# Lista con el orden deseado
column_order = ['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
    'description', 'neighborhood_overview', 'picture_url', 'host_id',
    'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
    'host_response_time', 'host_response_rate', 'host_acceptance_rate',
    'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
    'host_neighbourhood', 'host_listings_count',
    'host_total_listings_count', 'host_verifications',
    'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
    'neighbourhood_cleansed', 'latitude', 'longitude', 'property_type',
    'room_type', 'accommodates', 'bathrooms', 'bathrooms_text', 'bedrooms',
    'beds', 'amenities', 'price', 'minimum_nights', 'maximum_nights',
    'minimum_minimum_nights', 'maximum_minimum_nights',
    'minimum_maximum_nights', 'maximum_maximum_nights',
    'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm', 'has_availability',
    'availability_30', 'availability_60', 'availability_90',
    'availability_365', 'calendar_last_scraped', 'number_of_reviews',
    'number_of_reviews_ltm', 'number_of_reviews_l30d', 'availability_eoy',
    'number_of_reviews_ly', 'estimated_occupancy_l365d',
    'estimated_revenue_l365d', 'first_review', 'last_review',
    'review_scores_rating', 'review_scores_accuracy',
    'review_scores_cleanliness', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_location',
    'review_scores_value', 'instant_bookable',
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms', 'reviews_per_month']

# Reordenar el DataFrame
data = data[column_order]


In [None]:
# Verificamos que el data frame final tenga los registros y columnas esperadas, ya sin incluir las columnas eliminadas
print(data.shape)

(26401, 76)


In [None]:
data.isna().any().any() # Se vuelve a verificar que no haya NAs

np.False_

In [194]:
#Convertir el df ya limpio a CSV
data.to_csv("cdmx_sin_nas.csv")