In [4]:

from scrapy.item import Field
from scrapy.item import Item
from scrapy.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.loader.processors import MapCompose
from scrapy.linkextractors import LinkExtractor
from scrapy.loader import ItemLoader

from scrapy.spiders import Spider
from scrapy.loader import ItemLoader
from bs4 import BeautifulSoup
from scrapy.crawler import CrawlerProcess

In [5]:

class Hotel(Item):
    nombre = Field()
    precio = Field() # El precio ahora carga dinamicamente. Por eso ahora obtenemos el score del hotel
    score = Field()
    categorias = Field()
    direccion = Field()

# CLASE CORE - Al querer hacer extraccion de multiples paginas, heredamos de CrawlSpider
class TripAdvisor(CrawlSpider):
    name = 'hotelestripadvisor'
    custom_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }

    # Reduce el espectro de busqueda de URLs. No nos podemos salir de los dominios de esta lista
    allowed_domains = ['tripadvisor.com']

    # Url semilla a la cual se hara el primer requerimiento
    #start_urls = ['https://www.tripadvisor.com/Hotels-g303845-Guayaquil_Guayas_Province-Hotels.html']
    start_urls = ['https://www.tripadvisor.com/Hotels-g294299-Puerto_Varas_Los_Lagos_Region-Hotels.html']

    # Tiempo de espera entre cada requerimiento. Nos ayuda a proteger nuestra IP.
    download_delay = 2

    # Tupla de reglas para direccionar el movimiento de nuestro Crawler a traves de las paginas
    rules = (
        Rule( # Regla de movimiento VERTICAL hacia el detalle de los hoteles
            LinkExtractor(
                allow=r'/Hotel_Review-' # Si la URL contiene este patron, haz un requerimiento a esa URL
            ), follow=True, callback="parse_hotel"), # El callback es el nombre de la funcion que se va a llamar con la respuesta al requerimiento hacia estas URLs
    )

    # Funcion a utilizar con MapCompose para realizar limpieza de datos
    def quitarDolar(self, texto):
        return texto.replace("$", "")

    # Callback de la regla
    def parse_hotel(self, response):
        sel = Selector(response)
        item = ItemLoader(Hotel(), sel)
        item.add_xpath('nombre', '//h1[@id="HEADING"]/text()')
        item.add_xpath('precio', '//div[@class = "vyWtt"]/text()',MapCompose(self.quitarDolar))
        item.add_xpath('score', '//span[@class ="kJyXc P"]/text()')
        item.add_xpath('categorias', '//div[@class = "PtOPK"]/text()')
        item.add_xpath('direccion', '//span[@class = "biGQs _P pZUbB KxBGd"]/text()')
        
        # Utilizo Map Compose con funciones anonimas
        # PARA INVESTIGAR: Que son las funciones anonimas en Python?
        #item.add_xpath('descripcion', '//div[@id="ABOUT_TAB"]//div[@class="fIrGe _T"]/text()', # //text() nos permite obtener el texto de todos los hijos
                       #MapCompose(lambda i: i.replace('\n', '').replace('\r', '')))
        #item.add_xpath('amenities',
                    #   '//div[contains(@data-test-target, "amenity_text")]/text()')
        yield item.load_item()

# EJECUCION
# scrapy runspider 1_tripadvisor.py -o tripadvisor.csv


In [6]:
process = CrawlerProcess({
    'FEED_FORMAT': 'csv',
    'FEED_URI': 'Propuesta_hoteles_ptvaras2024_PRUEB.csv'
})

process.crawl(TripAdvisor)
process.start()

2024-03-25 18:01:54 [scrapy.utils.log] INFO: Scrapy 2.11.1 started (bot: scrapybot)
2024-03-25 18:01:54 [scrapy.utils.log] INFO: Versions: lxml 5.1.0.0, libxml2 2.10.3, cssselect 1.2.0, parsel 1.9.0, w3lib 2.1.2, Twisted 24.3.0, Python 3.12.1 (tags/v3.12.1:2305ca5, Dec  7 2023, 22:03:25) [MSC v.1937 64 bit (AMD64)], pyOpenSSL 24.1.0 (OpenSSL 3.2.1 30 Jan 2024), cryptography 42.0.5, Platform Windows-11-10.0.22621-SP0
2024-03-25 18:01:54 [scrapy.addons] INFO: Enabled addons:
[]


See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.
  return cls(crawler)

2024-03-25 18:01:54 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2024-03-25 18:01:54 [scrapy.extensions.telnet] INFO: Telnet Password: 7c51e8f75ba4e2b1
  exporter = cls(crawler)

2024-03-25 18:01:55 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsol

In [None]:
from geopy.geocoders import Nominatim

# Crea una instancia del geocodificador Nominatim con un user_agent personalizado
geolocator = Nominatim(user_agent="mi_aplicacion_geocodificadora")

In [None]:
import pandas as pd
# Para configurar opciones de visualización en pandas para que muestre más filas o columnas, puedes usar pd.set_option
pd.set_option('display.max_colwidth', 1000) 
# Para restablecer las opciones a su valor por defecto, puedes usar pd.reset_option
# pd.reset_option('display.max_rows')
# pd.reset_option('display.max_columns')

df = pd.read_csv('./data/Propuesta_hoteles_ptvaras2024.csv')
df

Unnamed: 0,categorias,direccion,nombre,precio,score
0,Excellent,"8,194, reviews,Via Napo Torriani 18, 20124 Milan Italy,8,194, reviews,Via Napo Torriani 18, 20124 Milan Italy,Name/address in local language",Hotel Berna,,4.5
1,Very good,"531, reviews,La Paz 471, Puerto Varas 5550000 Chile,531, reviews,La Paz 471, Puerto Varas 5550000 Chile,Name/address in local language",Park Inn By Radisson Puerto Varas,,3.5
2,Very good,"263, reviews,Avenida Los Colonos no. 60, Puerto Varas Chile,263, reviews,Avenida Los Colonos no. 60, Puerto Varas Chile,Name/address in local language",Puerto Chico Hotel,,4.0
3,Very good,"342, reviews,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, reviews,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,Name/address in local language",Casa Azul Hostel,,4.0
4,Excellent,"347, reviews,Imperial 695, Puerto Varas 12345 Chile,347, reviews,Imperial 695, Puerto Varas 12345 Chile,Name/address in local language",Hotel Puelche,,4.5
...,...,...,...,...,...
2219,Excellent,"320, reviews,Pigianos Kampos, Pigianos Kampos, Crete 74100 Greece,320, reviews,Pigianos Kampos, Pigianos Kampos, Crete 74100 Greece,Name/address in local language",CHC Imperial Palace,,4.5
2220,Very good,"404, reviews,7828 Orangethorpe Ave, Buena Park, CA 90621,404, reviews,7828 Orangethorpe Ave, Buena Park, CA 90621",Fairfield Inn & Suites Anaheim North/Buena Park,,4.0
2221,Excellent,"2,329, reviews,830 Conti St, New Orleans, LA 70112-3470,2,329, reviews,830 Conti St, New Orleans, LA 70112-3470",Prince Conti Hotel,,4.5
2222,Excellent,"2,329, reviews,830 Conti St, New Orleans, LA 70112-3470,2,329, reviews,830 Conti St, New Orleans, LA 70112-3470",Prince Conti Hotel,,4.5


In [None]:
df_chile = df[df['direccion'].str.contains('Chile',na=False)]

In [None]:
# Utilizamos una expresión regular para extraer el número de reviews
df_chile['reviews'] = df_chile['direccion'].str.extract(r'(\d+), reviews')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chile['reviews'] = df_chile['direccion'].str.extract(r'(\d+), reviews')


In [None]:
#df_chile.to_csv('hoteles_ptvaras_chile.csv')

In [None]:
df_chile = df_chile.drop_duplicates(subset=['nombre'], keep='last')

In [None]:
# Re-importando pandas ya que el estado del código se ha reiniciado

# Definir la función que realiza las transformaciones requeridas en la cadena de texto
def transformar_direccion(texto):
    # Elimina todos los números que empiezan desde la izquierda
    texto = texto.lstrip('0123456789, ')
    # Elimina la palabra 'reviews'
    texto = texto.replace('reviews', '')
    texto = texto.replace('Name/address in local language', '')
    #texto = texto.replace(',', '')
    # Elimina solo el número de la derecha (el caracter numérico final de la cadena)
    texto = texto.rstrip('0123456789')
    # Elimina la última coma y espacios resultantes al final de la cadena
    texto = texto.rstrip(', ')
    return texto.strip()

# Aplicar la función a cada elemento de la columna 'direccion'
df_chile['direccion'] = df_chile['direccion'].apply(transformar_direccion)

df_chile


Unnamed: 0,categorias,direccion,nombre,precio,score,reviews
16,Excellent,",Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile",Silvestre Cabanas,,4.5,3
19,Very good,",Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile",Puerto Chico Hotel,,4.0,263
52,Very good,",La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile",Park Inn By Radisson Puerto Varas,,3.5,531
72,Excellent,",Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile",Hotel Puelche,,4.5,347
91,Excellent,",Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile",Radisson Hotel Puerto Varas,,4.5,861
93,Excellent,",Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile",Hotel Awa,,4.5,339
105,Very good,",Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile",Casa Azul Hostel,,4.0,342
107,Very good,",Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile",Enjoy Puerto Varas,,4.0,970
125,Excellent,",Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile",Hotel Cabana del Lago,,4.5,950
129,Excellent,",Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile",Solace Hotel,,4.5,821


In [None]:
# Utilizamos una expresión regular para eliminar letras pegadas a números en cada fila.
df_chile['texto'] = df_chile['direccion'].str.replace(r'[A-Za-z]+\d+', '', regex=True)
df_chile

Unnamed: 0,categorias,direccion,nombre,precio,score,reviews,texto
16,Excellent,",Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile",Silvestre Cabanas,,4.5,3,",Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile"
19,Very good,",Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile",Puerto Chico Hotel,,4.0,263,",Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile"
52,Very good,",La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile",Park Inn By Radisson Puerto Varas,,3.5,531,",La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile"
72,Excellent,",Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile",Hotel Puelche,,4.5,347,",Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile"
91,Excellent,",Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile",Radisson Hotel Puerto Varas,,4.5,861,",Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile"
93,Excellent,",Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile",Hotel Awa,,4.5,339,",Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile"
105,Very good,",Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile",Casa Azul Hostel,,4.0,342,",Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile"
107,Very good,",Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile",Enjoy Puerto Varas,,4.0,970,",Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile"
125,Excellent,",Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile",Hotel Cabana del Lago,,4.5,950,",Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile"
129,Excellent,",Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile",Solace Hotel,,4.5,821,",Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile"


In [None]:
# Eliminar solo la primera coma de cada elemento en la columna 'texto'
df_chile['texto'] = df_chile['texto'].apply(lambda x: x.replace(',', '', 1))
df_chile

Unnamed: 0,categorias,direccion,nombre,precio,score,reviews,texto
16,Excellent,",Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile",Silvestre Cabanas,,4.5,3,"Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile"
19,Very good,",Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile",Puerto Chico Hotel,,4.0,263,"Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile"
52,Very good,",La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile",Park Inn By Radisson Puerto Varas,,3.5,531,"La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile"
72,Excellent,",Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile",Hotel Puelche,,4.5,347,"Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile"
91,Excellent,",Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile",Radisson Hotel Puerto Varas,,4.5,861,"Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile"
93,Excellent,",Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile",Hotel Awa,,4.5,339,"Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile"
105,Very good,",Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile",Casa Azul Hostel,,4.0,342,"Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile"
107,Very good,",Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile",Enjoy Puerto Varas,,4.0,970,"Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile"
125,Excellent,",Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile",Hotel Cabana del Lago,,4.5,950,"Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile"
129,Excellent,",Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile",Solace Hotel,,4.5,821,"Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile"


In [None]:
df_chile.reset_index(drop=True,inplace=True)

In [None]:
df_chile_text = df_chile[['texto']]
df_chile_text

Unnamed: 0,texto
0,"Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile"
1,"Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile"
2,"La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile"
3,"Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile"
4,"Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile"
5,"Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile"
6,"Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile"
7,"Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile"
8,"Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile"
9,"Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile"


In [None]:
# Separar por comas cada fila y expandir en nuevas columnas
df_chile_text_sep = df_chile_text['texto'].str.split(',', expand=True)

# Asignar nuevos nombres de columna con formato x1, x2, ..., xn
nombres_columnas = [f'x{i+1}' for i in range(df_chile_text_sep.shape[1])]
df_chile_text_sep.columns = nombres_columnas
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,


In [None]:
df_chile_text_sep['localidad'] = [ str(x)+' '+str(y)+' '+str(z) for x,y,z in df_chile_text_sep[['x1','x2','x8']].values]
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,localidad
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,,Los Colonos 1183 Puerto Varas 5550100 Chile None
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,,Avenida Los Colonos no. 60 Puerto Varas Chile None
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,,La Paz 471 Puerto Varas 5550000 Chile None
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,,Imperial 695 Puerto Varas 12345 Chile None
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,,Del Salvador 024 Puerto Varas 5550000 Chile None
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas 5550000 Chile
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,,Manzanal 66 Corner Rosario Puerto Varas 5550587 Chile None
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,,Klenner 349 Puerto Varas 5550454 Chile None
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,,Klener 195 Pasaje Luis Wellmann Puerto Varas 5550447 Chile None
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,,Imperial 0211 Puerto Varas 5550000 Chile None


In [None]:
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('None', '')
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,localidad
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,,Los Colonos 1183 Puerto Varas 5550100 Chile
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,,Avenida Los Colonos no. 60 Puerto Varas Chile
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,,La Paz 471 Puerto Varas 5550000 Chile
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,,Imperial 695 Puerto Varas 12345 Chile
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,,Del Salvador 024 Puerto Varas 5550000 Chile
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas 5550000 Chile
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,,Manzanal 66 Corner Rosario Puerto Varas 5550587 Chile
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,,Klenner 349 Puerto Varas 5550454 Chile
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,,Klener 195 Pasaje Luis Wellmann Puerto Varas 5550447 Chile
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,,Imperial 0211 Puerto Varas 5550000 Chile


In [None]:
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('5550000 ', '')
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,localidad
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,,Los Colonos 1183 Puerto Varas 5550100 Chile
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,,Avenida Los Colonos no. 60 Puerto Varas Chile
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,,La Paz 471 Puerto Varas Chile
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,,Imperial 695 Puerto Varas 12345 Chile
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,,Del Salvador 024 Puerto Varas Chile
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,,Manzanal 66 Corner Rosario Puerto Varas 5550587 Chile
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,,Klenner 349 Puerto Varas 5550454 Chile
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,,Klener 195 Pasaje Luis Wellmann Puerto Varas 5550447 Chile
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,,Imperial 0211 Puerto Varas Chile


In [None]:
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('5550100', '')
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,localidad
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,,Los Colonos 1183 Puerto Varas Chile
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,,Avenida Los Colonos no. 60 Puerto Varas Chile
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,,La Paz 471 Puerto Varas Chile
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,,Imperial 695 Puerto Varas 12345 Chile
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,,Del Salvador 024 Puerto Varas Chile
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,,Manzanal 66 Corner Rosario Puerto Varas 5550587 Chile
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,,Klenner 349 Puerto Varas 5550454 Chile
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,,Klener 195 Pasaje Luis Wellmann Puerto Varas 5550447 Chile
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,,Imperial 0211 Puerto Varas Chile


In [None]:
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('5550587', '')
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('5550454', '')
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('5550447', '')
df_chile_text_sep['localidad'] = df_chile_text_sep['localidad'].str.replace('12345', '')
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,localidad
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,,Los Colonos 1183 Puerto Varas Chile
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,,Avenida Los Colonos no. 60 Puerto Varas Chile
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,,La Paz 471 Puerto Varas Chile
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,,Imperial 695 Puerto Varas Chile
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,,Del Salvador 024 Puerto Varas Chile
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,,Manzanal 66 Corner Rosario Puerto Varas Chile
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,,Klenner 349 Puerto Varas Chile
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,,Klener 195 Pasaje Luis Wellmann Puerto Varas Chile
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,,Imperial 0211 Puerto Varas Chile


In [None]:
df_chile_text_sep['latitud']= df_chile_text_sep['localidad'].apply(lambda x: geolocator.geocode(x).latitude if geolocator.geocode(x) else None)
df_chile_text_sep['longitud']= df_chile_text_sep['localidad'].apply(lambda x: geolocator.geocode(x).longitude if geolocator.geocode(x) else None)
df_chile_text_sep

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,localidad,latitud,longitud
0,Los Colonos 1183,Puerto Varas 5550100 Chile,3,,Los Colonos 1183,Puerto Varas 5550100 Chile,,,Los Colonos 1183 Puerto Varas Chile,-41.329796,-72.961064
1,Avenida Los Colonos no. 60,Puerto Varas Chile,263,,Avenida Los Colonos no. 60,Puerto Varas Chile,,,Avenida Los Colonos no. 60 Puerto Varas Chile,-41.328733,-72.960613
2,La Paz 471,Puerto Varas 5550000 Chile,531,,La Paz 471,Puerto Varas 5550000 Chile,,,La Paz 471 Puerto Varas Chile,-41.31491,-72.985654
3,Imperial 695,Puerto Varas 12345 Chile,347,,Imperial 695,Puerto Varas 12345 Chile,,,Imperial 695 Puerto Varas Chile,-41.322458,-72.97683
4,Del Salvador 024,Puerto Varas 5550000 Chile,861,,Del Salvador 024,Puerto Varas 5550000 Chile,,,Del Salvador 024 Puerto Varas Chile,-41.317983,-72.981778
5,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,339.0,,Ruta 225,Km. 27 Sector Los Riscos,Puerto Varas 5550000 Chile,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile,,
6,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,342,,Manzanal 66 Corner Rosario,Puerto Varas 5550587 Chile,,,Manzanal 66 Corner Rosario Puerto Varas Chile,,
7,Klenner 349,Puerto Varas 5550454 Chile,970,,Klenner 349,Puerto Varas 5550454 Chile,,,Klenner 349 Puerto Varas Chile,-41.314247,-72.983703
8,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,2,950.0,,Klener 195 Pasaje Luis Wellmann,Puerto Varas 5550447 Chile,,Klener 195 Pasaje Luis Wellmann Puerto Varas Chile,,
9,Imperial 0211,Puerto Varas 5550000 Chile,821,,Imperial 0211,Puerto Varas 5550000 Chile,,,Imperial 0211 Puerto Varas Chile,-41.320709,-72.983904


In [None]:
df_chile_text_sep2 = df_chile_text_sep[['localidad','latitud','longitud']]
df_chile_text_sep2

Unnamed: 0,localidad,latitud,longitud
0,Los Colonos 1183 Puerto Varas Chile,-41.329796,-72.961064
1,Avenida Los Colonos no. 60 Puerto Varas Chile,-41.328733,-72.960613
2,La Paz 471 Puerto Varas Chile,-41.31491,-72.985654
3,Imperial 695 Puerto Varas Chile,-41.322458,-72.97683
4,Del Salvador 024 Puerto Varas Chile,-41.317983,-72.981778
5,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile,,
6,Manzanal 66 Corner Rosario Puerto Varas Chile,,
7,Klenner 349 Puerto Varas Chile,-41.314247,-72.983703
8,Klener 195 Pasaje Luis Wellmann Puerto Varas Chile,,
9,Imperial 0211 Puerto Varas Chile,-41.320709,-72.983904


In [None]:
df_chile_ct = pd.concat([df_chile,df_chile_text_sep2], axis=1)
df_chile_ct

Unnamed: 0,categorias,direccion,nombre,precio,score,reviews,texto,localidad,latitud,longitud
0,Excellent,",Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile",Silvestre Cabanas,,4.5,3,"Los Colonos 1183, Puerto Varas 5550100 Chile,3, ,Los Colonos 1183, Puerto Varas 5550100 Chile",Los Colonos 1183 Puerto Varas Chile,-41.329796,-72.961064
1,Very good,",Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile",Puerto Chico Hotel,,4.0,263,"Avenida Los Colonos no. 60, Puerto Varas Chile,263, ,Avenida Los Colonos no. 60, Puerto Varas Chile",Avenida Los Colonos no. 60 Puerto Varas Chile,-41.328733,-72.960613
2,Very good,",La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile",Park Inn By Radisson Puerto Varas,,3.5,531,"La Paz 471, Puerto Varas 5550000 Chile,531, ,La Paz 471, Puerto Varas 5550000 Chile",La Paz 471 Puerto Varas Chile,-41.31491,-72.985654
3,Excellent,",Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile",Hotel Puelche,,4.5,347,"Imperial 695, Puerto Varas 12345 Chile,347, ,Imperial 695, Puerto Varas 12345 Chile",Imperial 695 Puerto Varas Chile,-41.322458,-72.97683
4,Excellent,",Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile",Radisson Hotel Puerto Varas,,4.5,861,"Del Salvador 024, Puerto Varas 5550000 Chile,861, ,Del Salvador 024, Puerto Varas 5550000 Chile",Del Salvador 024 Puerto Varas Chile,-41.317983,-72.981778
5,Excellent,",Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile",Hotel Awa,,4.5,339,"Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile,339, ,Ruta 225, Km. 27 Sector Los Riscos, Puerto Varas 5550000 Chile",Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile,,
6,Very good,",Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile",Casa Azul Hostel,,4.0,342,"Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile,342, ,Manzanal 66 Corner Rosario, Puerto Varas 5550587 Chile",Manzanal 66 Corner Rosario Puerto Varas Chile,,
7,Very good,",Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile",Enjoy Puerto Varas,,4.0,970,"Klenner 349, Puerto Varas 5550454 Chile,970, ,Klenner 349, Puerto Varas 5550454 Chile",Klenner 349 Puerto Varas Chile,-41.314247,-72.983703
8,Excellent,",Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile",Hotel Cabana del Lago,,4.5,950,"Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile,2,950, ,Klener 195 Pasaje Luis Wellmann, Puerto Varas 5550447 Chile",Klener 195 Pasaje Luis Wellmann Puerto Varas Chile,,
9,Excellent,",Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile",Solace Hotel,,4.5,821,"Imperial 0211, Puerto Varas 5550000 Chile,821, ,Imperial 0211, Puerto Varas 5550000 Chile",Imperial 0211 Puerto Varas Chile,-41.320709,-72.983904


In [None]:
df_chile_ct = df_chile_ct[['nombre','localidad','score','reviews','categorias','latitud','longitud']]
df_chile_ct

Unnamed: 0,nombre,localidad,score,reviews,categorias,latitud,longitud
0,Silvestre Cabanas,Los Colonos 1183 Puerto Varas Chile,4.5,3,Excellent,-41.329796,-72.961064
1,Puerto Chico Hotel,Avenida Los Colonos no. 60 Puerto Varas Chile,4.0,263,Very good,-41.328733,-72.960613
2,Park Inn By Radisson Puerto Varas,La Paz 471 Puerto Varas Chile,3.5,531,Very good,-41.31491,-72.985654
3,Hotel Puelche,Imperial 695 Puerto Varas Chile,4.5,347,Excellent,-41.322458,-72.97683
4,Radisson Hotel Puerto Varas,Del Salvador 024 Puerto Varas Chile,4.5,861,Excellent,-41.317983,-72.981778
5,Hotel Awa,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile,4.5,339,Excellent,,
6,Casa Azul Hostel,Manzanal 66 Corner Rosario Puerto Varas Chile,4.0,342,Very good,,
7,Enjoy Puerto Varas,Klenner 349 Puerto Varas Chile,4.0,970,Very good,-41.314247,-72.983703
8,Hotel Cabana del Lago,Klener 195 Pasaje Luis Wellmann Puerto Varas Chile,4.5,950,Excellent,,
9,Solace Hotel,Imperial 0211 Puerto Varas Chile,4.5,821,Excellent,-41.320709,-72.983904


In [None]:
df_chile_ct.loc[df_chile_ct['nombre']=='Hotel Awa',['latitud']] = -41.317802
df_chile_ct.loc[df_chile_ct['nombre']=='Hotel Awa',['longitud']] = -72.9829073
df_chile_ct.loc[df_chile_ct['nombre']=='Casa Azul Hostel',['latitud']] = -36.9695655
df_chile_ct.loc[df_chile_ct['nombre']=='Casa Azul Hostel',['longitud']] = -72.086518
df_chile_ct.loc[df_chile_ct['nombre']=='Hotel Cabana del Lago',['latitud']] = -41.3135006
df_chile_ct.loc[df_chile_ct['nombre']=='Hotel Cabana del Lago',['longitud']] =-72.9847564

In [None]:
df_chile_ct.to_csv('./data/hoteles_puerto_varas_prototipo.csv')

In [None]:
df_chile_ct['reviews'] = df_chile_ct['reviews'].astype(int)
df_chile_ct.to_csv('./data/hoteles_puerto_varas_prototipo.csv')
df_chile_ct

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chile_ct['reviews'] = df_chile_ct['reviews'].astype(int)


Unnamed: 0,nombre,localidad,score,reviews,categorias,latitud,longitud
0,Silvestre Cabanas,Los Colonos 1183 Puerto Varas Chile,4.5,3,Excellent,-41.329796,-72.961064
1,Puerto Chico Hotel,Avenida Los Colonos no. 60 Puerto Varas Chile,4.0,263,Very good,-41.328733,-72.960613
2,Park Inn By Radisson Puerto Varas,La Paz 471 Puerto Varas Chile,3.5,531,Very good,-41.31491,-72.985654
3,Hotel Puelche,Imperial 695 Puerto Varas Chile,4.5,347,Excellent,-41.322458,-72.97683
4,Radisson Hotel Puerto Varas,Del Salvador 024 Puerto Varas Chile,4.5,861,Excellent,-41.317983,-72.981778
5,Hotel Awa,Ruta 225 Km. 27 Sector Los Riscos Puerto Varas Chile,4.5,339,Excellent,-41.317802,-72.982907
6,Casa Azul Hostel,Manzanal 66 Corner Rosario Puerto Varas Chile,4.0,342,Very good,-36.969566,-72.086518
7,Enjoy Puerto Varas,Klenner 349 Puerto Varas Chile,4.0,970,Very good,-41.314247,-72.983703
8,Hotel Cabana del Lago,Klener 195 Pasaje Luis Wellmann Puerto Varas Chile,4.5,950,Excellent,-41.313501,-72.984756
9,Solace Hotel,Imperial 0211 Puerto Varas Chile,4.5,821,Excellent,-41.320709,-72.983904


In [None]:
!pip install geopandas shapely





[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import geopandas as gpd
from shapely.geometry import Point

In [None]:


# Ahora puedes hacer una búsqueda
#location = geolocator.geocode("avenida alemania 6002 valparaiso")
#print(location.address)


In [None]:
#print((location.latitude, location.longitude))

In [None]:
#d = {'ciudad': ['avenida alemania 6002 valparaiso', 'nataniel cox 1550 santiago', 'las heras 345 valparaiso','pedro montt 1229 valparaiso']}
#df_text = pd.DataFrame(d)
#df_text['latitud']= df_text['ciudad'].apply(lambda x: geolocator.geocode(x).latitude if geolocator.geocode(x) else None)
#df_text['longitud']= df_text['ciudad'].apply(lambda x: geolocator.geocode(x).longitude if geolocator.geocode(x) else None)
#df_text