In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
from itertools import chain

## Atributos

In [2]:
# User-Agent string (desktop Firefox 130 on Windows) placed in the request headers.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0'

## Clase de requests

In [3]:
class Requester():
    """
    Small wrapper around a ``requests.Session`` that retries GET/POST requests.

    Attributes:
        timeout (int): Timeout, in seconds, passed to every request.
        session (requests.Session): Session reused by all requests of this instance.
    """

    def __init__(self):
        self.__requests_tries = 3
        self.timeout = 15
        self.__init_session()

    def __init_session(self):
        """
        Creates the ``requests.Session`` used for every request and stores it in ``self.session``.
        """
        self.session = requests.Session()

    def __send(self, method, url, **kwargs):
        """
        Calls ``method(url, ...)`` up to ``self.__requests_tries`` times.

        Args:
            method (callable): Bound session method to call (``self.session.get`` or ``self.session.post``).
            url (str): The URL to send the request to.
            **kwargs: Extra keyword arguments forwarded to ``method``.

        Returns:
            requests.Response or None: The first response with status code 200, or None
            if every attempt raised or returned another status code.
        """
        for _ in range(self.__requests_tries):
            try:
                response = method(url, timeout=self.timeout, **kwargs)
            except requests.exceptions.RequestException as e:
                # Timeout is a subclass of RequestException, so this single
                # handler also covers timed-out requests.
                print(e)
                continue
            if response.status_code == 200:
                return response
        return None

    def get_requests(self, url, headers, proxy=None):
        """
        Sends a GET request to the specified URL using the instance session, retrying on failure.

        Args:
            url (str): The URL to send the request to.
            headers (dict): The headers to include in the request.
            proxy (dict, optional): The proxy to use for the request. Defaults to None.

        Returns:
            requests.Response or None: The response object if the request is successful (status code 200),
            otherwise None. Request exceptions (including timeouts) are printed, not raised.

        """
        return self.__send(self.session.get, url, headers=headers, proxies=proxy)

    def post_requests(self, url, headers, data=None, proxy=None):
        """
        Sends a POST request to the specified URL using the instance session, retrying on failure.

        Args:
            url (str): The URL to send the POST request to.
            headers (dict): The headers to include in the request.
            data (dict, optional): The data to include in the request body. Defaults to None.
            proxy (dict, optional): The proxy to use for the request. Defaults to None.

        Returns:
            requests.Response or None: The response object if the request is successful (status code 200),
            otherwise None. Request exceptions (including timeouts) are printed, not raised.
        """
        # The timeout is applied here as well, so a stalled POST cannot block forever.
        return self.__send(self.session.post, url, headers=headers, data=data, proxies=proxy)

In [4]:
# JSON search endpoints: one for listings on sale, one for rentals.
url_json_sale = 'https://century21mexico.com/v/resultados/operacion_venta?json=true'
url_json_rent = 'https://century21mexico.com/v/resultados/operacion_renta?json=true'

# Request headers passed along with the JSON endpoint requests.
headers_json = {
    'Host': 'century21mexico.com',
    'User-Agent': user_agent,
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-CA,en-US;q=0.7,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'DNT': '1',
    'Sec-GPC': '1',
    'Connection': 'keep-alive',
    'Referer': 'https://century21mexico.com/v/resultados',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
}

In [7]:
def get_price_range(req, url, header):
    """
    Splits a search URL into a list of price-bucketed URLs.

    The unfiltered URL is requested first to read the total number of hits.
    Fixed-width price buckets are then queried one by one until at most 1500
    hits remain; those remaining hits are covered by one final URL with no
    upper price bound.

    Args:
        req: Object exposing ``get_requests(url, headers)`` that returns a response
            with a ``json()`` method, or None on failure.
        url (str): Search URL ending in ``?json=true``.
        header (dict): Headers sent with every request.

    Returns:
        list[str]: Price-filtered URLs, in ascending price order. Empty when the
        search reports zero hits.

    Raises:
        RuntimeError: If any request yields no valid response.
    """
    def fetch_json(target_url):
        # get_requests returns None after exhausting its retries; fail loudly
        # instead of crashing later with an AttributeError on None.
        response = req.get_requests(target_url, header)
        if response is None:
            raise RuntimeError(f'No valid response received for {target_url}')
        return response.json()

    def parse_hits(payload):
        # 'totalHits' is handled as a string with thousands separators (e.g. '4,544').
        return int(payload['totalHits'].replace(',', ''))

    response_json = fetch_json(url)
    total_hits_results = parse_hits(response_json)

    # NOTE(review): filtros[1].validValues[1] is presumably the "rent" operation
    # flag; rentals use a narrower bucket width than sales - confirm against the API.
    if response_json['filtros'][1]['validValues'][1]['isActive'] == True:
        increment = 12000
    else:
        increment = 500000

    url_final = url.replace('?json=true', '')
    start_price = 0
    end_price = increment
    hits_counter = 0
    urls_price = []

    # NOTE(review): 1500 presumably reflects how many results a single query can
    # page through - confirm. Bounded buckets are not checked against it.
    # NOTE(review): if bounded buckets never account for enough hits, this loop
    # does not terminate (same as the previous implementation).
    while hits_counter < total_hits_results:
        if (total_hits_results - hits_counter) <= 1500:
            # The open-ended bucket covers everything that is left, so it is
            # always the last URL; no request is needed to know that.
            urls_price.append(f'{url_final}/precio-desde_{start_price}?json=true')
            break
        url_price = f'{url_final}/precio-desde_{start_price}/precio-hasta_{end_price}?json=true'
        urls_price.append(url_price)
        hits_counter += parse_hits(fetch_json(url_price))
        start_price = end_price + 1
        end_price += increment
    return urls_price

In [8]:
def get_page_number(req, url, header):
    """
    Builds one URL per result page for the given search URL.

    Args:
        req: Object exposing ``get_requests(url, headers)`` that returns a response
            with a ``json()`` method, or None on failure.
        url (str): Search URL containing a query string (e.g. ``...?json=true``).
        header (dict): Headers sent with the request.

    Returns:
        list[str]: The input URL with each page's path fragment inserted just
        before the query string, in the order the pages are listed.

    Raises:
        RuntimeError: If the request yields no valid response.
    """
    response = req.get_requests(url, header)
    if response is None:
        # get_requests returns None after exhausting its retries.
        raise RuntimeError(f'No valid response received for {url}')
    # NOTE(review): filtros[20] is presumably the pagination filter - confirm
    # that this position is stable in the API response.
    pages = response.json()['filtros'][20]['validValues']
    # Only the first '?' starts the query string, so only that one is replaced.
    return [url.replace('?', f"{page['url']}?", 1) for page in pages]

In [9]:
# Requester instance reused by the exploratory cells that call the helper functions directly.
req = Requester()

In [10]:
# Inspect the cookie jar of the requester's session.
req.session.cookies

<RequestsCookieJar[]>

In [32]:
# Price-bucketed URLs for the rental search.
list1 = get_price_range(req, url_json_rent,headers_json)
list1


['https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_12001/precio-hasta_24000?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_24001/precio-hasta_36000?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_36001?json=true']

In [33]:
# For every price-bucket URL, the list of its per-page URLs (a list of lists).
list2 = [get_page_number(req,li,headers_json ) for li in list1]
list2

[['https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_1?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_2?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_3?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_4?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_5?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_6?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_7?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_8?json=true',
  'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/preci

In [44]:
# Flatten the per-bucket page lists into one list of URLs and preview the first 12.
lista_aplanada = list(chain.from_iterable(list2))
lista_aplanada[:12]

['https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_1?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_2?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_3?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_4?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_5?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_6?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_7?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_8?json=true',
 'https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_1

In [17]:
def get_results_info(req, url, header):
    """
    Fetches a results page and returns its ``results`` entry.

    Args:
        req: Object exposing ``get_requests(url, headers)`` that returns a response
            with a ``json()`` method, or None on failure.
        url (str): URL of the JSON results page.
        header (dict): Headers sent with the request.

    Returns:
        The value stored under the ``'results'`` key of the JSON response.

    Raises:
        RuntimeError: If the request yields no valid response.
    """
    response = req.get_requests(url, header)
    if response is None:
        # get_requests returns None after exhausting its retries.
        raise RuntimeError(f'No valid response received for {url}')
    return response.json()['results']

In [61]:
def scrape_data(url, header, df):
    """
    Scrapes every result page of a search and appends the rows to ``df``.

    The search is first split into price buckets, each bucket into pages, and
    every page's results are turned into a DataFrame.

    Args:
        url (str): Search URL ending in ``?json=true``.
        header (dict): Headers sent with every request.
        df (pandas.DataFrame): Existing rows to extend; it is not modified.

    Returns:
        pandas.DataFrame: ``df`` followed by the scraped rows, with a fresh
        0..n-1 index. ``df`` itself is returned when no pages were found.
    """
    req = Requester()
    price_list = get_price_range(req, url, header)
    price_list_pages = [get_page_number(req, url_price, header) for url_price in price_list]
    page_frames = [
        pd.DataFrame(get_results_info(req, url_page, header))
        for url_page in chain.from_iterable(price_list_pages)
    ]
    if not page_frames:
        return df
    # Concatenate once: calling pd.concat inside the loop re-copies all
    # accumulated rows on every page.
    return pd.concat([df, *page_frames], ignore_index=True)

In [59]:
# Start from an empty frame; scrape_data appends the scraped rows to it.
df = pd.DataFrame()
df

In [62]:
# Scrape the rental search results into df.
df = scrape_data(url_json_rent, headers_json, df)

In [63]:
# Display the frame returned by the rental scrape.
df

Unnamed: 0,precios,mantenimiento,precioFormat,precioMapa,precioSecundarioFormat,monedaSecundaria,mostrarMonedaSecundaria,tipoOperacionTxt,tipoPropiedadEnTipoOperacion,diasModificacionTxt,...,idAsesor,telefono,whatsapp,email,asesorNombre,asesorThumbnail,asesorFotoMostrarEnInternet,logoOficina,nombreAfiliado,fotos
0,"{'vista': {'precio': 6748.25725, 'moneda': 'MX...",,$350 USD,350,"$6,748 MXN",MXN,False,en renta,Local en renta,Hoy,...,30424,+52 664 308 0700,+526643080700,ceballos@c21fortaleza.com,ALEJANDRO CEBALLOS LOPEZ,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Fortaleza,"{'totalFotos': 5, 'propiedadThumbnail': ['http..."
1,"{'vista': {'precio': 10000, 'moneda': 'MXN', '...",,"$10,000 MXN",10000,"$10,000 MXN",MXN,False,en renta,Departamento en renta,Ayer,...,32597,+52 55 5386 0475,,ventas@c21goldteam.com.mx,Armida Rubí MartÍnez Rodriguez,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Gold Team,"{'totalFotos': 15, 'propiedadThumbnail': ['htt..."
2,"{'vista': {'precio': 10000, 'moneda': 'MXN', '...",+ 700 mantenimiento,"$10,000 MXN",10000,"$10,000 MXN",MXN,False,en renta,Departamento en renta,Ayer,...,23754,+52 55 5603 3434,,clientes@century21felix.com,GABRIELA PORTILLA RONQUILLO,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Félix & Asociados,"{'totalFotos': 8, 'propiedadThumbnail': ['http..."
3,"{'vista': {'precio': 11500, 'moneda': 'MXN', '...",,"$11,500 MXN",11500,"$11,500 MXN",MXN,False,en renta,Casa en renta,Actualizado hace 2 días,...,25020,+52 664 199 3171,+526641993171,century21@c21cosmopolitan.com,Denise Romero,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Cosmopolitan,"{'totalFotos': 15, 'propiedadThumbnail': ['htt..."
4,"{'vista': {'precio': 10500, 'moneda': 'MXN', '...",,"$10,500 MXN",10500,"$10,500 MXN",MXN,False,en renta,Departamento en renta,Actualizado hace 2 días,...,26390,+52 449 911 5126,+524499115126,ventas@c21bonaterra.com,Rosela Zermeño Martinez,https://cdn.21online.lat/mexico/cache/awsTest1...,False,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Bonaterra,"{'totalFotos': 14, 'propiedadThumbnail': ['htt..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4540,"{'vista': {'precio': 50000, 'moneda': 'MXN', '...",,"$50,000 MXN",50000,"$50,000 MXN",MXN,False,en renta,Bodega en renta,Actualizado hace 712 días,...,11491,+52 921 267 1668,+529212671668,asesor16@c21habitat.com.mx,Maria Martinez Gomez,https://cdn.21online.lat/mexico/cache/awsTest1...,False,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Hábitat,"{'totalFotos': 20, 'propiedadThumbnail': ['htt..."
4541,"{'vista': {'precio': 75400, 'moneda': 'MXN', '...",,"$75,400 MXN",75400,"$75,400 MXN",MXN,False,en renta,Local en renta,Actualizado hace 641 días,...,11764,+52 921 212 0462,,gerencia@c21habitat.com.mx,Amelia Valencia Villanueva,https://cdn.21online.lat/mexico/cache/awsTest1...,False,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Hábitat,"{'totalFotos': 5, 'propiedadThumbnail': ['http..."
4542,"{'vista': {'precio': 67280, 'moneda': 'MXN', '...",,"$67,280 MXN",67280,"$67,280 MXN",MXN,False,en renta,Local en renta,Actualizado hace 641 días,...,11764,+52 921 212 0462,,gerencia@c21habitat.com.mx,Amelia Valencia Villanueva,https://cdn.21online.lat/mexico/cache/awsTest1...,False,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Hábitat,"{'totalFotos': 8, 'propiedadThumbnail': ['http..."
4543,"{'vista': {'precio': 52839, 'moneda': 'MXN', '...",,"$52,839 MXN",52839,"$52,839 MXN",MXN,False,en renta,Local en renta,Actualizado hace 633 días,...,5565,+52 662 182 0023,+526621820023,Leonorcampoy@hotmail.com,Maria Leonor Campoy Burboa,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Elga,"{'totalFotos': 4, 'propiedadThumbnail': ['http..."


In [64]:
# Scrape the sale search results and append them to the rows already in df.
# NOTE: re-running this cell appends the sale rows again, because df is both input and output.
df = scrape_data(url_json_sale, headers_json, df)
df

  df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)


Unnamed: 0,precios,mantenimiento,precioFormat,precioMapa,precioSecundarioFormat,monedaSecundaria,mostrarMonedaSecundaria,tipoOperacionTxt,tipoPropiedadEnTipoOperacion,diasModificacionTxt,...,idAsesor,telefono,whatsapp,email,asesorNombre,asesorThumbnail,asesorFotoMostrarEnInternet,logoOficina,nombreAfiliado,fotos
0,"{'vista': {'precio': 6748.25725, 'moneda': 'MX...",,$350 USD,350,"$6,748 MXN",MXN,False,en renta,Local en renta,Hoy,...,30424,+52 664 308 0700,+526643080700,ceballos@c21fortaleza.com,ALEJANDRO CEBALLOS LOPEZ,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Fortaleza,"{'totalFotos': 5, 'propiedadThumbnail': ['http..."
1,"{'vista': {'precio': 10000, 'moneda': 'MXN', '...",,"$10,000 MXN",10000,"$10,000 MXN",MXN,False,en renta,Departamento en renta,Ayer,...,32597,+52 55 5386 0475,,ventas@c21goldteam.com.mx,Armida Rubí MartÍnez Rodriguez,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Gold Team,"{'totalFotos': 15, 'propiedadThumbnail': ['htt..."
2,"{'vista': {'precio': 10000, 'moneda': 'MXN', '...",+ 700 mantenimiento,"$10,000 MXN",10000,"$10,000 MXN",MXN,False,en renta,Departamento en renta,Ayer,...,23754,+52 55 5603 3434,,clientes@century21felix.com,GABRIELA PORTILLA RONQUILLO,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Félix & Asociados,"{'totalFotos': 8, 'propiedadThumbnail': ['http..."
3,"{'vista': {'precio': 11500, 'moneda': 'MXN', '...",,"$11,500 MXN",11500,"$11,500 MXN",MXN,False,en renta,Casa en renta,Actualizado hace 2 días,...,25020,+52 664 199 3171,+526641993171,century21@c21cosmopolitan.com,Denise Romero,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Cosmopolitan,"{'totalFotos': 15, 'propiedadThumbnail': ['htt..."
4,"{'vista': {'precio': 10500, 'moneda': 'MXN', '...",,"$10,500 MXN",10500,"$10,500 MXN",MXN,False,en renta,Departamento en renta,Actualizado hace 2 días,...,26390,+52 449 911 5126,+524499115126,ventas@c21bonaterra.com,Rosela Zermeño Martinez,https://cdn.21online.lat/mexico/cache/awsTest1...,False,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Bonaterra,"{'totalFotos': 14, 'propiedadThumbnail': ['htt..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23008,"{'vista': {'precio': 33000000.000000004, 'mone...",,"$33,000,000 MXN",33.0M,"$33,000,000 MXN",MXN,False,en venta,Terreno en venta,Actualizado hace 712 días,...,845,+52 55 5662 9700,,informes@c21platinum.com.mx,Laura Maldonado,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Platinum,"{'totalFotos': 6, 'propiedadThumbnail': ['http..."
23009,"{'vista': {'precio': 33500000, 'moneda': 'MXN'...",,"$33,500,000 MXN",33.5M,"$33,500,000 MXN",MXN,False,en venta,Terreno en venta,Actualizado hace 382 días,...,16054,+52 55 3906 1677,+525539061677,ventas@century21eden.com,Rene Zepeda Hurtado,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Edén,"{'totalFotos': 7, 'propiedadThumbnail': ['http..."
23010,"{'vista': {'precio': 45000000, 'moneda': 'MXN'...",,"$45,000,000 MXN",45.0M,"$45,000,000 MXN",MXN,False,en venta,Edificio en venta,Actualizado hace 633 días,...,10085,+52 744 484 3000,,direccion@century21lacunza.com.mx,José Luis Infante Tafolla,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Lacunza & Asociados,"{'totalFotos': 19, 'propiedadThumbnail': ['htt..."
23011,"{'vista': {'precio': 110864226.25, 'moneda': '...",,"$5,750,000 USD",5.8M,"$110,864,226 MXN",MXN,False,en venta,Casa en venta,Actualizado hace 74 días,...,10110,+52 744 484 3000,,direccion@century21lacunza.com.mx,Deyanira Terrazas Castro,https://cdn.21online.lat/mexico/cache/awsTest1...,True,https://cdn.21online.lat/mexico/cache/awsTest1...,CENTURY 21 Lacunza & Asociados,"{'totalFotos': 67, 'propiedadThumbnail': ['htt..."


In [57]:
# Sanity check: print every rental page URL together with the number of results it returns.
price_list = get_price_range(req, url_json_rent, headers_json)
price_list_pages = [
    get_page_number(req, url_price, headers_json)
    for url_price in price_list
]
price_list_pages_p = list(chain.from_iterable(price_list_pages))
for url in price_list_pages_p:
    response = req.get_requests(url, headers_json)
    response_results = response.json()
    asd = len(response_results['results'])
    # One print call, URL and count on separate lines.
    print(url, asd, sep='\n')


    


https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_1?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_2?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_3?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_4?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_5?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_6?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_7?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_12000/pagina_8?json=true
100
https://century21mexico.com/v/resultados/operacion_renta/precio-desde_0/precio-hasta_120