# Code

In [1]:
import nest_asyncio
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import pandas as pd
from IPython.display import HTML

nest_asyncio.apply()  # Allow nested event loops in Jupyter

In [2]:
def convert_price(price_text):
    # Remove 'Rp' and spaces, and handle the commas
    price_text = price_text.replace('Rp', '').replace(' ', '').strip()
    
    # Regular expression to match 'juta' (million) and 'miliar' (billion)
    if 'Miliar' in price_text:
        price_value = float(price_text.replace('Miliar', '').replace(',', '.').strip())
        return price_value * 1000  # Convert to millions (1 miliar = 1000 juta)
    elif 'Juta' in price_text:
        price_value = float(price_text.replace('Juta', '').replace(',', '.').strip())
        return price_value
    else:
        # Handle any other cases (e.g., if the price is in plain numbers)
        return float(price_text.replace(',', '.').strip())
    
def convert_area(area_text):
    # Remove non-numeric characters (e.g., 'm²', spaces, etc.)
    area_value = area_text.replace("LT : ", "").replace("LB : ", "").split(" ")[0]
    return float(area_value)  # Convert to float

In [10]:
async def fetch_page(session, url):
    async with session.get(url) as response:
        return await response.text()

async def parse_page(base_url, page):
    url = f"{base_url}&page={page}"
    async with aiohttp.ClientSession() as session:
        html = await fetch_page(session, url)
        soup = BeautifulSoup(html, 'html.parser')
        listings = soup.find_all(class_='ui-organism-intersection__element')

        data = []
        for listing in listings:
            try:
                title = listing.find('h2').text
                price = convert_price(listing.find(class_='card-featured__middle-section__price').text)
                location = listing.select_one(".card-featured__middle-section > span").text
                attributes_text = listing.select('.attribute-text')
                bedrooms = float(attributes_text[0].text) if len(attributes_text) > 0 else 0
                bathrooms = float(attributes_text[1].text) if len(attributes_text) > 1 else 0
                attributes_info = listing.select('.attribute-info')
                land_area = convert_area(attributes_info[0].text.split(" ")[2]) if len(attributes_info) > 0 else "N/A"
                building_area = convert_area(attributes_info[1].text.split(" ")[2]) if len(attributes_info) > 1 else "N/A"
                agent_name = listing.select_one('.name').text
                url = 'https://rumah123.com/' + listing.find('a')['href']
                price_per_bedroom = (0.2 * price) / 12 / bedrooms if bedrooms > 0 else 0
                cost_per_bedroom = price / bedrooms if bedrooms > 0 else 0
                area_per_bedroom = building_area / bedrooms if bedrooms > 0 else 0

                data.append({
                    'Title': title,
                    'Price': price,
                    'Location': location,
                    'Bedrooms': bedrooms,
                    'Bathrooms': bathrooms,
                    'Land Area': land_area,
                    'Building Area': building_area,
                    'Agent Name': agent_name,
                    'URL': url,
                    'Price per Bedroom': price_per_bedroom,
                    'Cost per Bedroom': cost_per_bedroom,
                    'Area per Bedroom': area_per_bedroom
                })
            except Exception as e:
                pass
                
        return data

async def main(base, minPrice, minLandArea, minBuiltupSize, maxLandArea, maxBuiltupSize, max_page=0):
    base_url = f"https://www.rumah123.com/{base}/?maxBuiltupSize={maxBuiltupSize}&maxLandArea={maxLandArea}&minBuiltupSize={minBuiltupSize}&minLandArea={minLandArea}&minPrice={minPrice}&sort=price-asc"

    if max_page == 0:
        async with aiohttp.ClientSession() as session:
            html = await fetch_page(session, base_url + '&page=1')
            soup = BeautifulSoup(html, 'html.parser')
            max_page = int(soup.find('div', id='search-page__content-bottom').find('ul').find_all('li')[5].find('a').text)

    tasks = [parse_page(base_url, page) for page in range(1, max_page + 1)]
    results = await asyncio.gather(*tasks)

    # Flatten the results and create a DataFrame
    flat_data = [item for sublist in results for item in sublist]
    df = pd.DataFrame(flat_data)

    # Render DataFrame
    df['URL'] = df['URL'].apply(lambda x: f'<a href="{x}" target="_blank">{x}</a>')

    return df

base = "jual/depok/rumah"
minPrice = 400_000_000
minLandArea = 150
minBuiltupSize = 300
maxLandArea = 2000
maxBuiltupSize = 2000
max_page = 1

df = await main(base, minPrice, minLandArea, minBuiltupSize, maxLandArea, maxBuiltupSize, max_page) 

pd.options.display.float_format = '{:.1f}'.format
display(HTML(df.to_html(escape=False)))


Unnamed: 0,Title,Price,Location,Bedrooms,Bathrooms,Land Area,Building Area,Agent Name,URL,Price per Bedroom,Cost per Bedroom,Area per Bedroom
0,Dijual 8 unit kontrakan Lokasi Sukamaju Cilodong Depok.,610.0,"Cilodong, Depok",8.0,8.0,371.0,371.0,Marlina Utami,https://rumah123.com//properti/depok/hos17144404/,1.3,76.2,46.4
1,Rumah Dijual Di Perumahan Lembah Cinere Depok,781.0,"Cinere, Depok",3.0,2.0,230.0,300.0,Mei Ling,https://rumah123.com//properti/depok/hos18495442/,4.3,260.3,100.0
2,DIJUAL RUMAH DI KOMPLEK SAWANGAN PERMAI KOTA DEPOK- VIA CESSIE,899.0,"Sawangan, Depok",5.0,2.0,320.0,470.0,Agung Maulana - THREE BROTHERS PROPERTY,https://rumah123.com//properti/depok/hos17411109/,3.0,179.8,94.0
3,Rumah Cimanggis Depok,900.0,"Mekarsari, Depok",4.0,2.0,165.0,300.0,Hendra Gunawan\t,https://rumah123.com//properti/depok/hos18033284/,3.8,225.0,75.0
4,"Dijual Rumah di Jalan Mayor Idrus, Tapos, Depok",906.0,"Tapos, Depok",5.0,2.0,212.0,312.0,Marketing Birmaland1,https://rumah123.com//properti/depok/hos17820164/,3.0,181.2,62.4
5,Rumah 2 Lantai Di Lembah Hijau Depok Mekarsari Bagus,950.0,"Mekarsari, Depok",5.0,3.0,165.0,300.0,Audi EraGading,https://rumah123.com//properti/depok/hos18058277/,3.2,190.0,60.0
6,Dijual Cepat Rumah Murah Dibawah Harga Njop di Perumahan Pondok Cibubur Cimanggis Kota Depok Siaphuni Minimalis Modern,1000.0,"Cimanggis, Depok",5.0,3.0,220.0,330.0,Nancy,https://rumah123.com//properti/depok/hos15499187/,3.3,200.0,66.0
7,"Rumah di Jl. Muhasim, Pancoran Mas, Kota Depok",1000.0,"Pancoran Mas, Depok",4.0,2.0,566.0,851.0,Marketing Birmaland1,https://rumah123.com//properti/depok/hos18017761/,4.2,250.0,212.8
8,Dijual Cepat Rumah Murah Bagus Di Perumahan Mekarsari Permai Cimanggis Kota Depok,1130.0,"Mekarsari, Depok",3.0,3.0,310.0,310.0,Gagah,https://rumah123.com//properti/depok/hos18668743/,6.3,376.7,103.3
9,RUMAH HUNIAN 2 LANTAI LOKASI STRATEGIS DI CIMANGGIS DEPOK,1200.0,"Cimanggis, Depok",5.0,3.0,165.0,300.0,DELILA WJUN,https://rumah123.com//properti/depok/hos17986893/,4.0,240.0,60.0


In [None]:
df.to_csv(f'pg_{base}.csv', index=False)

# Load Data

In [56]:
df = pd.read_csv('pg_depok.csv')
pd.options.display.float_format = '{:.2f}'.format
HTML(df.sort_values(by="Price per Bedroom").to_html(escape=False));

In [49]:
# Set up Chrome WebDriver
options = webdriver.ChromeOptions()
# options.add_argument('--headless')  # Run in headless mode
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Randomize user-agent
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
options.add_argument(f"user-agent={user_agent}")

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Navigate to the target website
url = "https://www.rumah123.com/kost/jual/di-dki-jakarta/?minLandArea=199&minPrice=1000000000&sort=price-asc&page=1"
driver.get(url)

In [None]:
df["Description"] = ""
for index, row in df.iterrows():
    print(index)
    link = re.search(r'href="([^"]+)"', row["URL"])
    link = link.group(1)
    driver.get(link)
    df.loc[index, "Description"] = driver.find_element(By.XPATH, '//*[@id="property-information"]/div[2]/div[1]/div/p[2]').text

In [35]:
import pyperclip

pyperclip.copy(df.sort_values(by="Price per Bedroom").to_html(escape=False))

In [49]:
df = df.sort_values(by="Price per Bedroom").reset_index(drop=True)

def style_table(s):
    s = s.set_table_attributes('className="table-auto min-w-full border-collapse border border-gray-300"')
    s = s.set_table_styles(
        [
            {"selector": "thead tr", "props": [("className", "bg-gray-100")]},
            {"selector": "thead th", "props": [("className", "border border-gray-300 px-4 py-2 text-left font-medium text-gray-700")]},
            {"selector": "tbody tr:nth-child(odd)", "props": [("className", "odd:bg-white")]},
            {"selector": "tbody tr:nth-child(even)", "props": [("className", "even:bg-gray-50")]},
            {"selector": "tbody td", "props": [("className", "border border-gray-300 px-4 py-2 text-gray-600")]},
        ],
        overwrite=False,
    )
    return s
    
styled_table = (
    df.style.hide(axis="index")  # Removes index
    .pipe(style_table)  # Apply styling
)

html_table = styled_table.to_html()
HTML(html_table)
pyperclip.copy(html_table)

In [57]:
json_data = df.to_json(orient='records')  # Convert to list of dictionaries
pyperclip.copy(json_data)

In [12]:
print(df.to_json(orient='records'))  # Convert to list of dictionaries


[{"Title":"Dijual 8 unit kontrakan Lokasi Sukamaju Cilodong Depok.","Price":610.0,"Location":"Cilodong, Depok","Bedrooms":8.0,"Bathrooms":8.0,"Land Area":371.0,"Building Area":371.0,"Agent Name":"Marlina  Utami","URL":"<a href=\"https:\/\/rumah123.com\/\/properti\/depok\/hos17144404\/\" target=\"_blank\">https:\/\/rumah123.com\/\/properti\/depok\/hos17144404\/<\/a>","Price per Bedroom":1.2708333333,"Cost per Bedroom":76.25,"Area per Bedroom":46.375},{"Title":"Rumah Dijual Di Perumahan  Lembah Cinere Depok","Price":781.0,"Location":"Cinere, Depok","Bedrooms":3.0,"Bathrooms":2.0,"Land Area":230.0,"Building Area":300.0,"Agent Name":"Mei Ling ","URL":"<a href=\"https:\/\/rumah123.com\/\/properti\/depok\/hos18495442\/\" target=\"_blank\">https:\/\/rumah123.com\/\/properti\/depok\/hos18495442\/<\/a>","Price per Bedroom":4.3388888889,"Cost per Bedroom":260.3333333333,"Area per Bedroom":100.0},{"Title":"DIJUAL RUMAH DI KOMPLEK SAWANGAN PERMAI KOTA DEPOK- VIA CESSIE","Price":899.0,"Location":"S