In [None]:
import requests
import time
import random
from bs4 import BeautifulSoup

# global variables
# Root URL of the site; listing-page and detail-page paths are appended to it.
base_url = "https://www.zameen.com"


def convert_price(price):
    """
    Convert a price string with an optional unit word into a whole number.

    Recognised unit suffixes are Crore, Lakh, Million, Arab and Thousand.
    A string without any of these suffixes is parsed as a plain number.
    Thousands separators (",") and surrounding whitespace are ignored.

    :param price: str, e.g. "1.5 Crore", "25 Lakh" or "1,250,000"
    :return: int, the price rounded to the nearest whole unit
    :raises ValueError: if the numeric part cannot be parsed as a float
    """
    multipliers = (
        ("Crore", 10_000_000),
        ("Lakh", 100_000),
        ("Million", 1_000_000),
        ("Arab", 1_000_000_000),
        ("Thousand", 1_000),
    )

    # Strip separators up front so every branch parses the same way.
    cleaned = price.strip().replace(",", "")

    for suffix, factor in multipliers:
        if cleaned.endswith(suffix):
            # float() tolerates the space left between number and unit.
            return round(float(cleaned[: -len(suffix)]) * factor)

    return round(float(cleaned))


def convert_size(size):
    """
    Convert an area string into square feet.

    Uses the factors 1 Marla = 225 sq ft, 1 Kanal = 4500 sq ft and
    1 Sq. Yd. = 9 sq ft. A value suffixed with "Sq. Ft." or carrying no
    unit at all is taken to be square feet already. Thousands separators
    (",") and surrounding whitespace are ignored in every case.

    :param size: str, e.g. "5 Marla", "1 Kanal", "120 Sq. Yd." or "1,200"
    :return: int, the area in square feet rounded to the nearest whole unit
    :raises ValueError: if the numeric part cannot be parsed as a float
    """
    units = (
        ("Marla", 225),
        ("Kanal", 4500),
        ("Sq. Yd.", 9),
        # NOTE(review): label assumed to mirror the "Sq. Yd." format - confirm
        # against the site's markup.
        ("Sq. Ft.", 1),
    )

    # Strip separators once so the unit-less fallback accepts "1,200" too.
    cleaned = size.strip().replace(",", "")

    for suffix, factor in units:
        if cleaned.endswith(suffix):
            return round(float(cleaned[: -len(suffix)]) * factor)

    return round(float(cleaned))


def text(tag, datatype="str"):
    """
    Extract the stripped text of a tag and coerce it to the requested kind.

    A missing tag (None) yields the kind's default: 0 for "num", "" for
    "str" and 0.0 for "price" / "size". Any other datatype yields None.

    :param tag: tag object exposing a ``text`` attribute, or None
    :param datatype: one of "num", "str", "price", "size"
    :return: int for "num" (0 when the text is not an integer), str for
             "str", converted number for "price" / "size"
    """
    missing = tag is None

    if datatype == "num":
        if missing:
            return 0
        try:
            return int(tag.text.strip())
        except ValueError:
            # Non-numeric content counts as zero.
            return 0

    if datatype == "str":
        return "" if missing else tag.text.strip()

    if datatype == "price":
        return 0.0 if missing else convert_price(tag.text.strip())

    if datatype == "size":
        return 0.0 if missing else convert_size(tag.text.strip())

    # Unknown datatype: nothing to return.
    return None


def property_Detail(detail_link):
    """
    Fetch a listing's detail page and return its description text.

    The description is read from the first ``<span class="_3547dac9">``
    on the page.

    :param detail_link: str, path that is appended to ``base_url``
    :return: str with the span's text, or None when the span is absent
             or the page could not be fetched
    """
    complete_link = f"{base_url}{detail_link}"
    print("completeLink", complete_link)

    try:
        # A timeout keeps one stalled connection from hanging the scrape.
        response = requests.get(complete_link, timeout=30)
    except requests.RequestException as exc:
        # The description is optional; report the failure and carry on.
        print("failed to fetch", complete_link, exc)
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    description = soup.find("span", {"class": "_3547dac9"})

    return description.text if description is not None else None


def scrap(city, start_page, end_page):
    """
    Scrape the zameen.com listing pages for a city and collect the
    information of every listing that shows a price.

    Pages ``start_page`` through ``end_page`` (inclusive) are requested one
    after another with a random 2-5 second pause after each page. A page
    that cannot be fetched is reported and skipped.

    :param city: str, city slug used in the listing URL (e.g. "Karachi-2")
    :param start_page: int, first page number to fetch
    :param end_page: int, last page number to fetch (inclusive)
    :return: list of dicts with the keys location, price, bedrooms, baths,
             size, image and description
    """
    house_info = []

    for page_number in range(start_page, end_page + 1):
        url = f"{base_url}/Homes/{city}-{page_number}.html"
        print("url", url)

        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Keep what has been collected so far instead of aborting.
            print("failed to fetch", url, exc)
            house_list = []
        else:
            soup = BeautifulSoup(response.text, "html.parser")
            house_list = soup.select("main > div > div > div > div > ul > li")

        for house in house_list:
            price = house.select_one("span[aria-label='Price']")
            if price is None:
                # Items without a price are not recorded, so skip them
                # before spending a request on their detail page.
                continue

            baths = house.select_one("span[aria-label='Baths']")
            beds = house.select_one("span[aria-label='Beds']")
            location = house.select_one("div[aria-label='Location']")

            size = house.select_one("div[title]>div > div > span:nth-child(1)")
            if size is None and location is not None:
                # Fallback selector, resolved relative to the location's parent.
                size = location.parent.select_one(
                    "div:nth-child(2) > div > span:nth-child(3)"
                )

            img_tag = house.find("img")
            image = img_tag.get("src") if img_tag is not None else "N/A"

            anchor = house.find("a")
            detail_link = anchor.get("href") if anchor is not None else None

            # Bound on every iteration so a listing without a link never
            # inherits the previous listing's description.
            description = property_Detail(detail_link) if detail_link else None

            house_info.append(
                {
                    "location": text(location),
                    "price": text(price, datatype="price"),
                    "bedrooms": text(beds, datatype="num"),
                    "baths": text(baths, datatype="num"),
                    "size": text(size, datatype="size"),
                    "image": image,
                    "description": description,
                }
            )

        # Random pause between pages to avoid hammering the site.
        time.sleep(random.uniform(2, 5))

    return house_info


if __name__ == "__main__":
    # Imported here because only the script entry point writes the CSV.
    import csv

    # Define the city as Karachi
    city = {"id": 2, "name": "Karachi"}

    # Define the starting and ending page numbers
    start_page = 1
    end_page = 730

    # Call the scraping function
    house_info = scrap(f"{city.get('name')}-{city.get('id')}", start_page, end_page)

    # Save the data to a pipe-delimited CSV file. The csv module quotes any
    # field containing "|", quotes or line breaks (descriptions often do), so
    # each listing stays on one logical row. A None description is written as
    # an empty field.
    fieldnames = [
        "location",
        "price",
        "bedrooms",
        "baths",
        "size",
        "image",
        "description",
    ]
    with open(
        "testkarachi_homes1-730.csv", "w", newline="", encoding="utf-8"
    ) as f:
        writer = csv.DictWriter(
            f, fieldnames=fieldnames, delimiter="|", lineterminator="\n"
        )
        writer.writeheader()
        writer.writerows(house_info)
