In [1]:
import re
import unicodedata
from urllib.parse import urlparse, parse_qs

import bs4
import pandas as pd
import requests

In [2]:
# Walk the paginated search results and collect the detail-page URL of every
# villa listed there. The result is `urls`, a list of absolute URLs in the
# order the listings were first encountered.

# Seconds to wait on a single HTTP request before giving up; without a
# timeout a stalled connection would block the crawl indefinitely.
REQUEST_TIMEOUT_SECONDS = 30
# Number of the last search-result page to visit.
LAST_PAGE = 141

urls = []
# Relative links already collected, so a villa that shows up more than once
# (in several carousel slides or on several pages) is only queued once.
seen_paths = set()

for page_number in range(1, LAST_PAGE + 1):

    URL = "https://www.villaciniz.com.tr/arama-sonuclari?s=" + str(page_number)
    try:
        response = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
        # Treat HTTP error statuses as failures instead of parsing the error
        # page and silently finding no listings on it.
        response.raise_for_status()
    except requests.RequestException:
        # Best effort: report the page and keep crawling the remaining ones.
        print(f"{URL} : URL ERROR!")
    else:
        response.encoding = "utf-8"
        html_parsed = bs4.BeautifulSoup(response.text, "html.parser")

        for link in html_parsed.select("div#searchResult div.carousel-item a"):
            path = link["href"].strip()
            if path not in seen_paths:
                seen_paths.add(path)
                urls.append("https://villaciniz.com.tr" + path)

    if page_number % 10 == 0:
        print(f"{page_number} pages have been checked!")

print("\n---OPERATION HAS BEEN COMPLETED!---")
print(f"{page_number} pages have been checked!")
print(f"Total Villa URL: {len(urls)}")

10 pages have been checked!
20 pages have been checked!
30 pages have been checked!
40 pages have been checked!
50 pages have been checked!
60 pages have been checked!
70 pages have been checked!
80 pages have been checked!
90 pages have been checked!
100 pages have been checked!
110 pages have been checked!
120 pages have been checked!
130 pages have been checked!
140 pages have been checked!

---OPERATION HAS BEEN COMPLETED!---
141 pages have been checked!
Total Villa URL: 1684


In [3]:
# Visit every villa page in `urls` and extract its fields into parallel lists
# (one list per output column). A field that cannot be extracted is stored as
# None and reported on stdout; a page that cannot be downloaded is recorded in
# URL_ERRORS and contributes no row at all.

# Seconds to wait on a single HTTP request before giving up.
REQUEST_TIMEOUT_SECONDS = 30
# Multipliers applied to four times the average nightly price to obtain the
# Q1..Q4 figures. They sum to 1.0.
# NOTE(review): presumably seasonal demand shares - confirm their origin.
QUARTER_WEIGHTS = (0.14, 0.32, 0.37, 0.17)
# The guest, bedroom and bathroom counts are read from the first three
# elements matching this selector, in that order.
CAPACITY_SELECTOR = "span.fs-5.fw-light.d-inline-block"


def _leading_int(text):
    """Return the integer formed by the leading run of digits in ``text``.

    Leading whitespace is ignored. Reading the whole run of digits (rather
    than only the first character) keeps values of 10 or more intact.

    Raises:
        ValueError: if ``text`` does not start with a digit.
    """
    match = re.match(r"\s*(\d+)", text)
    if match is None:
        raise ValueError(f"no leading number in {text!r}")
    return int(match.group(1))


names = []
addresses = []
photos = []
latitudes = []
longitudes = []
average_prices = []
q1_prices = []
q2_prices = []
q3_prices = []
q4_prices = []
descriptions = []
guests = []
bedrooms = []
bathrooms = []
ids = []

URL_ERRORS = []
villa_number = 0
for URL in urls:
    try:
        response = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException:
        print(f"{URL} : URL ERROR!")
        URL_ERRORS.append(URL)
        # Skip this villa entirely. Falling through would re-read the soup of
        # the previously fetched page and store that villa's data a second
        # time under this URL's row.
        continue

    response.encoding = "utf-8"
    html_parsed = bs4.BeautifulSoup(response.text, "html.parser")
    villa_number += 1

    # `except Exception` (not a bare `except`) so Ctrl-C / kernel interrupt
    # still stops the scrape.
    try:
        name = html_parsed.select_one("div.product-title.fs-4.fw-normal.mb-2.pb-1").getText().strip()
        # The ID is the lower-cased name with spaces removed.
        villa_id = name.lower().replace(" ", "")
    except Exception:
        name = None
        villa_id = None
        print(f"Name Error!: {URL}")

    try:
        address = html_parsed.select_one("span.fs-5.fw-light.d-block.mb-4").getText().strip()
    except Exception:
        address = None
        print(f"Address Error!: {URL}")

    try:
        image = tuple(
            image_url["href"]
            for image_url in html_parsed.select(
                "div.container div.row.mb-5.pb-sm-5.mt-3 div.content-start.col-xl-8 "
                "div.product-primary-image.rounded-3.position-relative div.d-none a"
            )
        )
    except Exception:
        image = None
        print(f"Image Error!: {URL}")

    try:
        # The link's href carries "...@<lat>,<lng>,..."; keep the first two
        # comma-separated values after the "@".
        location = html_parsed.select_one("div.col-sm-4.pb-sm-4  div.distances-action div a")["href"].split("@")[1].split(",")[:2]
        latitude, longitude = map(float, location)
    except Exception:
        latitude = None
        longitude = None
        print(f"Location Error!: {URL}")

    try:
        # Dots are removed before parsing (e.g. "12.500" -> "12500"); only
        # entries containing "Gece" are treated as nightly prices.
        villa_prices = [price.getText().strip().replace(".", "") for price in html_parsed.select("div.h-100.d-flex.flex-column.align-items-center.justify-content-center.text-center.py-2")]
        villa_prices = [int(price.replace("₺Gecelik", "")) for price in villa_prices if "Gece" in price]
        # An empty price list raises ZeroDivisionError here and is reported
        # as a price error below.
        avg_price = sum(villa_prices) // len(villa_prices)
        q1_price, q2_price, q3_price, q4_price = (
            avg_price * 4 * weight for weight in QUARTER_WEIGHTS
        )
    except Exception:
        print(f"{URL} : PRICE ERROR!")
        avg_price = None
        q1_price = q2_price = q3_price = q4_price = None

    try:
        # All description paragraphs except the last one, joined by newlines
        # and NFKD-normalized.
        description = unicodedata.normalize("NFKD", "\n".join([desc.getText().strip() for desc in html_parsed.select("#villa-description p")[:-1]]))
    except Exception:
        print(f"{URL} : DESCRIPTION ERROR!")
        description = None

    # Run the selector once; the three counts are read by position.
    capacity_spans = html_parsed.select(CAPACITY_SELECTOR)

    try:
        guest = _leading_int(capacity_spans[0].getText())
    except Exception:
        print(f"{URL} : GUEST NUMBER ERROR!")
        guest = None

    try:
        bedroom = _leading_int(capacity_spans[1].getText())
    except Exception:
        print(f"{URL} : BEDROOM NUMBER ERROR!")
        bedroom = None

    try:
        bathroom = _leading_int(capacity_spans[2].getText())
    except Exception:
        print(f"{URL} : BATHROOM NUMBER ERROR!")
        bathroom = None

    addresses.append(address)
    ids.append(villa_id)
    names.append(name)
    photos.append(image)
    latitudes.append(latitude)
    longitudes.append(longitude)
    descriptions.append(description)
    average_prices.append(avg_price)
    q1_prices.append(q1_price)
    q2_prices.append(q2_price)
    q3_prices.append(q3_price)
    q4_prices.append(q4_price)
    guests.append(guest)
    bedrooms.append(bedroom)
    bathrooms.append(bathroom)

    if villa_number % 75 == 0:
        print(f"{villa_number} villas have been completed!")

print("THE OPERATION IS COMPLETED!")
print(f"TOTAL VILLA NUMBER: {villa_number}")

Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-tashan-lidya
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-tashan-talya
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-tashan-metehan
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-tashan-lina
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-paradise-uno
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-eris
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-firoza-4
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-voluptas
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-diamon-3
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-firoza-3
75 villas have been completed!
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-diamon-1
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-paradise-duo
Location Error!: https://villaciniz.com.tr/kiral

900 villas have been completed!
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-water-edge-4
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-ortac-4
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-water-edge-5
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-kibele-merkur
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-water-edge-6
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-gloksinya
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-james-1
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-james-2
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-james-6
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-james-3
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-vivienda
Location Error!: https://villaciniz.com.tr/kiralik-villalar/villa-sura-duo
Location Error!: https://villaciniz.com.tr/kiralik-vill

In [4]:
# Turn the scraped column lists into three Excel workbooks, each indexed by
# "ID": the complete table, the descriptive columns only, and the price
# columns only.

# Complete table; pairs are listed in the column order written to the file.
data_raw = dict(
    [
        ("ID", ids),
        ("Name", names),
        ("Address", addresses),
        ("Guest Number", guests),
        ("Bedroom Number", bedrooms),
        ("Bathroom Number", bathrooms),
        ("Latitude", latitudes),
        ("Longitude", longitudes),
        ("Description", descriptions),
        ("Photo URLs", photos),
        ("Average Price", average_prices),
        ("Q1 Price", q1_prices),
        ("Q2 Price", q2_prices),
        ("Q3 Price", q3_prices),
        ("Q4 Price", q4_prices),
    ]
)

df_raw = pd.DataFrame(data_raw).set_index("ID")
df_raw.to_excel("villaciniz_raw.xlsx")

# Descriptive columns, in the order used by the info workbook. The values are
# the same list objects that back `data_raw`.
info_columns = (
    "ID",
    "Name",
    "Address",
    "Guest Number",
    "Bedroom Number",
    "Bathroom Number",
    "Description",
    "Photo URLs",
    "Latitude",
    "Longitude",
)
data_info = {column: data_raw[column] for column in info_columns}

df_info = pd.DataFrame(data_info).set_index("ID")
df_info.to_excel("villaciniz_info.xlsx")

# Price columns, in the order used by the financial workbook.
financial_columns = (
    "ID",
    "Average Price",
    "Q1 Price",
    "Q2 Price",
    "Q3 Price",
    "Q4 Price",
)
data_financial = {column: data_raw[column] for column in financial_columns}

df_financial = pd.DataFrame(data_financial).set_index("ID")
df_financial.to_excel("villaciniz_financial.xlsx")