# In [None]:
import os
import json
import sys
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# --- Setup ChromeDriver path safely ---
# The driver location can be overridden with the CHROMEDRIVER_PATH environment
# variable; the default is the machine-specific webdriver-manager cache path
# this script was originally written against.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"C:\Users\P RAJ KIRAN\.wdm\drivers\chromedriver\win64\138.0.7204.183\chromedriver-win32\chromedriver.exe",
)
print(f"Chromedriver path: {chromedriver_path}")

# --- Configure Selenium Service ---
options = Options()
# Uncomment to run without a visible browser window:
# options.add_argument('--headless')
if os.path.isfile(chromedriver_path):
    service = Service(executable_path=chromedriver_path)
else:
    # The configured binary is not on this machine. Let Selenium locate a
    # driver itself rather than failing on a path that cannot exist here.
    print("Chromedriver not found at that path; letting Selenium locate a driver.")
    service = Service()
driver = webdriver.Chrome(service=service, options=options)


def _parse_property(card, soup):
    """Build the record dict for a single listing card.

    Args:
        card: The ``div.m-srp-card.SRCard`` element of one listing.
        soup: The parsed page the card belongs to; used to look the listing
            span up again by its id.

    Returns:
        A dict with the listing's name, id, description, URL, price,
        location, flat details and agent details.

    Raises:
        TypeError: The card has no ``span.hidden`` element.
        KeyError: The hidden span lacks ``id`` or one of the required
            ``data-*`` attributes.
        AttributeError: No span with the listing id was found in ``soup``.
    """
    # <meta> tags without an itemprop are skipped; indexing them directly
    # would raise KeyError and throw away the whole listing.
    data = {
        m.get('itemprop'): m.get('content', '')
        for m in card.find_all('meta')
        if m.get('itemprop') is not None
    }
    hidden = card.find('span', class_='hidden')

    # Looked up by id across the whole page rather than inside the card.
    # NOTE(review): presumably this resolves to the span that carries the
    # agent/city attributes - confirm against the live markup.
    listing_span = soup.find('span', id=hidden['id'])

    return {
        "name": data.get('name', ''),
        "id": hidden['id'],
        "description": data.get('description', ''),
        "url": 'https://www.magicbricks.com' + data.get('url', ''),
        "price": hidden['data-price'],
        "priceInWord": listing_span.get('data-priced', ''),
        "location": {
            "cityName": listing_span.get('data-cityname', ''),
            "addressLocality": data.get('addressLocality', ''),
            "longitude": data.get('longitude', ''),
            "latitude": data.get('latitude', ''),
        },
        "flatDetails": {
            "numberOfRooms": data.get('numberOfRooms', ''),
            "bathroom": hidden['data-bathroom'],
            "bedroom": hidden['data-bedroom'],
            "floorSize": data.get('floorSize', ''),
            "floorno": hidden['data-floorno'],
            "furnshingstatus": hidden['data-furnshingstatus'],
        },
        "agentDetails": {
            "agentName": listing_span.get('data-soname', ''),
            "agentCompanyName": listing_span.get('data-companyname', ''),
            "agentMaskedmobilenumber": listing_span.get('data-maskedmobilenumber', ''),
        },
    }


def get_data(soup, output_path="data.json"):
    """Extract every listing card from a results page and append it to a JSON file.

    Cards that cannot be parsed are reported on stdout and skipped; the
    remaining records are appended to the ``"property"`` list stored in
    ``output_path``.

    Args:
        soup: Parsed search-results page (a BeautifulSoup document).
        output_path: JSON file holding ``{"property": [...]}``. It is created
            if it does not exist yet.

    Raises:
        json.JSONDecodeError: ``output_path`` exists but is not valid JSON.
    """
    h = []
    for card in soup.find_all('div', class_='m-srp-card SRCard'):
        try:
            h.append(_parse_property(card, soup))
        except (KeyError, TypeError, AttributeError) as e:
            # Best effort: one malformed card must not lose the whole page.
            print("Error parsing a property:", e)

    try:
        with open(output_path, 'r', encoding='utf-8') as f:
            feeds = json.load(f)
    except FileNotFoundError:
        feeds = {"property": []}

    feeds.setdefault('property', []).extend(h)

    # Reopen in 'w' mode so the file is truncated first. Overwriting in place
    # from offset 0 leaves stale trailing bytes (and therefore invalid JSON)
    # whenever the new document is shorter than the old one.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(feeds, f, indent=2)


# Cities crawled when the caller does not supply its own list.
_DEFAULT_CITIES = (
    'Gurgaon', 'Noida', 'Ghaziabad', 'Greater-Noida', 'Bangalore', 'Mumbai',
    'Pune', 'Hyderabad', 'Kolkata', 'Chennai', 'New-Delhi', 'Ahmedabad',
    'Navi-Mumbai', 'Thane', 'Faridabad', 'Bhubaneswar', 'Bokaro-Steel-City',
    'Vijayawada', 'Vrindavan', 'Bhopal', 'Gorakhpur', 'Jamshedpur', 'Agra',
    'Allahabad', 'Jodhpur', 'Aurangabad', 'Jaipur', 'Mangalore', 'Nagpur',
    'Guntur', 'Navsari', 'Palghar', 'Salem', 'Haridwar', 'Durgapur', 'Madurai',
    'Manipal', 'Patna', 'Ranchi', 'Raipur', 'Sonipat', 'Kottayam', 'Kozhikode',
    'Thrissur', 'Tirupati', 'Trivandrum', 'Trichy', 'Udaipur', 'Vapi',
    'Varanasi', 'Vadodara', 'Visakhapatnam', 'Surat', 'Kanpur', 'Kochi',
    'Mysore', 'Goa', 'Bhiwadi', 'Lucknow', 'Nashik', 'Guwahati', 'Chandigarh',
    'Indore', 'Coimbatore', 'Dehradun',
)

# Search-results URL; ``page`` and ``city`` are filled in per request.
_SEARCH_URL = (
    "https://www.magicbricks.com/property-for-sale/residential-real-estate"
    "?proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,"
    "Studio-Apartment,Residential-House,Villa&page={page}&cityName={city}"
)


def get_house_links(driver, cities=None, max_pages=89):
    """Load the search-results pages for each city and hand them to ``get_data``.

    Args:
        driver: Selenium WebDriver used to load each page.
        cities: Iterable of city names as they appear in the ``cityName``
            query parameter. Defaults to the built-in list of cities.
        max_pages: Number of result pages requested per city, starting at
            page 1. Defaults to 89, the limit this function used before it
            was configurable.
    """
    if cities is None:
        cities = _DEFAULT_CITIES

    for city in cities:
        for page in range(1, max_pages + 1):
            url = _SEARCH_URL.format(page=page, city=city)
            print(f"Scraping: {url}")
            try:
                driver.get(url)
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                get_data(soup)
            except Exception as e:
                # Deliberately broad: a page that fails to load or parse is
                # reported and skipped so the crawl continues.
                print(f"Error on city {city} page {page}:", e)


# Seed the output file on first run with an empty "property" list, which is
# the structure get_data() loads and extends. An existing file is left as is.
if not os.path.exists("data.json"):
    with open("data.json", mode='w', encoding='utf-8') as f:
        f.write(json.dumps({"property": []}))

# Crawl every configured page; the browser session is shut down afterwards
# whether the crawl finishes or raises.
try:
    get_house_links(driver)
finally:
    driver.quit()
    print("Driver session closed.")
