In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import json
import time

# City label appended to the `cities` list for every scraped listing.
CITY = "Kolkata"

# Header set passed to every requests.get call below.
headers = {"User-Agent": "Mozilla/5.0"}

# One accumulator list per output column; entries at the same index
# belong to the same listing.
names, cities, bedrooms, prices = [], [], [], []
rents, areas, furnishings = [], [], []

# FUNCTION: Extract area from RENT detail page

def extract_rent_area(detail_soup):
    """Return the area shown on a rent detail page as a string of digits.

    Scans every ``li.mb-ldp__dtls__body__list--item`` in ``detail_soup``,
    picks the first one whose label text contains "Area" and whose value
    div starts with a text node holding at least one digit, and returns
    that text with every non-digit character removed.

    Args:
        detail_soup: Parsed detail page (any object offering BeautifulSoup's
            ``find_all`` / ``find`` / ``get_text`` / ``contents`` API).

    Returns:
        The digits of the area value, or "N/A" when no usable value exists.
        The function never returns an empty string and does not raise on
        empty or tag-only value blocks.
    """
    for item in detail_soup.find_all("li", class_="mb-ldp__dtls__body__list--item"):
        label = item.find("div", class_="mb-ldp__dtls__body__list--label")
        if label is None or "Area" not in label.get_text(strip=True):
            continue

        block = item.find("div", class_="mb-ldp__dtls__body__list")
        # An absent or empty value div has no first child to read.
        if block is None or not block.contents:
            continue

        first = block.contents[0]
        # Only a text node (NavigableString is a str subclass) carries the
        # number; a leading tag would not support the string handling below.
        if not isinstance(first, str):
            continue

        digits = re.sub(r"\D", "", first)
        if digits:
            return digits
    return "N/A"



# SCRAPE BUY PROPERTIES (RESIDENTIAL ONLY) — 15 pages

def _summary_value(card, key):
    """Return the stripped text of a card's ``data-summary=key`` field, or None if absent."""
    field = card.find("div", {"data-summary": key})
    if not field:
        return None
    value = field.find("div", class_="mb-srp__card__summary--value")
    return value.get_text(strip=True) if value else None


def _area_digits(card, key):
    """Return the digits of a card's area field, or "N/A" when missing or digit-free."""
    text = _summary_value(card, key)
    digits = re.sub(r"\D", "", text) if text else ""
    # An empty digit string must count as missing so the caller's
    # carpet-area -> super-area fallback can take over.
    return digits or "N/A"


for page in range(1, 16):

    url = f"https://www.magicbricks.com/property-for-sale/residential-real-estate?cityName=kolkata&page={page}"
    try:
        # The timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Skip this page but keep the rows already collected.
        print(f"Skipping sale page {page}: {exc}")
        continue
    soup = BeautifulSoup(response.text, "html.parser")
    cards = soup.find_all("div", class_="mb-srp__list")

    for card in cards:

        # RESIDENTIAL FILTER FIRST: cards without a furnishing value are skipped
        fval = _summary_value(card, "furnishing")
        if fval is None or fval == "N/A":
            continue  # skip non-residential

        # NAME
        title_tag = card.find("h2", class_="mb-srp__card--title")
        if not title_tag:
            continue

        title = title_tag.get_text(strip=True)
        names.append(title)
        cities.append(CITY)
        furnishings.append(fval)

        # BEDROOMS: number preceding "BHK" in the title
        match = re.search(r"(\d+)\s*BHK", title, re.IGNORECASE)
        bedrooms.append(match.group(1) if match else "N/A")

        # PRICE: text of the price element without the rupee sign and commas
        price_tag = card.find("div", class_="mb-srp__card__price--amount")
        if price_tag:
            p = price_tag.get_text(strip=True).replace("₹", "").replace(",", "").strip()
        else:
            p = "N/A"
        prices.append(p)

        # RENT = N/A for BUY
        rents.append("N/A")

        # AREA: prefer carpet area, fall back to super area
        carpet_area = _area_digits(card, "carpet-area")
        super_area = _area_digits(card, "super-area")
        areas.append(carpet_area if carpet_area != "N/A" else super_area)


# SCRAPE RENT PROPERTIES (RESIDENTIAL ONLY) — 5 pages

for page in range(1, 6):

    url = f"https://www.magicbricks.com/property-for-rent/residential-real-estate?cityName=kolkata&page={page}"
    try:
        # The timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Skip this page but keep the rows already collected.
        print(f"Skipping rent page {page}: {exc}")
        continue
    soup = BeautifulSoup(response.text, "html.parser")
    cards = soup.find_all("div", class_="mb-srp__list")

    for card in cards:

        # RESIDENTIAL FILTER FIRST: cards without a furnishing value are skipped
        furn = card.find("div", {"data-summary": "furnishing"})
        if not furn:
            continue
        fv = furn.find("div", class_="mb-srp__card__summary--value")
        if not fv:
            continue
        fval = fv.get_text(strip=True)

        # NAME
        title_tag = card.find("h2", class_="mb-srp__card--title")
        if not title_tag:
            continue

        title = title_tag.get_text(strip=True)
        names.append(title)
        cities.append(CITY)
        furnishings.append(fval)

        # BEDROOMS: case-insensitive, matching the sale loop
        match = re.search(r"(\d+)\s*BHK", title, re.IGNORECASE)
        bedrooms.append(match.group(1) if match else "N/A")

        # PRICE = N/A for RENT
        prices.append("N/A")

        # RENT: text of the price element without the rupee sign and commas
        price_tag = card.find("div", class_="mb-srp__card__price--amount")
        if price_tag:
            r = price_tag.get_text(strip=True).replace("₹", "").replace(",", "").strip()
        else:
            r = "N/A"
        rents.append(r)

        # DETAIL LINK from the card's JSON-LD script tags
        link = None
        for s in card.find_all("script", {"type": "application/ld+json"}):
            try:
                d = json.loads(s.text)
            except ValueError:
                # Malformed JSON-LD (JSONDecodeError is a ValueError):
                # try the next script tag.
                continue
            # Only a JSON object can carry a "url" key.
            if isinstance(d, dict) and "url" in d:
                link = d["url"]
                break

        # AREA: read from the listing's detail page when a link was found
        area_val = "N/A"
        if link:
            try:
                html = requests.get(link, headers=headers, timeout=5).text
                detail_soup = BeautifulSoup(html, "html.parser")
                # Treat an empty result as missing as well.
                area_val = extract_rent_area(detail_soup) or "N/A"
            except Exception:
                # Best-effort lookup: a failed fetch or parse of one detail
                # page must not abort the scrape. Unlike a bare except, this
                # lets KeyboardInterrupt and SystemExit through.
                area_val = "N/A"

        areas.append(area_val)
        time.sleep(0.3)


# FINAL DATAFRAME

# Combine the per-column accumulator lists into one table, give it a fresh
# 0..n-1 index, and leave `df` as the last expression so the notebook
# renders it.
df = pd.DataFrame(
    dict(
        Name=names,
        City=cities,
        Bedrooms=bedrooms,
        Price=prices,
        Rent=rents,
        Area=areas,
        Furnishing=furnishings,
    )
)
df = df.reset_index(drop=True)
df


Unnamed: 0,Name,City,Bedrooms,Price,Rent,Area,Furnishing
0,"3 BHK Apartment for Sale in DTC GOOD EARTH, Ma...",Kolkata,3,52.5 Lac,,1020,Unfurnished
1,2 BHK Apartment for Sale in Team Taurus Singha...,Kolkata,2,66.3 Lac,,1197,Unfurnished
2,"3 BHK Apartment for Sale in Prudent Amara, Raj...",Kolkata,3,47.4 Lac,,898,Unfurnished
3,4 BHK Villa for Sale in Arizuma Southern Vista...,Kolkata,4,1.25 Cr,,1631,Unfurnished
4,3 BHK Apartment for Sale in Rishinox Ventoso P...,Kolkata,3,65.5 Lac,,785,Unfurnished
...,...,...,...,...,...,...,...
514,"2 BHK Flat for Rent in Realtech Titli, Realtec...",Kolkata,2,,18500,740,Semi-Furnished
515,"2 BHK Flat for Rent in Naktala, Garia, Kolkata",Kolkata,2,,23000,1000,Unfurnished
516,3 BHK Flat for Rent in Associated Erectors Gre...,Kolkata,3,,20000,800,Semi-Furnished
517,"2 BHK Flat for Rent in Bonorini, Bonorini, Dun...",Kolkata,2,,17000,750,Furnished


In [3]:
# Write the scraped table to properties.csv, omitting the index column.
df.to_csv("properties.csv", index=False)

In [2]:
import pandas as pd

# Reload the saved listings from disk.
# NOTE(review): pandas' default NA parsing is expected to read the "N/A"
# placeholders back as NaN - confirm before relying on those columns.
df = pd.read_csv("properties.csv")

In [3]:
# Display five randomly selected rows of the reloaded table.
df.sample(5)

Unnamed: 0,Name,City,Bedrooms,Price,Rent,Area,Furnishing
303,2 BHK Apartment for Sale in Kosmic North Grand...,Kolkata,2.0,45.5 Lac,,700,Unfurnished
210,3 BHK Apartment for Sale in Ghosh Para Kestopu...,Kolkata,3.0,55 Lac,,1100,Semi-Furnished
166,"2 BHK Apartment for Sale in Golpark, Gariahat ...",Kolkata,2.0,1.05 Cr,,960,Semi-Furnished
38,"2 BHK Apartment for Sale in JMC Broadway, Sect...",Kolkata,2.0,97.5 Lac,,776,Unfurnished
11,3 BHK Apartment for Sale in Srijan Town Square...,Kolkata,3.0,2.75 Cr,,2017,Unfurnished
