In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from abc import ABC, abstractmethod

# Base Property Class
class Property:
    """A real-estate listing described by size, price, location and kind.

    Attributes are stored exactly as passed in; no validation or
    conversion is performed.
    """

    def __init__(self, area, price, district, rooms, type_of_property):
        """Record the listing's area, price, district, room count and type."""
        self.area, self.price = area, price
        self.district, self.rooms = district, rooms
        self.type_of_property = type_of_property

    def calculate_price_per_m2(self):
        """Return the price divided by the area.

        Raises:
            ZeroDivisionError: if ``area`` is zero.
        """
        total_price, total_area = self.price, self.area
        return total_price / total_area

# Apartment Class
class Apartment(Property):
    """A ``Property`` tagged as 'Apartment' that also records its floor."""

    def __init__(self, area, price, district, rooms, floor):
        """Initialise the shared listing fields, then store ``floor``."""
        super().__init__(
            area=area,
            price=price,
            district=district,
            rooms=rooms,
            type_of_property='Apartment',
        )
        self.floor = floor

# House Class
class House(Property):
    """A ``Property`` tagged as 'House' that also records its garden size."""

    def __init__(self, area, price, district, rooms, garden_size):
        """Initialise the shared listing fields, then store ``garden_size``."""
        super().__init__(
            area=area,
            price=price,
            district=district,
            rooms=rooms,
            type_of_property='House',
        )
        self.garden_size = garden_size


# Strategy pattern for filters
class FilterStrategy(ABC):
    """Abstract interface for interchangeable data filters (Strategy pattern).

    Concrete subclasses must implement ``apply``.
    """

    @abstractmethod
    def apply(self, data):
        """Return the subset of ``data`` selected by this strategy."""

class PriceFilter(FilterStrategy):
    """Select rows whose ``price`` does not exceed a limit."""

    def __init__(self, price_limit):
        """Remember the inclusive upper bound for ``price``."""
        self.price_limit = price_limit

    def apply(self, data):
        """Return the rows of ``data`` with ``price <= price_limit``."""
        within_limit = data['price'].le(self.price_limit)
        return data.loc[within_limit]

class AreaFilter(FilterStrategy):
    """Select rows whose ``area`` does not exceed a limit."""

    def __init__(self, area_limit):
        """Remember the inclusive upper bound for ``area``."""
        self.area_limit = area_limit

    def apply(self, data):
        """Return the rows of ``data`` with ``area <= area_limit``."""
        within_limit = data['area'].le(self.area_limit)
        return data.loc[within_limit]

class DistrictFilter(FilterStrategy):
    """Select rows whose ``district`` equals a given value exactly."""

    def __init__(self, district):
        """Remember the district value to match."""
        self.district = district

    def apply(self, data):
        """Return the rows of ``data`` where ``district == self.district``."""
        in_district = data['district'].eq(self.district)
        return data.loc[in_district]

class RoomCountFilter(FilterStrategy):
    """Select rows whose ``rooms`` equals a given count exactly."""

    def __init__(self, room_count):
        """Remember the room count to match."""
        self.room_count = room_count

    def apply(self, data):
        """Return the rows of ``data`` where ``rooms == self.room_count``."""
        has_room_count = data['rooms'].eq(self.room_count)
        return data.loc[has_room_count]


# Importing and loading the data
def load_data(filename):
    """Read the CSV at ``filename`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(filename)
    return frame

# Basic statistics
def basic_statistics(data):
    """Print and return summary statistics for a listings DataFrame.

    Args:
        data: DataFrame with ``price``, ``area``, ``rooms`` and
            ``district`` columns.

    Returns:
        Tuple ``(avg_price, avg_area, avg_rooms, max_price_district,
        sold_count)``. ``max_price_district`` is the district with the
        highest mean price, or ``None`` when no district has a usable
        mean (for example when ``data`` has no rows). The averages are
        NaN for an empty frame.
    """
    avg_price = data['price'].mean()
    avg_area = data['area'].mean()
    avg_rooms = data['rooms'].mean()

    # idxmax() raises ValueError on an empty Series, so guard the case
    # where there are no rows (or every district mean is NaN).
    district_means = data.groupby('district')['price'].mean().dropna()
    max_price_district = district_means.idxmax() if not district_means.empty else None
    sold_count = len(data)

    print(f"Average price: {avg_price}")
    print(f"Average area: {avg_area}")
    print(f"Average rooms: {avg_rooms}")
    print(f"Most expensive district: {max_price_district}")
    print(f"Number of properties sold: {sold_count}")

    return avg_price, avg_area, avg_rooms, max_price_district, sold_count


# Price vs Area Scatter Plot
def plot_price_vs_area(data):
    """Show a scatter plot of ``price`` against ``area``, coloured by ``district``."""
    _, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=data,
        x="area",
        y="price",
        hue="district",
        palette="viridis",
        ax=ax,
    )
    ax.set_title("Price vs Area")
    ax.set_xlabel("Area (m2)")
    ax.set_ylabel("Price")
    plt.show()

# Distribution of Prices
def plot_price_distribution(data):
    """Show a 30-bin histogram of ``price`` with a KDE overlay."""
    _, ax = plt.subplots(figsize=(10, 6))
    prices = data['price']
    sns.histplot(prices, kde=True, bins=30, ax=ax)
    ax.set_title("Price Distribution")
    ax.set_xlabel("Price")
    ax.set_ylabel("Frequency")
    plt.show()

# Correlation heatmap
def plot_correlation_matrix(data):
    """Show an annotated heatmap of correlations between numeric columns.

    Non-numeric columns (such as ``district``) are excluded before the
    correlation is computed: since pandas 2.0 ``DataFrame.corr()`` no
    longer drops them silently and raises on string data instead.

    Args:
        data: DataFrame of listings.
    """
    numeric = data.select_dtypes(include="number")
    if numeric.shape[1] < 2:
        # A correlation matrix needs at least two numeric columns.
        print("Not enough numeric columns for a correlation heatmap.")
        return

    plt.figure(figsize=(10, 6))
    corr = numeric.corr()
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")
    plt.title("Correlation Heatmap")
    plt.show()


# Export filtered data to Excel
def export_to_excel(data, filename="filtered_data.xlsx"):
    """Write ``data`` to an Excel file without the index and confirm on stdout."""
    data.to_excel(filename, index=False)
    confirmation = f"Data has been exported to {filename}"
    print(confirmation)


# Price Prediction using Linear Regression
def price_predictor(data):
    """Fit a linear regression of ``price`` on ``area`` and ``rooms``.

    The data is split 80/20 (``random_state=42``); the model is fitted on
    the training part and scored on the held-out part.

    Returns:
        Tuple ``(model, mse)``: the fitted ``LinearRegression`` and the
        mean squared error on the test split.
    """
    features = data[['area', 'rooms']]
    target = data['price']

    # Hold out 20% of the rows for evaluation
    split = train_test_split(features, target, test_size=0.2, random_state=42)
    train_features, test_features, train_target, test_target = split

    regressor = LinearRegression().fit(train_features, train_target)

    # Score the model on the held-out rows
    mse = mean_squared_error(test_target, regressor.predict(test_features))
    print(f"Mean Squared Error: {mse}")

    return regressor, mse


# Build the CLI interface
DEFAULT_DATA_PATH = "C:\\Users\\User\\OneDrive\\Рабочий стол\\Python_Projects\\Real_Estate_Data_Analayzer\\real_estate_data.csv"


def _build_filter_strategy(filter_choice):
    """Prompt for the chosen filter's parameter and build the strategy.

    Returns:
        A filter strategy instance, or ``None`` when ``filter_choice`` is
        not one of ``price``, ``area``, ``district`` or ``rooms``.

    Raises:
        ValueError: if a numeric parameter cannot be parsed.
    """
    if filter_choice == 'price':
        return PriceFilter(float(input("Enter price limit: ")))
    if filter_choice == 'area':
        return AreaFilter(float(input("Enter area limit: ")))
    if filter_choice == 'district':
        return DistrictFilter(input("Enter district: "))
    if filter_choice == 'rooms':
        return RoomCountFilter(int(input("Enter number of rooms: ")))
    return None


def main(data_path=DEFAULT_DATA_PATH):
    """Run the interactive menu over the listings dataset.

    Args:
        data_path: CSV file to load; defaults to the original hard-coded
            location.

    Raises:
        Whatever ``load_data`` raises when the file cannot be read.
    """
    # Load the dataset
    data = load_data(data_path)

    while True:
        print("""
1. Basic Statistics
2. Apply Filter and Export Data
3. Visualize Data
4. Price Prediction
5. Exit
""")
        choice = input("Select an option: ").strip()

        if choice == '1':
            basic_statistics(data)

        elif choice == '2':
            filter_choice = input("Apply filter (price/area/district/rooms): ").strip()
            try:
                filter_strategy = _build_filter_strategy(filter_choice)
            except ValueError:
                # Non-numeric input for a numeric limit: go back to the menu
                # instead of crashing the whole session.
                print("Invalid number entered; no filter applied.")
                continue

            if filter_strategy is None:
                # Previously an unknown name left filter_strategy unbound
                # (or silently reused the filter from an earlier iteration).
                print(f"Unknown filter: {filter_choice!r}")
                continue

            filtered_data = filter_strategy.apply(data)
            export_to_excel(filtered_data)

        elif choice == '3':
            print("Visualizing data...")
            plot_price_vs_area(data)
            plot_price_distribution(data)
            plot_correlation_matrix(data)

        elif choice == '4':
            print("Running price prediction...")
            price_predictor(data)

        elif choice == '5':
            break

        else:
            print("Invalid option; please choose 1-5.")

# Start the CLI only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()



1. Basic Statistics
2. Apply Filter and Export Data
3. Visualize Data
4. Price Prediction
5. Exit

Average price: 120000.0
Average area: 109.0
Average rooms: 3.4
Most expensive district: Yunusobod
Number of properties sold: 5

1. Basic Statistics
2. Apply Filter and Export Data
3. Visualize Data
4. Price Prediction
5. Exit

Data has been exported to filtered_data.xlsx

1. Basic Statistics
2. Apply Filter and Export Data
3. Visualize Data
4. Price Prediction
5. Exit



In [3]:
import time
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from sklearn.linear_model import LinearRegression
import numpy as np
from webdriver_manager.chrome import ChromeDriverManager
import logging
import selenium

# Configure logging first. A logging call made while the root logger has no
# handlers implicitly runs basicConfig() with defaults (stderr, WARNING);
# a later basicConfig(filename=...) would then be a no-op and the log file
# would never be set up.
logging.basicConfig(
    filename='real_estate_scraper_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Record the Selenium version in the log
logging.info(f"Selenium version: {selenium.__version__}")

# Property class
class Property:
    """A scraped listing with its price and area parsed into numbers.

    ``price`` is an int and ``area`` a float; either is 0 when the raw
    text could not be parsed (the failure is logged).
    """

    def __init__(self, title, price_str, area_str, district, rooms):
        """Store the listing fields, parsing the raw price and area text."""
        self.title = title
        self.price = self.clean_price(price_str)
        self.area = self.clean_area(area_str)
        self.district = district
        self.rooms = rooms

    def clean_price(self, price_str):
        """Return the decimal digits of ``price_str`` joined into an int.

        Currency labels, spaces and separators are discarded. Returns 0
        (and logs an error) when no digits are present or the input is
        not a string.
        """
        try:
            # str.isdecimal, unlike str.isdigit, rejects characters such as
            # '²' that int() cannot parse, so "1200 $/m²" no longer fails.
            digits = ''.join(ch for ch in price_str if ch.isdecimal())
            # int() directly: going through float() loses precision for
            # values above 2**53.
            return int(digits)
        except (AttributeError, TypeError, ValueError) as e:
            logging.error(f"Price cleaning failed for '{price_str}': {e}")
            return 0

    def clean_area(self, area_str):
        """Return the number contained in ``area_str`` as a float.

        A decimal comma is treated as a decimal point. Returns 0.0 (and
        logs an error) when the text cannot be parsed.
        """
        try:
            cleaned = area_str.replace("m²", "").replace(",", ".").strip()
            # isdecimal keeps a stray '²' (e.g. after a Cyrillic "м") out
            # of the number; rstrip drops a sentence-ending period.
            number = ''.join(ch for ch in cleaned if ch.isdecimal() or ch == '.')
            return float(number.rstrip('.'))
        except (AttributeError, TypeError, ValueError) as e:
            logging.error(f"Area cleaning failed for '{area_str}': {e}")
            return 0.0

    def calculate_price_per_m2(self):
        """Return price / area rounded to 2 decimals, or 0 when area is 0."""
        return round(self.price / self.area, 2) if self.area else 0

    def to_dict(self):
        """Return the listing as a dict keyed by DataFrame column name."""
        return {
            "Title": self.title,
            "Price": self.price,
            "Area": self.area,
            "District": self.district,
            "Rooms": self.rooms,
            "Price_per_m2": self.calculate_price_per_m2()
        }

# Scraper class
class RealEstateScraper:
    """Scrape property listings from one page using headless Chrome.

    Parsed listings accumulate in ``self.properties`` as ``Property``
    objects; ``to_dataframe`` converts them to a pandas DataFrame.
    """

    def __init__(self, url):
        """Store the target URL; the browser is only started in ``scrape``."""
        self.url = url
        self.properties = []  # Property objects appended by scrape()
        self.driver = None  # Chrome WebDriver, created inside scrape()

    def suggest_selectors(self, soup):
        """Suggest potential CSS selectors for listings by analyzing common patterns.

        Every tag/class combination from the lists below is tried both as an
        exact class selector and as a class-substring selector; a selector is
        kept when it matches at least 5 elements.

        Returns:
            Up to 5 ``(selector, match_count)`` tuples, highest count first.
        """
        potential_selectors = []
        common_classes = ['announcement', 'listing', 'item', 'card', 'search-item', 'block', 'post']
        common_tags = ['div', 'article', 'li']

        for tag in common_tags:
            for class_name in common_classes:
                selector = f"{tag}.{class_name}"
                elements = soup.select(selector)
                if len(elements) >= 5:  # Likely a listing if multiple elements found
                    potential_selectors.append((selector, len(elements)))
                # Also try a substring match on the class attribute
                selector = f"{tag}[class*='{class_name}']"
                elements = soup.select(selector)
                if len(elements) >= 5:
                    potential_selectors.append((selector, len(elements)))

        # Sort by number of elements found
        potential_selectors.sort(key=lambda x: x[1], reverse=True)
        return potential_selectors[:5]  # Return top 5 suggestions

    def test_selector(self, soup, selector, max_items=3):
        """Test a selector and return sample data for preview.

        Returns:
            A list with one dict (``title``, ``price``, ``area``,
            ``district``) per matched element, at most ``max_items`` long;
            an empty list if anything raises (the error is logged).
        """
        try:
            items = soup.select(selector)[:max_items]
            samples = []
            for item in items:
                # When no sub-element matches, fall back to the listing
                # element itself so its full text is previewed.
                title = item.select_one('.search-item-title, h3, .announcement-title, a, .title') or item
                price = item.select_one('.search-item-price, .price, .announcement-price, span') or item
                area = item.select_one('.search-item-area, .area, .announcement-area, span') or item
                district = item.select_one('.search-item-region, .region, .announcement-region, span') or item
                samples.append({
                    'title': title.get_text(strip=True) if title else 'N/A',
                    'price': price.get_text(strip=True) if price else 'N/A',
                    'area': area.get_text(strip=True) if area else 'N/A',
                    'district': district.get_text(strip=True) if district else 'N/A'
                })
            return samples
        except Exception as e:
            logging.error(f"Error testing selector '{selector}': {e}")
            return []

    def scrape(self, custom_selector=None):
        """Load ``self.url`` in headless Chrome and parse up to 20 listings.

        When ``custom_selector`` is not given, candidate selectors are
        printed with sample data and the user is prompted to pick one
        (pressing Enter uses the first suggestion). Each matched element is
        converted to a ``Property`` and appended to ``self.properties``.

        Any exception is logged and printed rather than propagated, and the
        driver is quit in all cases. Returns ``None``.
        """
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0')

        try:
            self.driver = webdriver.Chrome(
                service=Service(ChromeDriverManager().install()),
                options=options
            )
            logging.info(f"Navigating to {self.url}")
            self.driver.get(self.url)

            # Wait up to 10 s for the <body> element to be present. This does
            # not wait for the listings themselves; a timeout is only logged.
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "body"))
                )
                logging.info("Page body loaded successfully.")
            except Exception as e:
                logging.warning(f"Dynamic content wait failed: {e}")

            # Scroll to the bottom once and pause so more content can load
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

            soup = BeautifulSoup(self.driver.page_source, 'lxml')

            # Suggest selectors if none provided
            if not custom_selector:
                print("\n🔍 Suggested CSS selectors for listings:")
                suggested_selectors = self.suggest_selectors(soup)
                if not suggested_selectors:
                    print("⚠️ No potential selectors found. Please inspect the website's HTML.")
                    logging.warning("No potential selectors found.")
                    return
                for i, (selector, count) in enumerate(suggested_selectors, 1):
                    print(f"{i}. {selector} (found {count} items)")
                    samples = self.test_selector(soup, selector)
                    if samples:
                        print("   Sample data:")
                        for sample in samples:
                            print(f"   - Title: {sample['title'][:50]}... | Price: {sample['price']} | Area: {sample['area']} | District: {sample['district']}")
                custom_selector = input("\nEnter a selector from above or a custom one (or press Enter to use the first): ").strip()
                if not custom_selector and suggested_selectors:
                    custom_selector = suggested_selectors[0][0]

            if not custom_selector:
                print("❌ No selector provided. Please specify a CSS selector.")
                logging.error("No selector provided.")
                return

            # Scrape using the chosen selector (first 20 matches only)
            listings = soup.select(custom_selector)[:20]
            logging.info(f"Using selector '{custom_selector}': found {len(listings)} listings.")

            if not listings:
                print(f"❌ No listings found with selector '{custom_selector}'. Try a different selector.")
                logging.error(f"No listings found with selector '{custom_selector}'.")
                return

            for item in listings:
                try:
                    # NOTE(review): the price, area and district selector
                    # groups all end in a bare `span`, so when none of the
                    # specific classes exist they resolve to the same first
                    # <span> of the listing - confirm against the real markup.
                    title_elem = item.select_one('.search-item-title, h3, .announcement-title, a, .title')
                    price_elem = item.select_one('.search-item-price, .price, .announcement-price, span')
                    area_elem = item.select_one('.search-item-area, .area, .announcement-area, span')
                    district_elem = item.select_one('.search-item-region, .region, .announcement-region, span')

                    title = title_elem.text.strip() if title_elem else 'N/A'
                    price = price_elem.text.strip() if price_elem else '0'
                    area = area_elem.text.strip() if area_elem else '0'
                    district = district_elem.text.strip() if district_elem else 'N/A'
                    # Room count is taken from the first character of the
                    # title when it is a digit (kept as a string).
                    rooms = title[0] if title and title[0].isdigit() else 'N/A'

                    prop = Property(title, price, area, district, rooms)
                    self.properties.append(prop)
                    logging.info(f"Scraped: {title} | Price: {price} | Area: {area} | District: {district}")
                except Exception as e:
                    # A single malformed listing is logged and skipped
                    logging.error(f"Error processing listing: {e}")
                    continue

        except Exception as e:
            logging.error(f"Scraping failed: {e}")
            print(f"Scraping failed: {e}")
        finally:
            # Always shut the browser down, including on the early returns above
            if self.driver:
                self.driver.quit()

    def to_dataframe(self):
        """Return the collected properties as a DataFrame, one row per listing."""
        df = pd.DataFrame([p.to_dict() for p in self.properties])
        logging.info(f"Created DataFrame with {len(df)} rows.")
        return df

# Strategy Pattern for filtering
class FilterStrategy:
    """Base class for interchangeable DataFrame filters (Strategy pattern)."""

    def apply(self, data):
        """Return the filtered data; this base implementation returns None."""

class PriceFilter(FilterStrategy):
    """Select rows whose ``Price`` does not exceed a maximum."""

    def __init__(self, max_price):
        """Remember the inclusive upper bound for ``Price``."""
        self.max_price = max_price

    def apply(self, data):
        """Return the rows of ``data`` with ``Price <= max_price``."""
        affordable = data["Price"].le(self.max_price)
        return data.loc[affordable]

class AreaFilter(FilterStrategy):
    """Select rows whose ``Area`` is at least a minimum."""

    def __init__(self, min_area):
        """Remember the inclusive lower bound for ``Area``."""
        self.min_area = min_area

    def apply(self, data):
        """Return the rows of ``data`` with ``Area >= min_area``."""
        large_enough = data["Area"].ge(self.min_area)
        return data.loc[large_enough]

class DistrictFilter(FilterStrategy):
    """Select rows whose ``District`` contains a pattern, ignoring case."""

    def __init__(self, district):
        """Remember the pattern to look for in ``District``."""
        self.district = district

    def apply(self, data):
        """Return the rows whose ``District`` contains ``self.district``.

        Matching is case-insensitive and missing values never match.
        """
        district_column = data["District"]
        matches = district_column.str.contains(self.district, case=False, na=False)
        return data.loc[matches]

class RoomCountFilter(FilterStrategy):
    """Select rows whose ``Rooms`` equals a room count given as text."""

    def __init__(self, room_count):
        """Store the room count converted to ``str`` for comparison."""
        self.room_count = str(room_count)

    def apply(self, data):
        """Return the rows of ``data`` where ``Rooms == self.room_count``."""
        same_rooms = data["Rooms"].eq(self.room_count)
        return data.loc[same_rooms]

# Export function
def export_to_excel(data, filename="filtered_data.xlsx"):
    """Write ``data`` to an Excel file without the index.

    Success and failure are both reported on stdout and in the log; a
    failure is not re-raised.
    """
    try:
        data.to_excel(filename, index=False)
        success_message = f"✅ Ma’lumotlar {filename} fayliga eksport qilindi!"
        print(success_message)
        logging.info(f"Exported data to {filename}")
    except Exception as err:
        failure_message = f"❌ Excel eksportida xato: {err}"
        print(failure_message)
        logging.error(f"Excel export failed: {err}")

# Stats function
def show_statistics(data):
    """Print the mean price, the priciest district and the listing count.

    Prints a notice and returns early when ``data`` is empty.
    """
    if data.empty:
        print("📊 No data available for statistics.")
        return

    print("📊 Statistika:")

    mean_price = round(data["Price"].mean(), 2)
    print("O‘rtacha narx:", mean_price, "so'm")

    # District whose listings have the highest average price
    mean_by_district = data.groupby("District")["Price"].mean()
    print("Eng qimmat hudud:", mean_by_district.idxmax())

    listing_count = len(data)
    print("Sotuvdagi jami obyektlar:", listing_count)
    logging.info("Displayed statistics.")

# Visualization
def visualize_data(data):
    """Save a bar chart of mean ``Price`` per ``District`` to a PNG file.

    The chart is written to ``district_price_plot.png``. Prints a notice
    and returns early when ``data`` is empty.
    """
    if data.empty:
        print("📊 No data available for visualization.")
        return

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=data, x="District", y="Price", estimator=np.mean, ax=ax)
    # Tilt the district names so long labels do not overlap
    plt.setp(ax.get_xticklabels(), rotation=45)
    ax.set_title("Hududlar bo‘yicha o‘rtacha narx")
    fig.tight_layout()
    fig.savefig('district_price_plot.png')
    logging.info("Saved visualization to district_price_plot.png")

# ML price prediction
def price_predictor(data, area):
    """Predict a price for ``area`` with a linear regression on ``Area``.

    Args:
        data: DataFrame with ``Area`` and ``Price`` columns.
        area: Area in square metres to predict a price for.

    Returns:
        The predicted price as a float rounded to 2 decimals, or 0 when
        there are fewer than two rows or the prediction fails (the
        failure is printed and logged).
    """
    if data.empty or len(data) < 2:
        print("❌ Insufficient data for price prediction.")
        return 0
    try:
        model = LinearRegression()
        X = data[["Area"]]
        y = data["Price"]
        model.fit(X, y)
        # The model was fitted on a DataFrame, so predict with the same
        # column name; a bare ndarray triggers scikit-learn's
        # "X does not have valid feature names" warning.
        query = pd.DataFrame({"Area": [area]})
        pred = model.predict(query)
        logging.info(f"Predicted price for area {area}m²: {pred[0]}")
        # Convert the numpy scalar to a plain float for callers
        return round(float(pred[0]), 2)
    except Exception as e:
        print(f"❌ Prediction failed: {e}")
        logging.error(f"Prediction failed: {e}")
        return 0

# CLI
def main():
    """Scrape uybor.uz listings, then run the interactive analysis menu.

    Returns early when nothing was scraped. Otherwise loops over the menu
    (statistics, filter and export, visualization, price prediction)
    until the user chooses to exit.
    """
    print("Real Estate Scraper for uybor.uz")
    print("-------------------------------")

    scraper = RealEstateScraper("https://uybor.uz/uz/toshkent/kvartiralar/")
    scraper.scrape()
    df = scraper.to_dataframe()

    if df.empty:
        print("❌ No data scraped. Check 'real_estate_scraper_log.txt' for details.")
        print("Tip: Inspect the website's HTML (F12 in browser) to find the correct CSS selector.")
        return

    print("\n📊 Topilgan e'lonlar:")
    print(df.head())

    menu = """
        1. Statistika
        2. Filterlash va eksport
        3. Grafik vizualizatsiya
        4. Narxni bashorat qilish (ML)
        5. Chiqish
        """

    # Each entry prompts for its own parameter and builds the strategy
    strategy_builders = {
        '1': lambda: PriceFilter(int(input("Maksimal narx (so'm): "))),
        '2': lambda: AreaFilter(float(input("Minimal maydon (m²): "))),
        '3': lambda: DistrictFilter(input("Hudud nomi: ")),
        '4': lambda: RoomCountFilter(input("Xonalar soni: ")),
    }

    while True:
        print(menu)
        choice = input("Tanlang (1-5): ").strip()

        if choice == '5':
            print("Dasturdan chiqildi.")
            break

        if choice == '1':
            show_statistics(df)
            continue

        if choice == '2':
            print("Filterlar: 1=Price, 2=Area, 3=District, 4=Rooms")
            f_choice = input("Filtr tanlang: ").strip()

            try:
                build_strategy = strategy_builders.get(f_choice)
                if build_strategy is None:
                    print("❌ Noto‘g‘ri filtr.")
                    continue

                filtered = build_strategy().apply(df)
                if not filtered.empty:
                    print(filtered)
                    export_to_excel(filtered)
                else:
                    print("❌ No data matches the filter.")
            except Exception as err:
                print(f"❌ Filter error: {err}")
                logging.error(f"Filter error: {err}")
            continue

        if choice == '3':
            visualize_data(df)
            continue

        if choice == '4':
            try:
                requested_area = float(input("Maydonni kiriting (m²): "))
                predicted = price_predictor(df, requested_area)
                if predicted:
                    print(f"💰 Bashorat qilingan narx: {predicted} so‘m")
            except ValueError:
                print("❌ Invalid area input.")
            continue

        print("❌ Noto‘g‘ri tanlov!")

# Start the scraper CLI only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Real Estate Scraper for uybor.uz
-------------------------------





🔍 Suggested CSS selectors for listings:
⚠️ No potential selectors found. Please inspect the website's HTML.
❌ No data scraped. Check 'real_estate_scraper_log.txt' for details.
Tip: Inspect the website's HTML (F12 in browser) to find the correct CSS selector.
