<a href="https://colab.research.google.com/github/AliReza000J/realestate-ranking/blob/main/realEstate_ranking_preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install datasets
! pip install hazm
! pip install persiantools

import pandas as pd
import numpy as np
from datasets import load_dataset
import re
from hazm import Normalizer, WordTokenizer, Lemmatizer, stopwords_list
from sklearn.preprocessing import MinMaxScaler
from nltk.tokenize import sent_tokenize, word_tokenize
import nltk
nltk.download('punkt_tab')
from persiantools.jdatetime import JalaliDate



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [None]:
# Mount Google Drive inside the Colab runtime; the last cell of this notebook
# writes the cleaned CSV under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load data: fetch the ads dataset and materialise its 'train' split as a
# pandas DataFrame.
ds = load_dataset("divaroffical/real_estate_ads")
df = ds['train'].to_pandas()

# Show every column when a frame is displayed (the frame is very wide).
pd.set_option("display.max_columns", None)

print(f"Shape: {df.shape}")
df.head()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/6.21k [00:00<?, ?B/s]

real_estate_ads.csv:   0%|          | 0.00/781M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Shape: (1000000, 60)


Unnamed: 0,cat2_slug,cat3_slug,city_slug,neighborhood_slug,created_at_month,user_type,description,title,rent_mode,rent_value,rent_to_single,rent_type,price_mode,price_value,credit_mode,credit_value,rent_credit_transform,transformable_price,transformable_credit,transformed_credit,transformable_rent,transformed_rent,land_size,building_size,deed_type,has_business_deed,floor,rooms_count,total_floors_count,unit_per_floor,has_balcony,has_elevator,has_warehouse,has_parking,construction_year,is_rebuilt,has_water,has_warm_water_provider,has_electricity,has_gas,has_heating_system,has_cooling_system,has_restroom,has_security_guard,has_barbecue,building_direction,has_pool,has_jacuzzi,has_sauna,floor_material,property_type,regular_person_capacity,extra_person_capacity,cost_per_extra_person,rent_price_on_regular_days,rent_price_on_special_days,rent_price_at_weekends,location_latitude,location_longitude,location_radius
0,temporary-rent,villa,karaj,mehrshahr,2024-08-01 00:00:00,مشاور املاک,۵۰۰متر\n۲۰۰متر بنا دوبلکس\n۳خواب\nاستخر آبگرم ...,باغ ویلا اجاره روزانه استخر داخل لشکرآباد سهیلیه,,,,,,,,,,,,,,,,500.0,,,,سه,,,,,,,,,,,,,,,,,,,,,,,,4.0,6.0,350000.0,1500000.0,3500000000.0,3500000.0,35.811684,50.9366,500.0
1,residential-sell,apartment-sell,tehran,gholhak,2024-05-01 00:00:00,مشاور املاک,دسترسی عالی به مترو و شریعتی \nمشاعات تمیز \nب...,۶۰ متر قلهک فول امکانات,,,,,مقطوع,8500000000.0,,,,,,,,,,60.0,,,3.0,یک,,,,True,True,True,۱۳۸۴,,,,,,,,,,,,,,,,,,,,,,,,,500.0
2,residential-rent,apartment-rent,tehran,tohid,2024-10-01 00:00:00,,تخلیه پایان ماه,آپارتمان ۳ خوابه ۱۳۲ متر,مقطوع,26000000.0,,,,,مقطوع,750000000.0,False,False,750000000.0,,26000000.0,,,132.0,,,3.0,سه,,,,True,True,True,۱۴۰۱,False,,,,,,,,,,,,,,,,,,,,,,35.703865,51.373459,
3,commercial-rent,office-rent,tehran,elahiyeh,2024-06-01 00:00:00,,فرشته تاپ لوکیشن\n۹۰ متر موقعیت اداری\nیک اتاق...,فرشته ۹۰ متر دفتر کار مدرن موقعیت اداری,مقطوع,95000000.0,,,,,مقطوع,950000000.0,False,False,950000000.0,,95000000.0,,,90.0,,,4.0,یک,,,,True,False,True,۱۴۰۰,,,,,,,,,,,,,,,,,,,,,,,,,
4,residential-sell,apartment-sell,mashhad,emamreza,2024-05-01 00:00:00,مشاور املاک,هلدینگ ساختمانی اکبری\n\nهمراه شما هستیم برای ...,۱۱۵ متری/شمالی رو به آفتاب/اکبری,,,,,مقطوع,5750000000.0,,,,,,,,,,115.0,single_page,,4.0,دو,6.0,,True,True,True,True,۱۴۰۳,,,package,,,shoofaj,air_conditioner,squat_seat,,,north,,,,ceramic,,,,,,,,,,


In [None]:
# Basic text cleaning
# Shared hazm helpers, built once and reused by the text-processing cells below.
normalizer = Normalizer()
# NOTE(review): `tokenizer` is not referenced by the cells visible in this
# notebook (clean_persian_text splits on whitespace instead) — confirm it is needed.
tokenizer = WordTokenizer()
lemmatizer = Lemmatizer()
# Stored as a set so membership tests during cleaning are cheap.
stopwords = set(stopwords_list())

In [None]:
# Translation table: Persian digits (۰-۹) -> ASCII digits (0-9)
persian_digit_map = str.maketrans("۰۱۲۳۴۵۶۷۸۹", "0123456789")


def convert_persian_digits(column):
    """Return `column` as a numeric Series after translating Persian digits.

    Every value is first cast to text, Persian digits are replaced by their
    ASCII counterparts, and the result is parsed with `pd.to_numeric`.
    Anything that still is not a number (including the text produced by
    missing values, e.g. 'nan' or 'None') becomes NaN.
    """
    as_text = column.astype(str)
    ascii_digits = as_text.str.translate(persian_digit_map)
    return pd.to_numeric(ascii_digits, errors='coerce')

# 'construction_year' is stored with Persian digits (e.g. "۱۳۸۴"): translate and parse it.
df['construction_year'] = convert_persian_digits(df['construction_year'])

# 'rooms_count' is stored as Persian number *words* ("یک", "دو", "سه", ...), not
# digits, so digit translation alone coerces every value to NaN. Map the words
# first and fall back to digit parsing for anything that is already numeric.
# NOTE(review): only "یک", "دو" and "سه" appear in the sample shown above; the
# remaining labels are presumed — verify against df['rooms_count'].unique().
rooms_word_to_count = {
    'بدون اتاق': 0,
    'یک': 1,
    'دو': 2,
    'سه': 3,
    'چهار': 4,
    'پنج': 5,
    'پنج یا بیشتر': 5,
}
rooms_raw = df['rooms_count']
rooms_from_words = rooms_raw.astype(str).str.strip().map(rooms_word_to_count)
df['rooms_count'] = rooms_from_words.fillna(convert_persian_digits(rooms_raw))

# Keep only rows whose construction year is a plausible Persian-calendar year.
# `between` is False for NaN, so rows with a missing year are removed by the
# same filter and no separate null check is needed.
df = df[df['construction_year'].between(1300, 1500, inclusive='both')]

In [None]:
# Columns that hold monetary amounts and must be coerced to float
price_columns = [
    # sale / deposit / monthly rent
    'price_value',
    'credit_value',
    'rent_value',
    # "transformable" flags and their transformed counterparts
    'transformable_price',
    'transformable_credit',
    'transformable_rent',
    'transformed_rent',
    'transformed_credit',
    # short-term (daily) rental prices
    'rent_price_on_regular_days',
    'rent_price_on_special_days',
    'rent_price_at_weekends',
    'cost_per_extra_person',
]

def normalize_price(value):
    """Coerce one raw price cell to ``float``.

    Missing values map to NaN. Otherwise the value is rendered as text,
    thousands separators (",") and surrounding whitespace are removed, and the
    result is parsed with ``float``. Values that cannot be parsed yield NaN.

    Only ``TypeError``/``ValueError`` are swallowed, so unrelated failures
    (e.g. ``KeyboardInterrupt``) are no longer hidden by a bare ``except``.
    """
    try:
        if pd.isna(value):
            return np.nan
        return float(str(value).replace(",", "").strip())
    except (TypeError, ValueError):
        # ValueError: unparsable text, or an array-like whose truth value is ambiguous.
        return np.nan

# Coerce every price column to float, cell by cell.
for price_col in price_columns:
    df[price_col] = df[price_col].map(normalize_price)

# Calculate unified price
def calculate_unified_price(row):
    """Collapse the different pricing schemes of one listing into one number.

    Order of precedence:
      1. ``price_value`` when present.
      2. A deposit scaled by a factor that depends on ``construction_year``
         (x4 from 1400 on, x5 for 1390-1399, x6 before 1390). The deposit is
         ``credit_value`` when both it and the year are present, otherwise
         ``transformed_credit`` when that is present.
      3. ``rent_price_on_regular_days`` x 1000 for short-term rentals.
      4. NaN when none of the above applies.
    """
    sale_price = row['price_value']
    if pd.notna(sale_price):
        return sale_price

    deposit_direct = row['credit_value']
    built_in = row['construction_year']
    deposit_transformed = row['transformed_credit']

    # Pick which deposit (if any) drives the estimate.
    if pd.notna(deposit_direct) and pd.notna(built_in):
        deposit = deposit_direct
    elif pd.notna(deposit_transformed):
        deposit = deposit_transformed
    else:
        deposit = None

    if deposit is not None:
        # Newest bucket first; a NaN year fails every comparison and falls through.
        for first_year, factor in ((1400, 4), (1390, 5)):
            if built_in >= first_year:
                return deposit * factor
        if built_in < 1390:
            return deposit * 6

    # Short-term pricing (daily rent)
    daily_rent = row['rent_price_on_regular_days']
    if pd.notna(daily_rent):
        return daily_rent * 1000

    return np.nan

# Compute the unified price row by row, then store it with its log1p transform.
unified = df.apply(calculate_unified_price, axis=1)
df['unified_price'] = unified
df['log_unified_price'] = np.log1p(unified)

In [None]:
# Keep listings with a unified price of at least 10,000,000 and with both a
# description and a building size. `dropna` is reassigned rather than run
# in place, because the frame comes from boolean indexing and in-place
# mutation of such a slice is what triggers chained-assignment warnings.
df = df[df['unified_price'].notna() & (df['unified_price'] >= 10_000_000)]
df = df.dropna(subset=['description', 'building_size'])

df['created_at_month'] = pd.to_datetime(df['created_at_month'])

# Convert 'created_at_month' to a Jalali (Persian) date once per row and read
# both the month and the day from that single conversion.
jalali_dates = [JalaliDate(ts) for ts in df['created_at_month']]
df['month'] = [jalali.month for jalali in jalali_dates]
df['day'] = [jalali.day for jalali in jalali_dates]

# Show the updated dataframe
df[['created_at_month', 'month', 'day']].head()

Unnamed: 0,created_at_month,month,day
1,2024-05-01,2,12
2,2024-10-01,7,10
3,2024-06-01,3,12
4,2024-05-01,2,12
5,2024-09-01,6,11


In [None]:
# A listing offers multiple pricing options when it carries both the
# "transformed" and the "transformable" value for rent, or both for credit.
rent_pair_present = df[["transformed_rent", "transformable_rent"]].notnull().all(axis=1)
credit_pair_present = df[["transformed_credit", "transformable_credit"]].notnull().all(axis=1)
df["has_multiple_pricing_options"] = rent_pair_present | credit_pair_present

In [None]:
# Step 1: Drop listings without a category slug. This must happen BEFORE any
# string casting: once a null has been turned into the text 'None'/'nan',
# `.notna()` can no longer detect it.
df = df[df['cat3_slug'].notna()]

# Step 2: Append the original property_type to cat3_slug when it carries a real
# value. Null values and their text forms ('none', 'nan', '') count as missing,
# so no "-nan" / "-None" suffix is produced. Done column-wise, not row by row.
category_slug = df['cat3_slug'].astype(str)
sub_type = df['property_type'].astype(str)
sub_type_present = (
    df['property_type'].notna()
    & ~sub_type.str.strip().str.lower().isin(['', 'none', 'nan'])
)
df = df.assign(
    cat3_slug=category_slug.where(~sub_type_present, category_slug + '-' + sub_type)
)

# Step 3: Drop the old 'property_type' column and give the slugs their final names
df = df.drop(columns=['property_type'])
df = df.rename(columns={'cat2_slug': 'listing_type', 'cat3_slug': 'property_type'})

# Step 4: Remove the "-sell" / "-rent" suffixes, then tidy leftover hyphens and whitespace
target_col = 'property_type'
df[target_col] = df[target_col].str.replace(r'\b(-sell|-rent)\b', '', case=False, regex=True)
df[target_col] = df[target_col].str.strip('-').str.replace(r'-{2,}', '-', regex=True).str.strip()
df.head()

Unnamed: 0,listing_type,property_type,city_slug,neighborhood_slug,created_at_month,user_type,description,title,rent_mode,rent_value,rent_to_single,rent_type,price_mode,price_value,credit_mode,credit_value,rent_credit_transform,transformable_price,transformable_credit,transformed_credit,transformable_rent,transformed_rent,land_size,building_size,deed_type,has_business_deed,floor,rooms_count,total_floors_count,unit_per_floor,has_balcony,has_elevator,has_warehouse,has_parking,construction_year,is_rebuilt,has_water,has_warm_water_provider,has_electricity,has_gas,has_heating_system,has_cooling_system,has_restroom,has_security_guard,has_barbecue,building_direction,has_pool,has_jacuzzi,has_sauna,floor_material,regular_person_capacity,extra_person_capacity,cost_per_extra_person,rent_price_on_regular_days,rent_price_on_special_days,rent_price_at_weekends,location_latitude,location_longitude,location_radius,unified_price,log_unified_price,month,day,has_multiple_pricing_options
1,residential-sell,apartment,tehran,gholhak,2024-05-01,مشاور املاک,دسترسی عالی به مترو و شریعتی \nمشاعات تمیز \nب...,۶۰ متر قلهک فول امکانات,,,,,مقطوع,8500000000.0,,,,,,,,,,60.0,,,3,,,,,True,True,True,1384.0,,,,,,,,,,,,,,,,,,,,,,,,500.0,8500000000.0,22.863332,2,12,False
2,residential-rent,apartment,tehran,tohid,2024-10-01,,تخلیه پایان ماه,آپارتمان ۳ خوابه ۱۳۲ متر,مقطوع,26000000.0,,,,,مقطوع,750000000.0,False,,750000000.0,,26000000.0,,,132.0,,,3,,,,,True,True,True,1401.0,False,,,,,,,,,,,,,,,,,,,,,35.703865,51.373459,,3000000000.0,21.821878,7,10,False
3,commercial-rent,office,tehran,elahiyeh,2024-06-01,,فرشته تاپ لوکیشن\n۹۰ متر موقعیت اداری\nیک اتاق...,فرشته ۹۰ متر دفتر کار مدرن موقعیت اداری,مقطوع,95000000.0,,,,,مقطوع,950000000.0,False,,950000000.0,,95000000.0,,,90.0,,,4,,,,,True,False,True,1400.0,,,,,,,,,,,,,,,,,,,,,,,,,3800000000.0,22.058267,3,12,False
4,residential-sell,apartment,mashhad,emamreza,2024-05-01,مشاور املاک,هلدینگ ساختمانی اکبری\n\nهمراه شما هستیم برای ...,۱۱۵ متری/شمالی رو به آفتاب/اکبری,,,,,مقطوع,5750000000.0,,,,,,,,,,115.0,single_page,,4,,6.0,,True,True,True,True,1403.0,,,package,,,shoofaj,air_conditioner,squat_seat,,,north,,,,ceramic,,,,,,,,,,5750000000.0,22.472466,2,12,False
5,residential-rent,apartment,ahvaz,mellirah,2024-09-01,,اپارتمان ۱۰۰متری واحد جلو دارای دو اتاق خواب ک...,اپارتمان ۱۰۰متری ملی راه واحد جلو,مقطوع,6000000.0,,,,,مقطوع,250000000.0,False,,250000000.0,400000000.0,6000000.0,1.0,,100.0,,,3,,3.0,2.0,True,False,True,True,1389.0,False,,water_heater,,,,,squat,,,,,,,carpet,,,,,,,,,,1500000000.0,21.128731,6,11,True


In [None]:
# Detect phone numbers: either a 3-3-4 digit group (optionally separated by
# '-', '.' or whitespace) or any run of 8+ digits.
# A non-capturing group (?:...) is used because `str.contains` warns when the
# pattern contains capturing groups.
# NOTE(review): a run of 8+ digits also matches large prices written in the
# description — confirm this false-positive rate is acceptable.
phone_pattern = r'\b(?:\d{3}[-.\s]?\d{3}[-.\s]?\d{4}|\d{8,})\b'
df['has_phone_number'] = df['description'].str.contains(phone_pattern, regex=True, na=False)

# Character class covering the emoji code-point ranges of interest
emoji_ranges = [
    '\U0001F600-\U0001F64F',  # Emoticons
    '\U0001F300-\U0001F5FF',  # Miscellaneous Symbols & Pictographs
    '\U0001F680-\U0001F6FF',  # Transport & Map Symbols
    '\U0001F700-\U0001F77F',  # Alchemical Symbols
    '\U0001F780-\U0001F7FF',  # Geometric Shapes Extended
    '\U0001F800-\U0001F8FF',  # Supplemental Arrows-C
    '\U0001F900-\U0001F9FF',  # Supplemental Symbols and Pictographs
    '\U0001FA00-\U0001FA6F',  # Chess Symbols
    '\U0001FA70-\U0001FAFF',  # Symbols and Pictographs Extended-A
    '\U00002702-\U000027B0',  # Dingbats
    '\U0001F004-\U0001F0CF',  # Mahjong tile U+1F004 through the playing cards (listed once)
    '\U00002B50',             # Star
    '\U0000203C',             # Double exclamation mark
    '\U0000FE0F',             # Variation selector-16 (emoji-style presentation)
]
emoji_pattern = '[' + ''.join(emoji_ranges) + ']'

# Apply the regex to detect emojis
df['has_emojis'] = df['description'].str.contains(emoji_pattern, regex=True, na=False)

# Detect price-related terms (plain alternation, no capturing group)
price_terms_pattern = r'فوری|زیر قیمت|توافقی'
df['has_price_terms'] = df['description'].str.contains(price_terms_pattern, regex=True, na=False)

  df['has_phone_number'] = df['description'].str.contains(r'\b(\d{3}[-.\s]?\d{3}[-.\s]?\d{4}|\d{8,})\b', regex=True)
  df['has_price_terms'] = df['description'].str.contains(r'(فوری|زیر قیمت|توافقی)', regex=True)


In [None]:
# Map of amenity columns to the description keywords that imply them
# NOTE(review): matching is by substring, so short keywords such as 'آب' also
# hit longer words that contain them (e.g. 'آباد') — confirm this is acceptable.
feature_keywords = {
    'has_parking': ['پارکینگ', 'جای پارک'],
    'has_elevator': ['آسانسور'],
    'has_pool': ['استخر'],
    'has_jacuzzi': ['جکوزی'],
    'has_sauna': ['سونا'],
    'has_balcony': ['بالکن', 'تراس'],
    'has_warehouse': ['انباری'],
    'has_heating_system': ['شوفاژ', 'پکیج'],
    'has_cooling_system': ['کولر', 'اسپیلت'],
    'has_security_guard': ['نگهبان', 'حراست'],
    'has_barbecue': ['باربیکیو'],
    'has_water': ['آب'],
    'has_gas': ['گاز'],
    'has_electricity': ['برق']
}

# For each amenity column, turn it on when a keyword appears in the description
for col, keywords in feature_keywords.items():
    # Escape keywords so they are always matched literally.
    pattern = '|'.join(re.escape(keyword) for keyword in keywords)

    # Detect keyword presence in the description (nulls count as "no match")
    has_keyword = df['description'].str.contains(pattern, regex=True, na=False)

    # Only an explicit true value (True / 'true' / 'True') counts as declared.
    # A plain `.astype(bool)` would turn NaN and the text 'False' into True.
    declared = df[col].astype(str).str.strip().str.lower().eq('true')

    df[col] = declared | has_keyword

In [None]:
# text cleaner using hazm
# Regexes used by clean_persian_text, compiled once.
_LATIN_AND_DIGITS_RE = re.compile(r"[A-Za-z0-9۰-۹]")
_CLEAN_EMOJI_RE = re.compile("["
                             u"\U0001F600-\U0001F64F"
                             u"\U0001F300-\U0001F5FF"
                             u"\U0001F680-\U0001F6FF"
                             u"\U0001F1E0-\U0001F1FF"
                             u"\U00002700-\U000027BF"
                             u"\U000024C2-\U0001F251"
                             "]+", flags=re.UNICODE)
_DISALLOWED_RE = re.compile(r"[^\w\s\u0600-\u06FF]")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_persian_text(text, remove_stopwords=True, lemmatize=True):
    """Normalise and simplify one Persian text value.

    Steps: hazm normalisation; Latin letters and ASCII/Persian digits replaced
    by spaces; emoji ranges removed; every character that is not a word
    character, whitespace, or in U+0600-U+06FF replaced by a space; whitespace
    collapsed; split on whitespace; optional stopword removal; optional
    lemmatisation. Returns the tokens joined by single spaces, or "" for a
    null input.

    NOTE(review): U+200C (zero-width non-joiner) is neither a word character
    nor whitespace nor in U+0600-U+06FF, so it is replaced by a space here —
    confirm that splitting half-space compounds is intended.
    """
    if pd.isnull(text):
        return ""

    cleaned = normalizer.normalize(text)
    cleaned = _LATIN_AND_DIGITS_RE.sub(" ", cleaned)
    cleaned = _CLEAN_EMOJI_RE.sub(r'', cleaned)
    cleaned = _DISALLOWED_RE.sub(" ", cleaned)
    tokens = _WHITESPACE_RE.sub(" ", cleaned).strip().split()

    if remove_stopwords:
        tokens = [token for token in tokens if token not in stopwords]

    if lemmatize:
        tokens = list(map(lemmatizer.lemmatize, tokens))

    return " ".join(tokens)

# Apply to text columns
df['description_clean'] = df['description'].apply(clean_persian_text)
df['title_clean'] = df['title'].apply(clean_persian_text)

# Add text statistics
df['description_length'] = df['description_clean'].str.len()
df['description_word_count'] = df['description_clean'].apply(lambda x: len(word_tokenize(x)))

# Sentence count is taken from the RAW description: the cleaned text has had
# its punctuation and line breaks removed, so counting sentences there gives
# the same value for every non-empty row. A sentence is a non-blank chunk
# between sentence punctuation ('.', '!', '?', '؟') or line breaks.
sentence_break_re = re.compile(r'[.!?؟\n]+')
df['description_sentence_count'] = df['description'].apply(
    lambda raw: sum(1 for chunk in sentence_break_re.split(str(raw)) if chunk.strip())
)

df['title_length'] = df['title_clean'].str.len()
df['title_word_count'] = df['title_clean'].apply(lambda x: len(word_tokenize(x)))

# Stopword ratio
def stopword_ratio(text):
    """Return the share of tokens in `text` that are stopwords.

    `text` is tokenised with `word_tokenize`; the result is the number of
    tokens found in `stopwords` divided by the total token count. Null or
    token-less input yields 0.
    """
    if pd.isnull(text):
        return 0
    words = word_tokenize(text)
    if not words:
        return 0
    stop_count = sum(1 for w in words if w in stopwords)
    return stop_count / len(words)

# Measure the ratio on the normalised RAW description. 'description_clean' has
# already had its stopwords removed, so measuring it there yields ~0 everywhere.
df['stopword_ratio'] = df['description'].apply(
    lambda raw: stopword_ratio(normalizer.normalize(raw)) if pd.notnull(raw) else 0
)

In [None]:
# Price per square meter
# Sale listings divide 'price_value' by the building size; every other listing
# type divides 'unified_price'.
is_sale = df['listing_type'].str.contains('sell', case=False, na=False)
price_basis = df['price_value'].where(is_sale, df['unified_price'])
per_sqm = price_basis / df['building_size']

# A zero building size produces +/-inf; treat those as missing.
df['price_per_sqm'] = per_sqm.replace([np.inf, -np.inf], np.nan)

# Discard rows without a usable price per square meter.
df = df.dropna(subset=['price_per_sqm'])

In [None]:
# Total amenities
amenities = [
    'has_balcony', 'has_elevator', 'has_warehouse', 'has_parking',
    'has_pool', 'has_jacuzzi', 'has_sauna',
    'has_water', 'has_electricity', 'has_gas',
    'has_heating_system', 'has_cooling_system',
    'has_security_guard', 'has_barbecue',
]

# Encode each amenity as 1 when its text form is 'true' (any casing) and 0 for
# everything else ('false', missing, or any other value).
for amenity in amenities:
    is_true = df[amenity].astype(str).str.lower().eq('true')
    df[amenity] = is_true.astype(int)

# Number of amenities present per listing
df['total_amenities'] = df[amenities].sum(axis=1)

In [None]:
# Normalize
# The price gets its own scaler. Re-fitting one shared scaler on the other
# columns would discard the fitted price range, so 'price_normalized' could
# not be mapped back to real prices later.
# NOTE(review): the displayed 'price_normalized' values are around 1e-6, which
# suggests extreme outliers in 'unified_price' — consider scaling
# 'log_unified_price' instead; confirm with the downstream consumer.
price_scaler = MinMaxScaler()
df['price_normalized'] = price_scaler.fit_transform(df[['unified_price']])

num_cols_to_scale = [
    'price_per_sqm',
    'description_length',
    'description_word_count',
    'description_sentence_count',
    'title_length',
    'title_word_count'
]

# These columns are overwritten in place with their [0, 1]-scaled values.
scaler = MinMaxScaler()
df[num_cols_to_scale] = scaler.fit_transform(df[num_cols_to_scale])

In [None]:
# Preview a few engineered columns. 'description_length', 'title_length' and
# 'price_per_sqm' were overwritten by the scaling cell, so they show scaled values.
df[['description_length', 'title_length', 'unified_price', 'price_per_sqm',
    'total_amenities', 'price_normalized']].head()

Unnamed: 0,description_length,title_length,unified_price,price_per_sqm,total_amenities,price_normalized
1,0.062822,0.275862,8500000000.0,3.1875e-07,3,1.698e-06
2,0.015448,0.241379,3000000000.0,5.113636e-08,3,5.98e-07
3,0.15654,0.551724,3800000000.0,9.5e-08,2,7.58e-07
4,0.31102,0.37931,5750000000.0,1.125e-07,6,1.148e-06
5,0.161689,0.448276,1500000000.0,3.375e-08,3,2.98e-07


In [None]:
# Display the processed frame; the notebook renders a truncated view (first and last rows).
df

Unnamed: 0,listing_type,property_type,city_slug,neighborhood_slug,created_at_month,user_type,description,title,rent_mode,rent_value,rent_to_single,rent_type,price_mode,price_value,credit_mode,credit_value,rent_credit_transform,transformable_price,transformable_credit,transformed_credit,transformable_rent,transformed_rent,land_size,building_size,deed_type,has_business_deed,floor,rooms_count,total_floors_count,unit_per_floor,has_balcony,has_elevator,has_warehouse,has_parking,construction_year,is_rebuilt,has_water,has_warm_water_provider,has_electricity,has_gas,has_heating_system,has_cooling_system,has_restroom,has_security_guard,has_barbecue,building_direction,has_pool,has_jacuzzi,has_sauna,floor_material,regular_person_capacity,extra_person_capacity,cost_per_extra_person,rent_price_on_regular_days,rent_price_on_special_days,rent_price_at_weekends,location_latitude,location_longitude,location_radius,unified_price,log_unified_price,month,day,has_multiple_pricing_options,has_phone_number,has_emojis,has_price_terms,description_clean,title_clean,description_length,description_word_count,description_sentence_count,title_length,title_word_count,stopword_ratio,price_per_sqm,total_amenities,price_normalized
1,residential-sell,apartment,tehran,gholhak,2024-05-01,مشاور املاک,دسترسی عالی به مترو و شریعتی \nمشاعات تمیز \nب...,۶۰ متر قلهک فول امکانات,,,,,مقطوع,8.500000e+09,,,,,,,,,,60.0,,,3,,,,0,1,1,1,1384.0,,0,,0,0,0,0,,0,0,,0,0,0,,,,,,,,,,500.0,8.500000e+09,22.863332,2,12,False,False,False,False,دسترسی مترو شریعتی مشاع تمیز ایراد بازدید فروش...,قلهک فول امکانات,0.062822,0.033223,1.0,0.275862,0.12,0.0,3.187500e-07,3,1.698000e-06
2,residential-rent,apartment,tehran,tohid,2024-10-01,,تخلیه پایان ماه,آپارتمان ۳ خوابه ۱۳۲ متر,مقطوع,26000000.0,,,,,مقطوع,7.500000e+08,False,,7.500000e+08,,26000000.0,,,132.0,,,3,,,,0,1,1,1,1401.0,False,0,,0,0,0,0,,0,0,,0,0,0,,,,,,,,35.703865,51.373459,,3.000000e+09,21.821878,7,10,False,False,False,False,تخلیه پایان ماه,آپارتمان خوابه,0.015448,0.009967,1.0,0.241379,0.08,0.0,5.113636e-08,3,5.980000e-07
3,commercial-rent,office,tehran,elahiyeh,2024-06-01,,فرشته تاپ لوکیشن\n۹۰ متر موقعیت اداری\nیک اتاق...,فرشته ۹۰ متر دفتر کار مدرن موقعیت اداری,مقطوع,95000000.0,,,,,مقطوع,9.500000e+08,False,,9.500000e+08,,95000000.0,,,90.0,,,4,,,,0,1,0,1,1400.0,,0,,0,0,0,0,,0,0,,0,0,0,,,,,,,,,,,3.800000e+09,22.058267,3,12,False,False,False,False,فرشته تاپ لوکیشن موقعیت اداری اتاق مستر دید وی...,فرشته دفتر کار مدرن موقعیت اداری,0.156540,0.096346,1.0,0.551724,0.24,0.0,9.500000e-08,2,7.580000e-07
4,residential-sell,apartment,mashhad,emamreza,2024-05-01,مشاور املاک,هلدینگ ساختمانی اکبری\n\nهمراه شما هستیم برای ...,۱۱۵ متری/شمالی رو به آفتاب/اکبری,,,,,مقطوع,5.750000e+09,,,,,,,,,,115.0,single_page,,4,,6,,1,1,1,1,1403.0,,0,package,0,0,1,1,squat_seat,0,0,north,0,0,0,ceramic,,,,,,,,,,5.750000e+09,22.472466,2,12,False,False,True,False,هلدینگ ساختمان اکبری همراه خرید#خر مطمئن کلید ...,متری شمالی آفتاب اکبری,0.311020,0.182724,1.0,0.379310,0.16,0.0,1.125000e-07,6,1.148000e-06
5,residential-rent,apartment,ahvaz,mellirah,2024-09-01,,اپارتمان ۱۰۰متری واحد جلو دارای دو اتاق خواب ک...,اپارتمان ۱۰۰متری ملی راه واحد جلو,مقطوع,6000000.0,,,,,مقطوع,2.500000e+08,False,,2.500000e+08,4.000000e+08,6000000.0,1.0,,100.0,,,3,,3,2,1,0,1,1,1389.0,False,0,water_heater,0,0,0,0,squat,0,0,,0,0,0,carpet,,,,,,,,,,1.500000e+09,21.128731,6,11,True,False,False,False,اپارتمان متری واحد جلو اتاق خواب کمددیواری کاب...,اپارتمان متری ملی واحد جلو,0.161689,0.096346,1.0,0.448276,0.20,0.0,3.375000e-08,3,2.980000e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999992,residential-sell,apartment,eslamshahr,,2024-11-01,,متریال استفاده شده در نما بهترین نوع با جزئیات...,اپارتمان ۱۰۵ متری خ مطهری,,,,,مقطوع,4.350000e+09,,,,,,,,,,105.0,single_page,,2,,5,2,1,1,1,1,1401.0,False,0,package,1,0,1,1,squat_seat,0,0,south,0,0,0,ceramic,,,,,,,,,,4.350000e+09,22.193442,8,11,False,False,False,False,متریال استفاده نما جزئیات جذاب سنگ دیواره وکف ...,اپارتمان متری خ مطهری,0.245108,0.139535,1.0,0.362069,0.16,0.0,9.321428e-08,7,8.680000e-07
999993,residential-rent,apartment,mashhad,elahiyehblvd,2024-06-01,مشاور املاک,با سلام \n50متر یکخوابه \nطبقه اول \nجای پارک ...,50متر یکخوابه حاشیه اقدسیه,مقطوع,2000000.0,,rent_credit,,,مقطوع,1.500000e+08,False,,1.500000e+08,,2000000.0,,,50.0,,,1,,3,5,1,0,0,1,1386.0,True,1,,0,0,0,1,squat,0,0,south,0,0,0,wood_parquet,,,,,,,36.374050,59.485794,500.0,9.000000e+08,20.617905,3,12,False,False,False,False,سلام یکخوابه طبقه پارک فراوان دسترسی ها کلیه ا...,یکخوابه حاشیه اقدسیه,0.142122,0.083056,1.0,0.344828,0.12,0.0,4.050000e-08,4,1.780000e-07
999995,residential-sell,apartment,kermanshah,,2024-07-01,مشاور املاک,~~~مشاورین املاک قبادی~~~\n■جنوبی تک واحدی\n■د...,آپارتمان ۱۸۰ متری وحدت غربی,,,,,مقطوع,7.470000e+09,,,,,,,,,,180.0,,,4,,,,0,1,1,1,1403.0,,0,,0,0,0,0,,0,0,,0,0,0,,,,,,,,34.350235,47.083241,500.0,7.470000e+09,22.734161,4,11,False,False,False,False,مشاورین املاک قبادی جنوبی تک واحدی خواب لوکیشن...,آپارتمان متری وحدت غربی,0.144181,0.079734,1.0,0.396552,0.16,0.0,9.337500e-08,3,1.492000e-06
999996,residential-rent,apartment,tehran,darya,2024-07-01,مشاور املاک,نوساز \n\n تک واحدی\n\nشخصی ساز\n\nروف گا...,آپارتمان ۱۱۰ متری سعادت آباد دریا,مقطوع,45000000.0,,rent_credit,,,مقطوع,1.000000e+09,True,,1.000000e+09,3.000000e+09,45000000.0,100000.0,,110.0,,,1,,,,0,1,1,1,1403.0,,0,,0,0,0,0,,0,0,,0,0,0,,,,,,,,35.770454,51.369099,500.0,4.000000e+09,22.109560,4,11,True,False,False,False,نوساز تک واحدی روف گاردن میز بیلیارد فرنگی اطا...,آپارتمان متری سعادت آباد دریا,0.107106,0.066445,1.0,0.500000,0.20,0.0,8.181818e-08,3,7.980000e-07


In [None]:
# Persist the cleaned dataset to the mounted Google Drive (no index column).
cleaned_csv_path = '/content/drive/MyDrive/cleaned_data.csv'
df.to_csv(cleaned_csv_path, index=False)