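# Data Cleaning & Integration
This notebook loads two rental-listing datasets (Dubizzle and Mawa), standardizes their column names, cleans the price, area, bedroom, and bathroom fields, and merges the results into a single de-duplicated file, `cleaned_combined_listings.csv`.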

## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import re

## 2. Load Datasets

In [13]:
# Raw listing exports, one CSV per source site.
dubizzle_csv = 'dubizzle_properties_for_rent.csv'
mawa_csv = 'mawa_rent_listings.csv'

dubizzle = pd.read_csv(dubizzle_csv)
mawa = pd.read_csv(mawa_csv)

## 3. Standardize Column Names

In [4]:
# Map each source's headers onto one shared, lowercase schema.
# Columns not listed in a mapping keep their existing names.
dubizzle = dubizzle.rename(
    columns=dict(property_name='title', price='price', beds='bedrooms')
)
mawa = mawa.rename(
    columns=dict(
        Title='title',
        Price='price',
        Location='location',
        Area='area',
        Bathrooms='bathrooms',
        Bedrooms='bedrooms',
    )
)

## 4. Select Relevant Columns

In [5]:
# Shared schema: both frames are trimmed to these columns, in this order.
cols = [
    'title',
    'price',
    'location',
    'area',
    'bathrooms',
    'bedrooms',
]
dubizzle, mawa = dubizzle[cols], mawa[cols]

## 5. Define Cleaning Functions

In [6]:
def clean_price(x):
    """Convert a raw price value to a float.

    The value is stringified, lowercased, stripped of the 'omr' currency
    tag (in any letter case) and of thousands separators, then parsed.

    Returns:
        The price as a float, or NaN when the value is missing, contains
        the word 'request', or cannot be parsed as a number.
    """
    if pd.isna(x):
        return np.nan
    # Lowercase first so the currency tag is removed regardless of case.
    s = str(x).lower().replace('omr', '').replace(',', '').strip()
    if 'request' in s:
        return np.nan
    try:
        return float(s)
    except ValueError:
        # Only a parse failure means "not a price"; anything else should surface.
        return np.nan

def clean_area(x):
    """Extract the first numeric value from a raw area field.

    The first run of digits, dots, and commas that contains at least one
    digit is taken; commas are removed before parsing.

    Returns:
        The area as a float, or NaN when the value is missing, contains no
        digits, or the extracted run is not a valid number (e.g. '1.2.3').
    """
    if pd.isna(x):
        return np.nan
    # Require a digit inside the run so stray punctuation such as the '.' in
    # 'approx. 300 sqm' is skipped instead of being handed to float().
    m = re.search(r'[\d.,]*\d[\d.,]*', str(x))
    if m is None:
        return np.nan
    try:
        return float(m.group(0).replace(',', ''))
    except ValueError:
        return np.nan

## 6. Apply Cleaning Functions

In [7]:
def _clean_listing_columns(listings):
    """Return a copy of `listings` with price, area, bedrooms, and bathrooms made numeric."""
    # .assign builds a new frame rather than writing into one obtained by
    # column selection, which avoids SettingWithCopyWarning and keeps the
    # cell safe to re-run.
    return listings.assign(
        price=listings['price'].apply(clean_price),
        area=listings['area'].apply(clean_area),
        bedrooms=pd.to_numeric(listings['bedrooms'], errors='coerce'),
        bathrooms=pd.to_numeric(listings['bathrooms'], errors='coerce'),
    )


dubizzle = _clean_listing_columns(dubizzle)
mawa = _clean_listing_columns(mawa)

## 7. Remove Duplicates

In [8]:
# Drop rows that repeat an earlier row across every column, keeping the
# first occurrence within each source.
dubizzle = dubizzle.drop_duplicates(keep='first')
mawa = mawa.drop_duplicates(keep='first')

## 8. Combine Datasets & Export

In [10]:
# Stack both sources, drop rows that are identical across the two sites,
# and renumber the index from zero before writing the result to disk.
combined = (
    pd.concat([dubizzle, mawa], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
combined.to_csv('cleaned_combined_listings.csv', index=False)
print('Saved cleaned_combined_listings.csv')

Saved cleaned_combined_listings.csv


## 9. Preview Combined Data

In [12]:
# Show the first five rows of the merged frame as a quick sanity check.
display(combined.head(n=5))

Unnamed: 0,title,price,location,area,bathrooms,bedrooms
0,ADV905*4BHK Villa for rent in Madinat Illam in...,750.0,"Qurum, Muscat•",300.0,4.0,4.0
1,ADV906**4BHK Villa in a comples in shatti qurum,1900.0,"Qurum, Muscat•",300.0,4.0,4.0
2,ADC507*** Office Space in Azaiba – 440 sqm for...,2310.0,"Azaiba, Muscat•",440.0,,
3,*ADV705** 3+1 BHK Villa for Rent in Bousher –A...,650.0,"Bosher, Muscat•",350.0,4.0,3.0
4,4 BR + Maid’s Room Spacious Well-Designed Vill...,750.0,"Azaiba, Muscat•",439.0,5.0,4.0
