# Feature Engineering 
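This notebook derives new features from the cleaned rental listings dataset (price per square meter, total rooms, district), label-encodes the district, and previews Min-Max and standard scaling of the numeric features. The dataset saved at the end contains the engineered features in their original, unscaled units.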

## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

## 2. Load Final Cleaned Data

In [2]:
# Read the cleaned listings CSV into the working frame and preview it.
input_path = 'final_cleaned_listings.csv'
df = pd.read_csv(input_path)
display(df.head(n=5))

Unnamed: 0,title,price,location,area,bathrooms,bedrooms,price_per_sqm
0,ADV905*4BHK Villa for rent in Madinat Illam in...,750.0,"Qurum, Muscat•",300.0,4.0,4.0,2.5
1,ADV906**4BHK Villa in a comples in shatti qurum,1900.0,"Qurum, Muscat•",300.0,4.0,4.0,6.333333
2,ADC507*** Office Space in Azaiba – 440 sqm for...,2310.0,"Azaiba, Muscat•",440.0,2.0,2.0,5.25
3,*ADV705** 3+1 BHK Villa for Rent in Bousher –A...,650.0,"Bosher, Muscat•",350.0,4.0,3.0,1.857143
4,4 BR + Maid’s Room Spacious Well-Designed Vill...,750.0,"Azaiba, Muscat•",439.0,5.0,4.0,1.708428


## 3. Generate New Features

In [3]:
# Price per square meter.
# A zero area is treated as missing so the ratio becomes NaN rather than
# +/-inf; infinite values would make the scalers applied later raise, whereas
# NaN is carried through as a missing value.
df['price_per_sqm'] = df['price'] / df['area'].replace(0, np.nan)

# Total rooms (bedrooms + bathrooms)
df['total_rooms'] = df['bedrooms'] + df['bathrooms']

# Extract district from location (text before the first comma).
# The result is stripped so that stray leading/trailing whitespace does not
# turn one district into several distinct labels when it is encoded.
# Non-string values (e.g. NaN for a missing location) are passed through as-is.
df['district'] = df['location'].apply(
    lambda x: x.split(',', 1)[0].strip() if isinstance(x, str) else x
)

display(df[['price_per_sqm','total_rooms','district']].head())

Unnamed: 0,price_per_sqm,total_rooms,district
0,2.5,8.0,Qurum
1,6.333333,8.0,Qurum
2,5.25,4.0,Azaiba
3,1.857143,7.0,Bosher
4,1.708428,9.0,Azaiba


## 4. Encode Categorical Feature

In [4]:
# Map each distinct 'district' value to an integer code.
# The fitted encoder is kept in `le` so the codes can be mapped back to names.
# NOTE(review): these codes are identifiers only; their numeric order carries
# no meaning about the districts themselves.
le = LabelEncoder()
le.fit(df['district'])
df['district_encoded'] = le.transform(df['district'])
display(df.loc[:, ['district','district_encoded']].head())

Unnamed: 0,district,district_encoded
0,Qurum,66
1,Qurum,66
2,Azaiba,30
3,Bosher,39
4,Azaiba,30


## 5. Apply Feature Scaling

In [5]:
# Columns that get rescaled; every other column is carried over unchanged.
numeric_features = ['price','area','price_per_sqm','total_rooms']


def _rescaled_copy(frame, scaler, columns):
    """Return a copy of ``frame`` whose ``columns`` hold ``scaler.fit_transform`` output.

    ``frame`` itself is not modified; ``scaler`` is fitted as a side effect.
    """
    rescaled = frame.copy()
    rescaled[columns] = scaler.fit_transform(rescaled[columns])
    return rescaled


# Min-Max scaling: fitted on, and applied to, a copy of the full frame.
mm_scaler = MinMaxScaler()
df_mm = _rescaled_copy(df, mm_scaler, numeric_features)
print('Min-Max Scaled Sample:')
display(df_mm.head())

# Standard scaling: same procedure with a StandardScaler.
ss_scaler = StandardScaler()
df_ss = _rescaled_copy(df, ss_scaler, numeric_features)
print('Standard Scaled Sample:')
display(df_ss.head())

Min-Max Scaled Sample:


Unnamed: 0,title,price,location,area,bathrooms,bedrooms,price_per_sqm,total_rooms,district,district_encoded
0,ADV905*4BHK Villa for rent in Madinat Illam in...,0.00149,"Qurum, Muscat•",0.00296,4.0,4.0,0.049872,0.304348,Qurum,66
1,ADV906**4BHK Villa in a comples in shatti qurum,0.00379,"Qurum, Muscat•",0.00296,4.0,4.0,0.126549,0.304348,Qurum,66
2,ADC507*** Office Space in Azaiba – 440 sqm for...,0.00461,"Azaiba, Muscat•",0.00436,2.0,2.0,0.104879,0.130435,Azaiba,30
3,*ADV705** 3+1 BHK Villa for Rent in Bousher –A...,0.00129,"Bosher, Muscat•",0.00346,4.0,3.0,0.037013,0.26087,Bosher,39
4,4 BR + Maid’s Room Spacious Well-Designed Vill...,0.00149,"Azaiba, Muscat•",0.00435,5.0,4.0,0.034038,0.347826,Azaiba,30


Standard Scaled Sample:


Unnamed: 0,title,price,location,area,bathrooms,bedrooms,price_per_sqm,total_rooms,district,district_encoded
0,ADV905*4BHK Villa for rent in Madinat Illam in...,0.001911,"Qurum, Muscat•",-0.029129,4.0,4.0,-0.26134,0.852294,Qurum,66
1,ADV906**4BHK Villa in a comples in shatti qurum,0.137008,"Qurum, Muscat•",-0.029129,4.0,4.0,0.516717,0.852294,Qurum,66
2,ADC507*** Office Space in Azaiba – 440 sqm for...,0.185173,"Azaiba, Muscat•",0.03805,2.0,2.0,0.296831,-0.346388,Azaiba,30
3,*ADV705** 3+1 BHK Villa for Rent in Bousher –A...,-0.009836,"Bosher, Muscat•",-0.005137,4.0,3.0,-0.391822,0.552624,Bosher,39
4,4 BR + Maid’s Room Spacious Well-Designed Vill...,0.001911,"Azaiba, Muscat•",0.03757,5.0,4.0,-0.422007,1.151965,Azaiba,30


## 6. Save Engineered Dataset

In [6]:
# Write the frame with the engineered columns to CSV, without the row index.
# Only `df` is persisted here; the scaled copies (`df_mm`, `df_ss`) are not written.
output_path = 'engineered_listings.csv'
df.to_csv(output_path, index=False)
print('Engineered dataset saved to engineered_listings.csv')

Engineered dataset saved to engineered_listings.csv
