# Imports & Load Data

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# Read the deep-cleaned listings CSV into the working DataFrame and report its size
listings_csv = 'cleaned_combined_listings_second_cleaned.csv'
df = pd.read_csv(listings_csv)
print("Data shape:", df.shape)

Data shape: (4211, 8)


# Generate New Features

In [2]:
# Derived price/area features. The whole step is skipped when either input
# column is missing.
if 'price' in df.columns and 'area' in df.columns:
    # Conversion factor: 1 sqft = 0.092903 sqm
    SQFT_TO_SQM = 0.092903

    # A row with area == 0 would make price / area evaluate to +/-inf, which
    # StandardScaler later rejects. Mask zero areas to NaN in the denominator
    # so the ratios come out as missing instead of infinite. Non-zero rows are
    # computed exactly as before.
    area_nonzero = df['area'].where(df['area'] != 0)

    df['price_per_sqft'] = df['price'] / area_nonzero
    # 'area' is treated as square feet here; area_sqm itself keeps zeros as zeros.
    df['area_sqm'] = df['area'] * SQFT_TO_SQM
    # NOTE(review): price_per_sqm is price_per_sqft divided by a constant, so
    # the two columns become identical once standardized - confirm both are wanted.
    df['price_per_sqm'] = df['price'] / (area_nonzero * SQFT_TO_SQM)
    print("Generated: price_per_sqft, area_sqm, price_per_sqm")

# Total rooms: bedrooms + bathrooms, only when both columns are present
if {'bedrooms', 'bathrooms'}.issubset(df.columns):
    df['total_rooms'] = df['bedrooms'] + df['bathrooms']
    print("Generated: total_rooms")

Generated: price_per_sqft, area_sqm, price_per_sqm
Generated: total_rooms


# Encode Categorical Features

In [3]:
# Expand 'location' into indicator columns prefixed with 'loc'.
# drop_first=True omits the indicator for the first category level.
location_present = 'location' in df.columns
if location_present:
    df = pd.get_dummies(df, columns=['location'], prefix='loc', drop_first=True)
    print("One-hot encoded 'location'")

# Map each distinct 'bedrooms' value to an ordinal code in a new column,
# leaving the original 'bedrooms' column untouched.
# NOTE(review): 'bedrooms' is added to 'bathrooms' in the feature cell, which
# suggests it is already numeric - confirm an ordinal re-coding is intended.
from sklearn.preprocessing import OrdinalEncoder
if 'bedrooms' in df.columns:
    oe = OrdinalEncoder()
    # The encoder expects 2-D input, hence the double brackets
    bedroom_codes = oe.fit_transform(df[['bedrooms']])
    df['bedrooms_enc'] = bedroom_codes
    print("Ordinal encoded 'bedrooms' into 'bedrooms_enc'")

One-hot encoded 'location'
Ordinal encoded 'bedrooms' into 'bedrooms_enc'


# Feature Scaling

In [4]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Candidate columns for each scaler. Only those actually present in df are used.
scale_std = ['price', 'area', 'price_per_sqft', 'price_per_sqm']
scale_mm = ['total_rooms']

# NOTE(review): both scalers are fit on the full frame and overwrite the
# original columns in place. If a train/test split happens downstream, or if
# 'price' is the prediction target, confirm this is intended.

# Standard scaling (zero mean, unit variance per column)
std_cols = [c for c in scale_std if c in df.columns]
scaler_std = StandardScaler()
if std_cols:
    df[std_cols] = scaler_std.fit_transform(df[std_cols])
    print("Applied StandardScaler to:", std_cols)
else:
    # fit_transform raises on a zero-column selection, so skip explicitly
    print("Skipped StandardScaler: none of", scale_std, "found in df")

# Min-Max scaling (rescales each column to [0, 1])
mm_cols = [c for c in scale_mm if c in df.columns]
scaler_mm = MinMaxScaler()
if mm_cols:
    df[mm_cols] = scaler_mm.fit_transform(df[mm_cols])
    print("Applied MinMaxScaler to:", mm_cols)
else:
    # e.g. 'total_rooms' is absent when 'bedrooms'/'bathrooms' were missing
    print("Skipped MinMaxScaler: none of", scale_mm, "found in df")

Applied StandardScaler to: ['price', 'area', 'price_per_sqft', 'price_per_sqm']
Applied MinMaxScaler to: ['total_rooms']


# Save Feature-Engineered Data

In [5]:
# Write the feature-engineered frame to CSV (index column omitted) and report its final shape
engineered_csv = 'feature_engineered_listings_v2.csv'
df.to_csv(engineered_csv, index=False)
print("Saved with shape:", df.shape)

Saved with shape: (4211, 126)
