# In [None]:
import pandas as pd
import numpy as np

# Load the raw listings.
df = pd.read_csv('Nawy.csv')

# --- Data cleaning & type conversion ---
# 'Area' is stringified and stripped of commas before parsing; any value that
# still cannot be parsed becomes NaN (errors='coerce').
df['Area'] = pd.to_numeric(
    df['Area'].astype(str).str.replace(',', ''),
    errors='coerce',
)
# The remaining numeric columns are parsed directly, without comma stripping.
for numeric_column in ('Price_EGP', 'Bedrooms', 'Bathrooms'):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')

# --- Price reference table ---
# Keep only rows whose price per m2 is below 1,000,000; rows where the ratio
# is NaN or infinite fail the comparison and are dropped as well.
df_ref = df.loc[df['Price_EGP'].div(df['Area']).lt(1000000)].copy()
df_ref['Price_per_m2'] = df_ref['Price_EGP'].div(df_ref['Area'])

# MEAN price per m2 for each (Compound_Name, Type) pair.
price_stats = (
    df_ref
    .groupby(['Compound_Name', 'Type'])['Price_per_m2']
    .mean()
)

# --- Room reference table ---
# MEDIAN bedrooms/bathrooms for each (Compound_Name, Type) pair, computed on
# the full frame (not only the price-filtered rows).
room_stats = (
    df
    .groupby(['Compound_Name', 'Type'])[['Bedrooms', 'Bathrooms']]
    .median()
)

def _group_stat(stats, row, column=None):
    """Return the (Compound_Name, Type) statistic for *row*, or None.

    None is returned both when the group is absent from *stats* and when the
    stored statistic is NaN, so callers can apply one fallback for both cases.
    """
    try:
        key = (row['Compound_Name'], row['Type'])
        value = stats.loc[key] if column is None else stats.loc[key, column]
    except (KeyError, TypeError):
        # Unknown group (or a key that cannot be looked up, e.g. NaN).
        return None
    return None if pd.isnull(value) else value


def unified_impute(row):
    """Fill missing Bedrooms, Bathrooms and Price_EGP for a single row.

    Reads the module-level lookup tables ``room_stats``, ``price_stats`` and
    ``df_ref``.

    Args:
        row: A row with the keys 'Bedrooms', 'Bathrooms', 'Price_EGP',
            'Compound_Name', 'Type' and 'Area'.

    Returns:
        pd.Series of ``[bedrooms, bathrooms, price]``. Values already present
        in *row* are passed through unchanged. The price is still NaN when it
        is missing and 'Area' is NaN as well.
    """
    # --- STEP A: FILL BEDROOMS & BATHROOMS ---
    bed, bath = row['Bedrooms'], row['Bathrooms']
    if pd.isnull(bed):
        bed = _group_stat(room_stats, row, 'Bedrooms')
        if bed is None:
            bed = 3  # Global fallback
    if pd.isnull(bath):
        bath = _group_stat(room_stats, row, 'Bathrooms')
        if bath is None:
            bath = 2  # Global fallback

    # --- STEP B: FILL PRICE (BASED ON AREA) ---
    price = row['Price_EGP']
    if pd.isnull(price):
        # Prefer the group's average rate; otherwise use the global average.
        rate = _group_stat(price_stats, row)
        if rate is None:
            rate = df_ref['Price_per_m2'].mean()
        price = rate * row['Area']

    return pd.Series([bed, bath, price])

 Execute Imputation
df[['Bedrooms', 'Bathrooms', 'Price_EGP']] = df.apply(unified_impute, axis=1)

#  Final Formatting & Safety Check
# This ensures no NaNs remain so the 'astype(int)' command won't crash
df['Bedrooms'] = df['Bedrooms'].fillna(3).round(0).astype(int)
df['Bathrooms'] = df['Bathrooms'].fillna(2).round(0).astype(int)

df.to_csv('nawy_imputition.csv', index=False)
print("Success! Merged file saved as 'Nawy_imputition.csv'")


