<a href="https://colab.research.google.com/github/Khamidrees/DEEPTECH_READY-KNOWLEDGE-SHOWCASE-MAY-EDITION/blob/main/nigeria_houses_cleaning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧹 Nigeria Houses Data Cleaning Notebook
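This notebook cleans the Nigeria housing dataset and prepares it for machine learning: it fills in missing values, label-encodes the categorical columns, standardizes the numeric columns, and saves the result as `cleaned_nigeria_houses.csv`.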

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Read the raw listings CSV into `df`.
# NOTE(review): the /content/ prefix is presumably the Colab working
# directory -- adjust `file_path` when running elsewhere.
file_path = "/content/nigeria_houses.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)


In [None]:
# Report how many values are missing in each column before imputation.
print("Missing values before cleaning:")
print(df.isnull().sum())

# Fill missing numerical values with the column median.
# NOTE(review): 'price' looks like the prediction target; imputing a target
# with its median is usually undesirable -- confirm whether rows with a
# missing price should be dropped instead.
num_cols = ['bedrooms', 'bathrooms', 'toilets', 'parking_space', 'price']
for col in num_cols:
    df[col] = df[col].fillna(df[col].median())

# Fill missing categorical values with the most frequent value (mode).
cat_cols = ['title', 'town', 'state']
for col in cat_cols:
    col_modes = df[col].mode()
    if col_modes.empty:
        # mode() is empty when the column has no non-null values; indexing
        # it with [0] would raise, so leave the column as-is and report it.
        print(f"Warning: column '{col}' has no non-null values; left unfilled.")
        continue
    # When several values tie, mode() returns them sorted; take the first.
    df[col] = df[col].fillna(col_modes.iloc[0])

# Report the remaining missing values; the columns filled above should show zero.
print("\nMissing values after cleaning:")
print(df.isnull().sum())


In [None]:
# Encode categorical variables using Label Encoding.
# A separate fitted encoder is kept for each column in `label_encoders`, so
# every column's integer codes can be decoded later (inverse_transform /
# classes_) or applied to new data. Re-fitting one shared encoder would
# retain only the mapping of the last column processed.
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
# After the loop `le` still refers to the encoder of the last column.

df.head()


In [None]:
# Standardize the columns listed in `num_cols`: learn the per-column
# statistics first, then overwrite the columns with the scaled values.
# The fitted `scaler` is kept so the transformation can be inverted later.
scaler = StandardScaler()
scaler.fit(df[num_cols])
df[num_cols] = scaler.transform(df[num_cols])
df.head()


In [None]:
# Write the cleaned frame to CSV in the current working directory
# (row index omitted), then print a confirmation message.
df.to_csv(path_or_buf="cleaned_nigeria_houses.csv", index=False)
print("✅ Cleaned data saved as 'cleaned_nigeria_houses.csv'")
