### Import Required Libraries

In [1]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler, MinMaxScaler

### Load the Dataset

In [2]:
# Simulated house price dataset 
data = { 
    'Area (sq ft)': [2000, 1500, 1800, 2200, np.nan, 2500, 2700, 1600, 1400, 2100], 
    'Bedrooms': [3, 2, 3, 4, 3, 5, np.nan, 2, 2, 4], 
    'Price ($1000s)': [500, 350, 450, 600, 400, 750, 800, 300, 280, 650] 
} 
df = pd.DataFrame(data) 
print("Original Dataset:\n", df) 

Original Dataset:
    Area (sq ft)  Bedrooms  Price ($1000s)
0        2000.0       3.0             500
1        1500.0       2.0             350
2        1800.0       3.0             450
3        2200.0       4.0             600
4           NaN       3.0             400
5        2500.0       5.0             750
6        2700.0       NaN             800
7        1600.0       2.0             300
8        1400.0       2.0             280
9        2100.0       4.0             650


### Handle Missing Values using SimpleImputer

In [3]:
# Define imputer (Replace NaN with column mean) 
imputer = SimpleImputer(strategy='mean') 
 
# Apply imputer to dataset 
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns) 
 
print("\nDataset after Handling Missing Values:\n", df_imputed)


Dataset after Handling Missing Values:
    Area (sq ft)  Bedrooms  Price ($1000s)
0   2000.000000  3.000000           500.0
1   1500.000000  2.000000           350.0
2   1800.000000  3.000000           450.0
3   2200.000000  4.000000           600.0
4   1977.777778  3.000000           400.0
5   2500.000000  5.000000           750.0
6   2700.000000  3.111111           800.0
7   1600.000000  2.000000           300.0
8   1400.000000  2.000000           280.0
9   2100.000000  4.000000           650.0


### Splitting Data into Training and Testing Sets

In [4]:
# Features and target variable 
X = df_imputed[['Area (sq ft)', 'Bedrooms']]  # Features 
y = df_imputed['Price ($1000s)']  # Target variable 
 
# Split data (80% training, 20% testing) 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 
 
print("\nTraining Set Size:", X_train.shape) 
print("Testing Set Size:", X_test.shape) 


Training Set Size: (8, 2)
Testing Set Size: (2, 2)


### Feature Scaling using StandardScaler and MinMaxScaler

In [5]:
# Standardization (mean = 0, variance = 1) 
scaler_standard = StandardScaler() 
X_train_standardized = scaler_standard.fit_transform(X_train) 
X_test_standardized = scaler_standard.transform(X_test)

# Normalization (Scaling between 0 and 1) 
scaler_minmax = MinMaxScaler() 
X_train_normalized = scaler_minmax.fit_transform(X_train) 
X_test_normalized = scaler_minmax.transform(X_test) 
 
print("\nStandardized Data (First 3 rows):\n", X_train_standardized[:3]) 
print("\nNormalized Data (First 3 rows):\n", X_train_normalized[:3]) 


Standardized Data (First 3 rows):
 [[ 1.16736275  1.89175141]
 [-0.32819095 -0.45662965]
 [-1.52463391 -1.63082018]]

Normalized Data (First 3 rows):
 [[0.81818182 1.        ]
 [0.36363636 0.33333333]
 [0.         0.        ]]
