In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import os

In [5]:
# Load the housing dataset into the notebook-wide DataFrame `df`.
# NOTE(review): the absolute path looks like a Colab upload location — confirm
# before running in another environment.
df = pd.read_csv(filepath_or_buffer=r"/content/housing.csv")

In [6]:
# Quick sanity check of the load: show the first five rows as plain text.
print(df.head(n=5))

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.23     37.88                41.0        880.0           129.0   
1    -122.22     37.86                21.0       7099.0          1106.0   
2    -122.24     37.85                52.0       1467.0           190.0   
3    -122.25     37.85                52.0       1274.0           235.0   
4    -122.25     37.85                52.0       1627.0           280.0   

   population  households  median_income  median_house_value ocean_proximity  
0       322.0       126.0         8.3252            452600.0        NEAR BAY  
1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  
2       496.0       177.0         7.2574            352100.0        NEAR BAY  
3       558.0       219.0         5.6431            341300.0        NEAR BAY  
4       565.0       259.0         3.8462            342200.0        NEAR BAY  


In [7]:
# Step 2: Log-transform the target to reduce skewness.
# The natural logarithm is applied element-wise to `median_house_value`; the
# result is stored in a new `log_price` column and the original is kept.
df['log_price'] = np.log(df.loc[:, 'median_house_value'])

In [10]:
# Compare the raw and log-transformed target for the first ten rows.
# The heading and the frame are printed with separate calls on purpose:
# passing both to a single print() inserts the default " " separator right
# after the heading's trailing newline, which pushes the frame's header row
# one column to the right of its data rows.
print("\n🔹 After Log Transformation:")
print(df[['median_house_value', 'log_price']].head(10))


🔹 After Log Transformation:
    median_house_value  log_price
0            452600.0  13.022764
1            358500.0  12.789684
2            352100.0  12.771671
3            341300.0  12.740517
4            342200.0  12.743151
5            269700.0  12.505066
6            299200.0  12.608868
7            241400.0  12.394211
8            226700.0  12.331383
9            261100.0  12.472659


In [13]:
# Step 3: Apply Scaling — rescale `total_rooms` two ways so they can be compared.
# Each scaler is fitted on, and applied to, the same single-column frame.

# Normalization (min-max scaling).
minmax = MinMaxScaler()
df['room_minmax'] = minmax.fit_transform(df[['total_rooms']])

# Standardization (z-score scaling).
standard = StandardScaler()
df['room_standard'] = standard.fit_transform(df[['total_rooms']])

In [15]:
# Show the raw room counts next to both scaled versions (first ten rows).
print("\n after scaling \n")
print(
    df.loc[:, ['total_rooms', 'room_minmax', 'room_standard']].head(n=10)
)



 after scaling 

   total_rooms  room_minmax  room_standard
0        880.0     0.022331      -0.804819
1       7099.0     0.180503       2.045890
2       1467.0     0.037260      -0.535746
3       1274.0     0.032352      -0.624215
4       1627.0     0.041330      -0.462404
5        919.0     0.023323      -0.786942
6       2535.0     0.064423      -0.046188
7       3104.0     0.078895       0.214634
8       2555.0     0.064932      -0.037021
9       3549.0     0.090213       0.418616


In [16]:
# Step 4: Feature Engineering — derive ratio features from the raw count columns.
# Each ratio is an element-wise division of two existing columns; rows with a
# missing operand yield NaN and a zero denominator is not guarded against.

# Rooms per household: total_rooms / households.
df['rooms_per_household'] = df['total_rooms'].div(df['households'])

# Bedrooms per room: total_bedrooms / total_rooms.
df['bedroom_per_room'] = df['total_bedrooms'].div(df['total_rooms'])

# Population per household: population / households.
df['population_per_household'] = df['population'].div(df['households'])

In [17]:
# Show each engineered ratio next to the raw columns it was derived from
# (first five rows). The heading marker is the intended "🔹" bullet; it had
# been stored as mis-decoded bytes and printed as garbage.
print("\n🔹 After Feature Engineering:")
print(df[['total_rooms', 'households', 'rooms_per_household',
          'total_bedrooms', 'bedroom_per_room',
          'population', 'population_per_household']].head())


🔹 After Feature Engineering:
   total_rooms  households  rooms_per_household  total_bedrooms  \
0        880.0       126.0             6.984127           129.0   
1       7099.0      1138.0             6.238137          1106.0   
2       1467.0       177.0             8.288136           190.0   
3       1274.0       219.0             5.817352           235.0   
4       1627.0       259.0             6.281853           280.0   

   bedroom_per_room  population  population_per_household  
0          0.146591       322.0                  2.555556  
1          0.155797      2401.0                  2.109842  
2          0.129516       496.0                  2.802260  
3          0.184458       558.0                  2.547945  
4          0.172096       565.0                  2.181467  
