In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import joblib
import numpy as np

# Read the cleaned KL housing listings from the CSV next to the notebook.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, presumably an
# index that was written into the CSV — confirm before switching to index_col=0,
# since other cells may rely on that column being present.
DATASET_CSV = 'Cleaned_KL_Housing_Dataset.csv'
df = pd.read_csv(DATASET_CSV)

In [2]:
# Show the loaded frame as the cell's result; pandas abbreviates the middle
# rows of a long frame with a "..." row rather than printing every record.
df


Unnamed: 0,Location,Price,Rooms,Bathrooms,Car Parks,Property Type,Furnishing,Build Type,Sqft
0,KLCC,1.25,3.0,3.0,2.0,Serviced Residence,Fully Furnished,Built-up,1335.0
1,Dutamas,1.03,3.0,4.0,2.0,Condominium,Partly Furnished,Built-up,1875.0
2,Bukit Jalil,0.90,5.0,3.0,2.0,Condominium,Partly Furnished,Built-up,1513.0
3,Taman Tun Dr Ismail,2.60,5.0,4.0,4.0,Semi-detached House,Partly Furnished,Land area,3600.0
4,Mont Kiara,1.78,5.0,4.0,2.0,Condominium,Partly Furnished,Built-up,1830.0
...,...,...,...,...,...,...,...,...,...
21506,Seputeh,0.75,3.0,2.0,1.0,Condominium,Partly Furnished,Built-up,915.0
21507,KL Sentral,1.40,4.0,3.0,2.0,Condominium,Fully Furnished,Land area,1544.0
21508,KL Eco City,0.88,1.0,1.0,1.0,Condominium,Partly Furnished,Built-up,650.0
21509,Sri Hartamas,2.70,6.0,6.0,3.0,Condominium,Partly Furnished,Built-up,3973.0


In [3]:
# Tally how many listings fall under each 'Build Type' value, most frequent
# first, keeping at most the first 20 entries.
# NOTE: despite its name, location_counts holds build-type counts, not
# location counts; the name is kept because other cells may reference it.
build_type_column = df['Build Type']
location_counts = build_type_column.value_counts().head(20)

print(location_counts)


Built-up     19323
Land area     2188
Name: Build Type, dtype: int64


In [4]:
# Average listing price for the most frequently listed locations.
#
# The earlier comments and the name `top_20_locations` said "top 20", but the
# code has always selected the 50 most common locations. The cut-off is now an
# explicit constant so the comments, the name and the behaviour agree; the
# selected rows and the printed result are unchanged.
TOP_N_LOCATIONS = 50

# value_counts() orders locations by number of listings (most common first),
# so head(TOP_N_LOCATIONS) keeps the most frequently listed ones.
top_locations = df['Location'].value_counts().head(TOP_N_LOCATIONS).index

# Backward-compatible alias under the original (misleading) name, in case
# other cells still reference it. It holds TOP_N_LOCATIONS entries, not 20.
top_20_locations = top_locations

# Keep only the rows whose location is among the most common ones
filtered_df = df[df['Location'].isin(top_locations)]

# Mean of the 'Price' column for each retained location
average_price = filtered_df.groupby('Location')['Price'].mean()

print(average_price)


Location
Ampang                               1.259290
Ampang Hilir                         2.510625
Bandar Damai Perdana                 0.817922
Bandar Menjalara                     0.794423
Bandar Tasik Selatan                 0.697094
Bangsar                              2.893196
Bangsar South                        0.883673
Batu Caves                           0.698028
Brickfields                          0.768045
Bukit Bintang                        1.929402
Bukit Jalil                          0.932634
Bukit Tunku (Kenny Hills)            2.298081
Cheras                               0.803018
City Centre                          1.700113
Damansara                            2.142778
Damansara Heights                    2.534742
Desa Pandan                          1.034474
Desa ParkCity                        1.786044
Dutamas                              1.080094
Jalan Ipoh                           0.722458
Jalan Klang Lama (Old Klang Road)    0.828681
Jalan Kuching            

In [7]:
# Collect every distinct value of the 'Location' column.
# NOTE: the printed values include case variants of the same name (e.g. both
# 'KLCC' and 'Klcc'), which are treated as separate locations here.
location_column = df['Location']
unique_locations = pd.unique(location_column)
print(unique_locations)

['KLCC' 'Dutamas' 'Bukit Jalil' 'Taman Tun Dr Ismail' 'Mont Kiara'
 'Desa ParkCity' 'Damansara Heights' 'Ampang Hilir'
 'Jalan Klang Lama (Old Klang Road)' 'Bangsar South' 'KL City'
 'Sungai Besi' 'KL Sentral' 'Setapak' 'City Centre' 'Taman Desa' 'Sentul'
 'Bangsar' 'Segambut' 'Wangsa Maju' 'Batu Caves' 'Klcc' 'Setiawangsa'
 'Bukit Bintang' 'Chan Sow Lin' 'Jalan Kuching' 'Sri Petaling'
 'Taman Melawati' 'Cheras' 'Ampang' 'KL Eco City' 'Pantai' 'Kuchai Lama'
 'Kepong' 'Seputeh' 'Bukit Kiara' 'Bandar Menjalara' 'OUG'
 'Bukit Tunku (Kenny Hills)' 'Desa Pandan' 'Jalan Ipoh' 'Sri Hartamas'
 'Sunway SPK' 'Brickfields' 'Keramat' 'Pandan Indah' 'Titiwangsa'
 'Jalan Sultan Ismail' 'Damansara' 'Mid Valley City' 'Salak Selatan'
 'Federal Hill' 'Desa Petaling' 'Pandan Perdana' 'Jinjang'
 'Country Heights Damansara' 'Puchong' 'Pandan Jaya'
 'Bandar Damai Perdana' 'Sungai Penchala' 'Taman Duta'
 'Bandar Tasik Selatan' 'Bukit Ledang' 'duta Nusantara' 'Other'
 'Bandar Sri Damansara']
