In [11]:
import pandas as pd
import re
from sklearn.feature_extraction.text import CountVectorizer # scikit-learn 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from geopy.distance import geodesic


In [12]:
# Load the UAE real-estate listings; the path is relative to the notebook's working directory.
# Note: 'Area' comes back as text (object dtype in prop.info()) and is converted to float
# in a later preprocessing cell.
prop = pd.read_csv('uae-real-estate.csv')
prop.head() # preview the first 5 rows

Unnamed: 0,Name,B_type,Latitude,Longitude,Area,Bedrooms,Bathrooms,City,District,Price
0,Elegant Unit| Golf Views| High-Floor| Prime Area,Apartment,24.46386,54.595603,747,0,1,Abu Dhabi,Yas Island,1300000
1,Sea & City View | Full Facilities | Prime Area,Apartment,24.487354,54.395122,1524,2,3,Abu Dhabi,Al Reem Island,1700000
2,Hot Offer|Furnished 2BR+M|Full Sea View|High F...,Apartment,24.492602,54.392677,1615,2,3,Abu Dhabi,Al Reem Island,1650000
3,Amazing Deal| Huge Layout | Big Terrace| Inves...,Apartment,24.484639,54.404103,1220,1,2,Abu Dhabi,Al Reem Island,800000
4,Luxurious Lifestyl|Exclusive Amenities|Invest Now,Apartment,24.538734,54.410601,5315,4,5,Abu Dhabi,Saadiyat Island,19000000


In [13]:
# Dimensions of the loaded table as (rows, columns).
prop.shape

(29905, 10)

In [14]:
# Per-column dtype and non-null count, used to spot text-typed numerics and missing values.
prop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29905 entries, 0 to 29904
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       29905 non-null  object 
 1   B_type     29905 non-null  object 
 2   Latitude   29905 non-null  float64
 3   Longitude  29905 non-null  float64
 4   Area       29905 non-null  object 
 5   Bedrooms   29905 non-null  int64  
 6   Bathrooms  29905 non-null  int64  
 7   City       29905 non-null  object 
 8   District   29905 non-null  object 
 9   Price      29905 non-null  int64  
dtypes: float64(2), int64(3), object(5)
memory usage: 2.3+ MB


In [15]:
# @title **3.3 Data Preprocessing**
def extract_view(Name):
    """Return the first "<word> View" phrase found in a listing title.

    Parameters
    ----------
    Name : object
        Listing title. It is passed through ``str()`` before matching, so
        non-string values (None, NaN, numbers) are accepted.

    Returns
    -------
    str or None
        The matched two-word phrase (e.g. ``"Sea View"``), or ``None`` when the
        title contains no such phrase. Matching is case-sensitive and needs a
        word boundary right after "View", so "Golf Views" is not matched.
    """
    found = re.search(r'\b(\w+ View)\b', str(Name))
    return found.group(1).strip() if found else None

# Build the 'View' column: the extracted "<word> View" phrase for each listing title,
# or None when the title has no such phrase.
prop['View'] = prop['Name'].map(extract_view)

prop.head()

Unnamed: 0,Name,B_type,Latitude,Longitude,Area,Bedrooms,Bathrooms,City,District,Price,View
0,Elegant Unit| Golf Views| High-Floor| Prime Area,Apartment,24.46386,54.595603,747,0,1,Abu Dhabi,Yas Island,1300000,
1,Sea & City View | Full Facilities | Prime Area,Apartment,24.487354,54.395122,1524,2,3,Abu Dhabi,Al Reem Island,1700000,City View
2,Hot Offer|Furnished 2BR+M|Full Sea View|High F...,Apartment,24.492602,54.392677,1615,2,3,Abu Dhabi,Al Reem Island,1650000,Sea View
3,Amazing Deal| Huge Layout | Big Terrace| Inves...,Apartment,24.484639,54.404103,1220,1,2,Abu Dhabi,Al Reem Island,800000,
4,Luxurious Lifestyl|Exclusive Amenities|Invest Now,Apartment,24.538734,54.410601,5315,4,5,Abu Dhabi,Saadiyat Island,19000000,


In [16]:
def extract_keywords(text, n_keywords=5):
    """Return up to ``n_keywords`` non-stop-word tokens of ``text`` as one string.

    A fresh ``CountVectorizer`` (English stop words removed, vocabulary capped at
    ``n_keywords``) is fitted on the single document ``str(text)`` and its
    vocabulary is joined with ", ".

    Parameters
    ----------
    text : object
        Text to tokenise; converted with ``str()`` first.
    n_keywords : int, default 5
        Upper bound on the number of tokens kept (``max_features``).

    Returns
    -------
    str
        Comma-separated lower-cased tokens, or ``"no keywords"`` when the
        vectorizer raises ``ValueError`` or yields no features.

    Notes
    -----
    NOTE(review): in a short title almost every token occurs once, and the
    sample output suggests the tokens that survive the cap are then simply the
    alphabetically first ones - this is not a relevance ranking. Confirm this
    is acceptable for the downstream use.
    """
    vectorizer = CountVectorizer(stop_words='english', max_features=n_keywords)
    try:
        # Only the learned vocabulary is needed, so fit without keeping the count matrix.
        vectorizer.fit([str(text)])
    except ValueError:
        # The vectorizer could not build a vocabulary from this text.
        return "no keywords"

    keywords = vectorizer.get_feature_names_out()
    return ', '.join(keywords) if keywords.size > 0 else "no keywords"

# Keyword summary of every listing title (at most 5 tokens per title).
prop['Bulding_Descripton'] = [
    extract_keywords(str(title), n_keywords=5) for title in prop['Name']
]

prop.head()

Unnamed: 0,Name,B_type,Latitude,Longitude,Area,Bedrooms,Bathrooms,City,District,Price,View,Bulding_Descripton
0,Elegant Unit| Golf Views| High-Floor| Prime Area,Apartment,24.46386,54.595603,747,0,1,Abu Dhabi,Yas Island,1300000,,"area, elegant, floor, golf, high"
1,Sea & City View | Full Facilities | Prime Area,Apartment,24.487354,54.395122,1524,2,3,Abu Dhabi,Al Reem Island,1700000,City View,"area, city, facilities, prime, sea"
2,Hot Offer|Furnished 2BR+M|Full Sea View|High F...,Apartment,24.492602,54.392677,1615,2,3,Abu Dhabi,Al Reem Island,1650000,Sea View,"2br, floor, furnished, high, hot"
3,Amazing Deal| Huge Layout | Big Terrace| Inves...,Apartment,24.484639,54.404103,1220,1,2,Abu Dhabi,Al Reem Island,800000,,"amazing, big, deal, huge, invest"
4,Luxurious Lifestyl|Exclusive Amenities|Invest Now,Apartment,24.538734,54.410601,5315,4,5,Abu Dhabi,Saadiyat Island,19000000,,"amenities, exclusive, invest, lifestyl, luxurious"


In [17]:
# @title
# Normalise column labels so the lookups below do not depend on stray whitespace.
prop.columns = prop.columns.str.strip()

# 'Area' is loaded as text (object dtype) and may carry ',' separators.
# Going through astype(str) keeps this cell re-runnable after 'Area' has already
# been converted to float; regex=False treats ',' as a literal character.
prop['Area'] = (
    prop['Area'].astype(str).str.replace(',', '', regex=False).astype(float)
)

# Price per square foot
prop['Price_per_Sqft'] = prop['Price'] / prop['Area']

# Average property price of the listing's city
City_avg_price = prop.groupby('City')['Price'].mean().to_dict()
prop['City_Avg_Price'] = prop['City'].map(City_avg_price)

# NOTE(review): 'Price_per_Sqft' and 'City_Avg_Price' are both derived from 'Price';
# if 'Price' is the prediction target they leak it - confirm before using them as features.

# The raw title is no longer needed; errors='ignore' makes a second run a no-op.
prop.drop(columns=['Name'], errors='ignore', inplace=True)
prop.head()

Unnamed: 0,B_type,Latitude,Longitude,Area,Bedrooms,Bathrooms,City,District,Price,View,Bulding_Descripton,Price_per_Sqft,City_Avg_Price
0,Apartment,24.46386,54.595603,747.0,0,1,Abu Dhabi,Yas Island,1300000,,"area, elegant, floor, golf, high",1740.294511,4329870.0
1,Apartment,24.487354,54.395122,1524.0,2,3,Abu Dhabi,Al Reem Island,1700000,City View,"area, city, facilities, prime, sea",1115.485564,4329870.0
2,Apartment,24.492602,54.392677,1615.0,2,3,Abu Dhabi,Al Reem Island,1650000,Sea View,"2br, floor, furnished, high, hot",1021.671827,4329870.0
3,Apartment,24.484639,54.404103,1220.0,1,2,Abu Dhabi,Al Reem Island,800000,,"amazing, big, deal, huge, invest",655.737705,4329870.0
4,Apartment,24.538734,54.410601,5315.0,4,5,Abu Dhabi,Saadiyat Island,19000000,,"amenities, exclusive, invest, lifestyl, luxurious",3574.788335,4329870.0


In [18]:
# @title
# Give every listing a sequential identifier 0 .. n-1.
prop['Id'] = range(len(prop))

# Handles on the two columns being repositioned.
first_column = prop['Id']
last_column = prop['Price']

# Order the columns as: 'Id', everything else in its current order, 'Price' last;
# then promote 'Id' to the index.
middle_columns = [col for col in prop.columns if col not in ('Id', 'Price')]
prop = prop[['Id', *middle_columns, 'Price']].set_index('Id')

# Preview the first few rows of the rearranged DataFrame
prop.head()

Unnamed: 0_level_0,B_type,Latitude,Longitude,Area,Bedrooms,Bathrooms,City,District,View,Bulding_Descripton,Price_per_Sqft,City_Avg_Price,Price
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,Apartment,24.46386,54.595603,747.0,0,1,Abu Dhabi,Yas Island,,"area, elegant, floor, golf, high",1740.294511,4329870.0,1300000
1,Apartment,24.487354,54.395122,1524.0,2,3,Abu Dhabi,Al Reem Island,City View,"area, city, facilities, prime, sea",1115.485564,4329870.0,1700000
2,Apartment,24.492602,54.392677,1615.0,2,3,Abu Dhabi,Al Reem Island,Sea View,"2br, floor, furnished, high, hot",1021.671827,4329870.0,1650000
3,Apartment,24.484639,54.404103,1220.0,1,2,Abu Dhabi,Al Reem Island,,"amazing, big, deal, huge, invest",655.737705,4329870.0,800000
4,Apartment,24.538734,54.410601,5315.0,4,5,Abu Dhabi,Saadiyat Island,,"amenities, exclusive, invest, lifestyl, luxurious",3574.788335,4329870.0,19000000


In [19]:
#@title **3.4 Create Distance_from_City_Center column**
# Reference point per city, keyed by city name. Each value holds 'lat' and 'lon'
# in degrees (haversine() below converts them with np.radians).
# NOTE(review): section 4.18 later rebinds `city_centers` to (lat, lon) tuples, adds
# 'Fujairah' and uses different Umm Al Quwain coordinates - confirm which set is intended.
city_centers = {
    'Abu Dhabi': {'lat': 24.453884, 'lon': 54.377344},
    'Dubai': {'lat': 25.276987, 'lon': 55.296249},
    'Sharjah': {'lat': 25.346255, 'lon': 55.420933},
    'Ajman': {'lat': 25.405216, 'lon': 55.513643},
    'Umm Al Quwain': {'lat': 25.564733, 'lon': 55.555174},
    'Ras Al Khaimah': {'lat': 25.800692, 'lon': 55.976200},
}

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two points given in degrees.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in degrees.
        Scalars and arrays may be mixed; they are broadcast by NumPy.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Distance(s) in kilometres on a sphere of radius 6371 km.
    """
    # Convert each argument on its own (rather than stacking all four in one list)
    # so that arrays and scalars can be combined and broadcast.
    lat1, lon1, lat2, lon2 = (np.radians(value) for value in (lat1, lon1, lat2, lon2))

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # near-antipodal points, which would turn the arcsin below into NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    # Radius of Earth in kilometers (mean radius)
    r = 6371.0
    return c * r

def find_nearest_city_center(lat, lon, city_centers):
    """Find the closest city centre to a point and its haversine distance.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the point, in degrees.
    city_centers : dict
        Maps a city name to its centre, given either as a
        ``{'lat': ..., 'lon': ...}`` dict or as a ``(lat, lon)`` pair. Both
        layouts are accepted because the notebook defines `city_centers` in
        each form in different cells.

    Returns
    -------
    tuple
        ``(nearest_city, distance)`` where ``distance`` is whatever
        ``haversine`` returns for that city. On a tie the city listed first wins.

    Raises
    ------
    ValueError
        If ``city_centers`` is empty.
    """
    if not city_centers:
        raise ValueError("city_centers must contain at least one city")

    distances = {}
    for city, coords in city_centers.items():
        if isinstance(coords, dict):
            center_lat, center_lon = coords['lat'], coords['lon']
        else:
            center_lat, center_lon = coords
        distances[city] = haversine(lat, lon, center_lat, center_lon)

    nearest_city = min(distances, key=distances.get)
    return nearest_city, distances[nearest_city]

# Distance (km) from each listing to whichever configured city centre is closest to it.
prop['Distance_from_City_Center'] = [
    find_nearest_city_center(listing_lat, listing_lon, city_centers)[1]
    for listing_lat, listing_lon in zip(prop['Latitude'], prop['Longitude'])
]

In [20]:
# Display the frame after 'Distance_from_City_Center' has been added.
prop


Unnamed: 0_level_0,B_type,Latitude,Longitude,Area,Bedrooms,Bathrooms,City,District,View,Bulding_Descripton,Price_per_Sqft,City_Avg_Price,Price,Distance_from_City_Center
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,Apartment,24.463860,54.595603,747.0,0,1,Abu Dhabi,Yas Island,,"area, elegant, floor, golf, high",1740.294511,4.329870e+06,1300000,22.119166
1,Apartment,24.487354,54.395122,1524.0,2,3,Abu Dhabi,Al Reem Island,City View,"area, city, facilities, prime, sea",1115.485564,4.329870e+06,1700000,4.133802
2,Apartment,24.492602,54.392677,1615.0,2,3,Abu Dhabi,Al Reem Island,Sea View,"2br, floor, furnished, high, hot",1021.671827,4.329870e+06,1650000,4.576366
3,Apartment,24.484639,54.404103,1220.0,1,2,Abu Dhabi,Al Reem Island,,"amazing, big, deal, huge, invest",655.737705,4.329870e+06,800000,4.362280
4,Apartment,24.538734,54.410601,5315.0,4,5,Abu Dhabi,Saadiyat Island,,"amenities, exclusive, invest, lifestyl, luxurious",3574.788335,4.329870e+06,19000000,10.017048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29900,Townhouses,25.691032,55.784255,3702.0,4,3,Ras Al Khaimah,Al Hamra Village,,"beach, br, launch, prices, villa",1958.400864,4.509206e+06,7250000,22.765497
29901,Townhouses,25.691032,55.784255,2110.0,3,2,Ras Al Khaimah,Al Hamra Village,,"access, beach, br, canal, independent",1510.426540,4.509206e+06,3187000,22.765497
29902,Townhouses,25.722171,55.844276,1964.0,2,3,Ras Al Khaimah,Mina Al Arab,,"bedrooms, maid, marbella, sensational",1145.621181,4.509206e+06,2250000,15.835813
29903,Townhouses,25.780455,56.000190,18000.0,10,10,Ras Al Khaimah,Seih Al Uraibi,,"compound, deal, great, ready, townhouses",361.111111,4.509206e+06,6500000,3.291280


# 2. Exploratory Data Analysis

In [None]:
#@title **4.1 Price, Area, Price per Square Foot Distribution**
fig, axs = plt.subplots(2, 2, figsize=(16, 12))

# One histogram (with KDE overlay) per variable: (column, panel title, target axes).
distribution_panels = [
    ('Price', 'Price Distribution', axs[0, 0]),
    ('Area', 'Area Distribution', axs[0, 1]),
    ('Price_per_Sqft', 'Price per Square Foot Distribution', axs[1, 0]),
]
for column, panel_title, ax in distribution_panels:
    sns.histplot(prop[column], kde=True, ax=ax)
    ax.set_title(panel_title)

# Only three of the four grid cells are used; hide the empty one.
axs[1, 1].set_visible(False)

# Show the plot
plt.show()

In [None]:
#@title **4.2 Correlation Matrix of High Correlations**
# Get the numeric variables
numeric_vars = prop.select_dtypes(include='number')

# Save the names of numeric variables
numeric_var_names = numeric_vars.columns.tolist()

# Output the count of numeric variables
print(f'There are {len(numeric_var_names)} numeric variables.')
all_numVar = prop[numeric_var_names]

# Pearson correlations between all numeric variables
cor_numVar = all_numVar.corr(method='pearson')

# Correlations with 'Price', strongest positive first
cor_sorted = cor_numVar['Price'].sort_values(ascending=False).to_frame()

# Keep only variables whose |correlation with Price| exceeds 0.5.
# The mask must be a Series: masking the one-column frame with a boolean frame
# keeps every row (non-matching cells just become NaN), so nothing is filtered.
CorHigh = cor_sorted.index[cor_sorted['Price'].abs() > 0.5]

# Restrict the correlation matrix to those variables
cor_numVar = cor_numVar.loc[CorHigh, CorHigh]

# Plotting the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cor_numVar, annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Matrix of High Correlations')
plt.show()

In [None]:
#@title **4.3 Property Locations and Prices**
# Listings plotted by coordinates and coloured by price.
ax = sns.scatterplot(data=prop, x='Longitude', y='Latitude', hue='Price', palette='viridis')
ax.set_title('Property Locations and Prices')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
#@title **4.4 Relationship between price with area,Price_per_Sqft,Location, Property Type**
fig, axs = plt.subplots(2, 2, figsize=(16, 12))

# Scatter plot 1: Price vs Area
sns.scatterplot(x='Area', y='Price', data=prop, ax=axs[0, 0])
axs[0, 0].set_title('Price vs Area')

# Scatter plot 2: price per square foot vs Area.
# The y-axis is 'Price_per_Sqft', so the title says Sqft (not SqM) and names
# the quantities in y-vs-x order like the other panels.
sns.scatterplot(x='Area', y='Price_per_Sqft', data=prop, ax=axs[0, 1])
axs[0, 1].set_title('Price per Sqft vs Area')

# Box plot 1: Price by location (City)
sns.boxplot(x='City', y='Price', data=prop, ax=axs[1, 0])
axs[1, 0].set_title('Price vs Location')
axs[1, 0].tick_params(axis='x', rotation=90)

# Box plot 2: Price by property type (B_type)
sns.boxplot(x='B_type', y='Price', data=prop, ax=axs[1, 1])
axs[1, 1].set_title('Price vs Property Type')
axs[1, 1].tick_params(axis='x', rotation=45)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
#@title **4.5 Pairplot**
# Pairwise relationships between the main numeric variables.
pairplot_columns = ['Price', 'Area', 'Bedrooms', 'Bathrooms', 'Price_per_Sqft']
sns.pairplot(prop[pairplot_columns])
plt.show()

In [None]:
#@title **4.6 Average Price per Square Foot by Property Type and City**
# Mean price per square foot for every (property type, city) pair, highest first.
# The table is printed so the numbers behind the chart are visible; a bare
# expression in the middle of a cell would be computed and then thrown away.
avg_ppsf_by_type_city = (
    prop.groupby(['B_type', 'City'])['Price_per_Sqft']
    .mean()
    .sort_values(ascending=False)
)
print(avg_ppsf_by_type_city)

plt.figure(figsize=(10, 6))
sns.barplot(x='B_type', y='Price_per_Sqft', hue='City', data=prop)
plt.title('Average Price per Square Foot by Property Type and City')
plt.xlabel('Property Type')
plt.ylabel('Average Price per Square Foot')
plt.xticks(rotation=45)
plt.show()

In [None]:
#@title **4.7 Top 10 Districts by Total Price**
# Sum of listing prices per district, keeping the ten largest totals.
top_10_district = (
    prop.groupby('District')['Price']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
print(top_10_district)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=top_10_district.index, y=top_10_district.values, ax=ax)
ax.set_title('Top 10 Districts by Total Price')
ax.set_xlabel('District')
ax.set_ylabel('Total Price')
ax.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
#@title **4.8 Property Type Distribution by City**
# Number of listings for every (city, property type) pair, largest first.
prop10 = (
    prop.groupby('City')['B_type']
    .value_counts()
    .sort_values(ascending=False)
)
print(prop10)

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=prop, x='City', hue='B_type', ax=ax)
ax.set_title('Property Type Distribution by City')
ax.tick_params(axis='x', rotation=45)
plt.show()

In [None]:
#@title **4.9 Property type distribution**
# Listing count per property type: bar chart plus the printed counts.
b_type = sns.countplot(data=prop, x='B_type')
print (prop['B_type'].value_counts())
b_type.set_title('Property Type Distribution')
b_type.tick_params(axis='x', rotation=45)
plt.show()

In [None]:
#@title **4.10 Top 10 Property Type per District**
# The ten most frequent (district, property type) combinations.
top_10_prop_per_type = (
    prop.groupby('District')['B_type']
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
)
print(top_10_prop_per_type)

In [None]:
#@title **4.11 Maximum Price by Building Type**
# Highest listing price within each building type, largest first.
max_price_prop = prop.groupby('B_type')['Price'].max().sort_values(ascending=False)
print(max_price_prop)

plt.figure(figsize=(10, 6))
sns.barplot(x=max_price_prop.index, y=max_price_prop.values)
# The aggregate is a maximum, so the labels say "Maximum" rather than "Total".
plt.title('Maximum Price by Building Type')
plt.xlabel('Building Type')
plt.ylabel('Maximum Price')
plt.xticks(rotation=90)
plt.show()

In [None]:
#@title **4.12 Total Price by City**
# Sum of listing prices per city, largest first. Computed once and reused for
# both the printed table and the chart.
city_total_price = prop.groupby('City')['Price'].sum().sort_values(ascending=False)
print(city_total_price)

plt.figure(figsize=(10, 6))
sns.barplot(x=city_total_price.index, y=city_total_price.values)
plt.title('Total Price by City')
plt.xlabel('City')
plt.ylabel('Total Price')
plt.xticks(rotation=90)
plt.show()

In [None]:
#@title **4.13 Bedrooms and Bathrooms Distribution**
# One count plot per room-count column, each shown as its own figure.
for room_column in ('Bedrooms', 'Bathrooms'):
    sns.countplot(x=room_column, data=prop)
    plt.title(f'{room_column} Distribution')
    plt.xticks(rotation=45)
    plt.show()

In [None]:
#@title **4.14 City with the highest average price**
# Average listing price per city, highest first
Region_avg_price = prop.groupby('City')['Price'].mean().sort_values(ascending=False)

# The series is sorted in descending order, so its first label is the city
# with the highest average price; the full ranking is printed after it.
print("City with the highest average price:", Region_avg_price.index[0])
print(Region_avg_price)

# Bar plot of the average prices by city
plt.figure(figsize=(10, 6))
sns.barplot(x=Region_avg_price.index, y=Region_avg_price.values)
plt.title('Average Property Prices by City')
plt.xlabel('City')
plt.ylabel('Average Price')
plt.xticks(rotation=45)
plt.show()

In [None]:
#@title **4.15 City with the highest price**
# Highest listing price per city, largest first. Stored under its own name so the
# per-city averages in `Region_avg_price` are not overwritten with maxima.
Region_max_price = prop.groupby('City')['Price'].max().sort_values(ascending=False)

# Print the per-city maxima
print("City with the highest Property price:", Region_max_price)

# Bar plot of the highest price in each city
plt.figure(figsize=(10, 6))
sns.barplot(x=Region_max_price.index, y=Region_max_price.values)
plt.title('Highest Property Prices by City')
plt.xlabel('City')
# The bars are maxima, not means.
plt.ylabel('Highest Price')
plt.xticks(rotation=45)
plt.show()

In [None]:
#@title **4.16 Top 20 districts with the highest prices**
# Highest listing price per district, largest first.
# (Despite its name, `district_avg_prices` holds maxima in this cell.)
district_avg_prices = (
    prop.groupby('District')['Price']
    .max()
    .sort_values(ascending=False)
)

# The 20 districts with the highest maximum price
top_20_districts = district_avg_prices.head(20)
print("Top 20 districts with the highest prices:\n", top_20_districts)

# Bar plot of the highest price in each of those districts
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=top_20_districts.index, y=top_20_districts.values, ax=ax)
ax.set_title('Highest Property Prices for Top 20 Districts')
ax.set_xlabel('District')
ax.set_ylabel('Highest Price')
ax.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
#@title **4.17 Top 20 districts with the highest average prices**
# Mean listing price per district, largest first.
district_avg_prices = (
    prop.groupby('District')['Price']
    .mean()
    .sort_values(ascending=False)
)

# The 20 districts with the highest average price
top_20_districts = district_avg_prices.head(20)
print("Top 20 districts with the highest average prices:\n", top_20_districts)

# Bar plot of the average price in each of those districts
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=top_20_districts.index, y=top_20_districts.values, ax=ax)
ax.set_title('Average Property Prices for Top 20 Districts')
ax.set_xlabel('District')
ax.set_ylabel('Average Price')
ax.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
#@title **4.18 Distance from city center**

# City-centre (latitude, longitude) per emirate, used for the distance lines on the map.
# NOTE(review): this rebinds `city_centers`, which section 3.4 defined with
# {'lat': ..., 'lon': ...} values and different Umm Al Quwain coordinates -
# confirm which coordinates are intended.
city_centers = {
    'Abu Dhabi': (24.4539, 54.3773),
    'Dubai': (25.276987, 55.296249),
    'Sharjah': (25.3463, 55.4209),
    'Ajman': (25.4052, 55.5136),
    'Ras Al Khaimah': (25.8007, 55.9764),
    'Umm Al Quwain': (25.5122, 55.5990),
    'Fujairah': (25.1288, 56.3265)
}

# Sum of listing prices per district
district_price_sum = prop.groupby('District')['Price'].sum().reset_index()

# Ten districts with the largest and the ten with the smallest total price
top_10_districts = district_price_sum.nlargest(10, 'Price')
last_10_districts = district_price_sum.nsmallest(10, 'Price')

# nlargest sorts descending and nsmallest ascending, so the extreme district
# is the first row of each selection.
highest_district = top_10_districts.iloc[0]
lowest_district = last_10_districts.iloc[0]

# Combine top 10 and last 10 districts
selected_districts = pd.concat([top_10_districts, last_10_districts])

# Listings located in the selected districts
filtered_prop = prop[prop['District'].isin(selected_districts['District'])]

# Five most expensive listings per district. Sorting and taking the group head
# keeps the 'District' column and avoids DataFrameGroupBy.apply acting on the
# grouping column, which is deprecated from pandas 2.2.
top_properties_prop = (
    filtered_prop.sort_values('Price', ascending=False, kind='stable')
    .groupby('District')
    .head(5)
    .reset_index(drop=True)
)

# Centre the map on the average coordinates of the plotted listings.
# The map object is not called `map`, so the builtin map() stays usable.
center_lat = top_properties_prop['Latitude'].mean()
center_lon = top_properties_prop['Longitude'].mean()
district_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)

top_district_names = set(top_10_districts['District'])

# Add markers for the top properties in each district
for idx, row in top_properties_prop.iterrows():
    # Green "up" arrow for high-total districts, red "down" arrow for low-total ones.
    district = row['District']
    if district == highest_district['District']:
        is_top_district = True
    elif district == lowest_district['District']:
        is_top_district = False
    else:
        is_top_district = district in top_district_names
    icon = folium.Icon(
        color='green' if is_top_district else 'red',
        icon='arrow-up' if is_top_district else 'arrow-down'
    )

    # Popups are rendered as HTML, so lines are separated with <br>.
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"District: {row['District']}<br>Price: {row['Price']}<br>Area: {row['Area']}",
        icon=icon
    ).add_to(district_map)

    # Without a configured centre for the listing's city there is nothing to
    # measure against: keep the marker and skip the distance line and label.
    city_center_coords = city_centers.get(row['City'])
    if city_center_coords is None:
        continue

    # Geodesic distance (km) from the listing to its own city's centre
    distance_to_center = geodesic((row['Latitude'], row['Longitude']), city_center_coords).km

    # Dashed line from the city centre to the property
    folium.PolyLine(
        locations=[city_center_coords, (row['Latitude'], row['Longitude'])],
        color="blue",
        dash_array="5, 10"
    ).add_to(district_map)

    # Distance label at the midpoint of the line, in a blue box
    mid_point = [(city_center_coords[0] + row['Latitude']) / 2, (city_center_coords[1] + row['Longitude']) / 2]
    folium.map.Marker(
        mid_point,
        icon=folium.DivIcon(
            html=f'''
                <div style="
                    background-color: #007bff;
                    color: white;
                    padding: 5px 47px;
                    border-radius: 10px;
                    border: 1px solid white;
                    font-size: 10pt;
                    font-weight: bold;
                    text-align: center;">
                    {distance_to_center:.2f} km
                </div>
            '''
        )
    ).add_to(district_map)

# Save the map to an HTML file
district_map.save('district_property_map_with_distances.html')

# Display the map inline in the notebook
district_map

In [None]:
#@title **4.19 Missing Data**
# Number of missing values in each column.
prop.isna().sum()

In [None]:
#@title **4.20 Dealing With Missing Data**

# Remove the two title-derived text columns. errors='ignore' lets the cell be
# re-run after the columns are already gone instead of raising KeyError.
prop.drop(columns=['Bulding_Descripton', 'View'], errors='ignore', inplace=True)

# Replace the index with a fresh 0..n-1 range; drop=True discards the current
# index values (the 'Id' index set earlier) rather than keeping them as a column.
prop.reset_index(drop=True, inplace=True)

# 3. Feature Engineering