In [2]:
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html
import plotly.express as px
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


In [3]:
# Bring the raw listings CSV into the working directory.
# files.upload() opens an interactive file picker, so it is only invoked when
# the CSV is not already on disk; this lets the notebook be re-run from the top
# without a manual upload step each time.
from pathlib import Path

from google.colab import files

# `uploaded` stays defined either way; it is an empty dict when the upload
# was skipped because the file already exists.
uploaded = {} if Path("dubai_properties.csv").exists() else files.upload()

Saving dubai_properties.csv to dubai_properties.csv


In [4]:
# Read the uploaded listings file into the notebook's working DataFrame.
df = pd.read_csv(filepath_or_buffer="dubai_properties.csv")

In [5]:
# Preview the first five rows to confirm the columns loaded as expected.
df.head(n=5)

Unnamed: 0,Address,Rent,Beds,Baths,Type,Area_in_sqft,Rent_per_sqft,Rent_category,Frequency,Furnishing,Purpose,Posted_date,Age_of_listing_in_days,Location,City,Latitude,Longitude
0,"The Gate Tower 2, The Gate Tower, Shams Gate D...",124000,3,4,Apartment,1785,69.467787,Medium,Yearly,Unfurnished,For Rent,2024-03-07,45,Al Reem Island,Abu Dhabi,24.493598,54.407841
1,"Water's Edge, Yas Island, Abu Dhabi",140000,3,4,Apartment,1422,98.452883,Medium,Yearly,Unfurnished,For Rent,2024-03-08,44,Yas Island,Abu Dhabi,24.494022,54.607372
2,"Al Raha Lofts, Al Raha Beach, Abu Dhabi",99000,2,3,Apartment,1314,75.342466,Medium,Yearly,Furnished,For Rent,2024-03-21,31,Al Raha Beach,Abu Dhabi,24.485931,54.600939
3,"Marina Heights, Marina Square, Al Reem Island,...",220000,3,4,Penthouse,3843,57.246942,High,Yearly,Unfurnished,For Rent,2024-02-24,57,Al Reem Island,Abu Dhabi,24.493598,54.407841
4,"West Yas, Yas Island, Abu Dhabi",350000,5,7,Villa,6860,51.020408,High,Yearly,Unfurnished,For Rent,2024-02-16,65,Yas Island,Abu Dhabi,24.494022,54.607372


In [6]:
# Print the column listing with non-null counts and dtypes for the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73742 entries, 0 to 73741
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Address                 73742 non-null  object 
 1   Rent                    73742 non-null  int64  
 2   Beds                    73742 non-null  int64  
 3   Baths                   73742 non-null  int64  
 4   Type                    73742 non-null  object 
 5   Area_in_sqft            73742 non-null  int64  
 6   Rent_per_sqft           73742 non-null  float64
 7   Rent_category           73742 non-null  object 
 8   Frequency               73742 non-null  object 
 9   Furnishing              73742 non-null  object 
 10  Purpose                 73742 non-null  object 
 11  Posted_date             73742 non-null  object 
 12  Age_of_listing_in_days  73742 non-null  int64  
 13  Location                73742 non-null  object 
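 14  City                    73742 non-null  object 
 15  Latitude                73023 non-null  float64
 16  Longitude               73023 non-null  float64
dtypes: float64(3), int64(5), object(9)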

In [7]:
# Summary statistics for the numeric columns.
# The output recorded below shows Rent ranging from 0 to 55,000,000 and
# Area_in_sqft up to 210,254; no visible cell filters these extremes.
df.describe()

Unnamed: 0,Rent,Beds,Baths,Area_in_sqft,Rent_per_sqft,Age_of_listing_in_days,Latitude,Longitude
count,73742.0,73742.0,73742.0,73742.0,73742.0,73742.0,73023.0,73023.0
mean,147925.0,2.162811,2.650213,2054.053552,88.057754,74.261547,24.918929,55.053133
std,306965.8,1.578155,1.632997,3003.919252,66.5344,72.346767,0.569356,0.653722
min,0.0,0.0,1.0,74.0,0.0,11.0,15.175847,43.351928
25%,54999.0,1.0,2.0,850.0,39.977778,30.0,24.493598,54.607372
50%,98000.0,2.0,2.0,1334.0,71.428571,52.0,25.078641,55.238209
75%,170000.0,3.0,3.0,2130.0,118.483412,95.0,25.197978,55.367138
max,55000000.0,12.0,11.0,210254.0,2182.044888,2276.0,25.92031,56.361294


In [8]:
# Tally the null entries in every column and print the resulting Series.
null_counts = df.isna().sum()
print(null_counts)

Address                     0
Rent                        0
Beds                        0
Baths                       0
Type                        0
Area_in_sqft                0
Rent_per_sqft               0
Rent_category               0
Frequency                   0
Furnishing                  0
Purpose                     0
Posted_date                 0
Age_of_listing_in_days      0
Location                    0
City                        0
Latitude                  719
Longitude                 719
dtype: int64


In [9]:
# Discard listings that lack either coordinate; df is modified in place.
coordinate_columns = ['Latitude', 'Longitude']
df.dropna(subset=coordinate_columns, inplace=True)

In [10]:
# Remove fully duplicated rows in place, keeping the first occurrence of each.
df.drop_duplicates(keep='first', inplace=True)

In [11]:
# Integer-encode the categorical columns in place.
# One LabelEncoder is fitted per column and kept in `label_encoders`, so the
# code -> original label mapping of every column remains available through
# label_encoders[column].classes_ or label_encoders[column].inverse_transform(...).
# Refitting a single shared encoder on each column in turn would leave only
# the last column's mapping recoverable.
categorical_columns = [
    'Type',
    'Rent_category',
    'Frequency',
    'Furnishing',
    'Purpose',
    'Location',
    'City',
]

label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
# `le` is left bound to the encoder of the final column ('City').

In [12]:
# Remove the free-text address and the posting date columns in place.
df.drop(columns=['Address', 'Posted_date'], inplace=True)


In [13]:
# Replace the row labels with a fresh 0..n-1 range, discarding the old index
# (rows were removed by the earlier dropna / drop_duplicates cells).
df.index = pd.RangeIndex(start=0, stop=len(df), step=1)

In [14]:
# Box plot of Rent for each property Type.
# NOTE(review): 'Type' was label-encoded in an earlier cell, so the categories
# shown here are integer codes rather than the original type names.
fig = px.box(
    data_frame=df,
    x='Type',
    y='Rent',
    color='Type',
    title='Rent Distribution by Property Type',
)
fig.show()


In [15]:
# Mean Rent for each bedroom count, kept as a two-column frame (Beds, Rent).
avg_rent_by_beds = df.groupby('Beds', as_index=False)['Rent'].mean()

# Bar chart of that per-bedroom average.
fig = px.bar(
    data_frame=avg_rent_by_beds,
    x='Beds',
    y='Rent',
    title='Average Rent by Number of Beds',
)
fig.show()


In [16]:
# Scatter of Rent against floor area; marker size follows Baths and the hover
# box additionally lists Beds and Furnishing.
# NOTE(review): 'Type' and 'Furnishing' hold label-encoded integers at this
# point, so the colouring and hover values are codes, not names.
extra_hover_columns = ['Beds', 'Furnishing']
fig = px.scatter(
    data_frame=df,
    x='Area_in_sqft',
    y='Rent',
    color='Type',
    size='Baths',
    hover_data=extra_hover_columns,
    title='Rent vs Area (Colored by Type)',
)
fig.show()


In [17]:
# The 20 most frequent Location values (value_counts is sorted descending).
top_locations = df['Location'].value_counts().index[:20]

# Keep only the listings that fall in one of those locations.
in_top_location = df['Location'].isin(top_locations)
df_top_locations = df.loc[in_top_location]

# Box plot of Rent per location for that subset.
# NOTE(review): 'Location' was label-encoded earlier, so the axis shows codes.
fig = px.box(
    data_frame=df_top_locations,
    x='Location',
    y='Rent',
    color='Location',
    title='Rent Distribution by Top 20 Locations',
)
fig.show()


In [18]:
# Box plot of Rent for each Furnishing value.
# NOTE(review): 'Furnishing' was label-encoded earlier, so categories are codes.
fig = px.box(
    data_frame=df,
    x='Furnishing',
    y='Rent',
    color='Furnishing',
    title='Rent Distribution by Furnishing Status',
)
fig.show()


In [19]:
# Map of every listing at its coordinates; both marker colour and marker size
# follow Rent, and the hover title is the (label-encoded) Type value.
fig = px.scatter_mapbox(
    data_frame=df,
    lat="Latitude",
    lon="Longitude",
    color="Rent",
    size="Rent",
    hover_name="Type",
    color_continuous_scale=px.colors.sequential.Plasma,
    zoom=10,
    height=600,
)

# Apply the OpenStreetMap base layer and the figure title in one layout update.
fig.update_layout(
    mapbox_style="open-street-map",
    title="Map of Rents by Location",
)
fig.show()


In [24]:
# Write the cleaned, label-encoded frame to disk without the row index.
df.to_csv(path_or_buf="cleaned_dubai_rental_data.csv", index=False)