# Spatial analysis of Mumbai 
## Importing the necessary libraries

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
import seaborn as sns
from shapely.geometry import Point
import plotly.express as px
import warnings

# Give every matplotlib figure in this notebook a large default canvas.
plt.rcParams['figure.figsize'] = (20, 10)

# Suppress all Python warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.simplefilter('ignore')

## Mumbai Price and location Dataset

In [26]:
# Load the listings table (one row per listing, with its price, area, location
# name and amenity flags) from the CSV that sits next to the notebook.
df=pd.read_csv('Location-Price.csv')
# Preview the first rows to check that the file parsed as expected.
df.head()

Unnamed: 0,Id,Price,Area,Location,No. of Bedrooms,New/Resale,Gymnasium,Lift Available,Car Parking,Maintenance Staff,...,Children's Play Area,Clubhouse,Intercom,Landscaped Gardens,Indoor Games,Gas Connection,Jogging Track,Swimming Pool,Price per Area,Pincode
0,0,48.5,720,Kharghar,1,0,0,1,1,1,...,0,0,0,0,0,0,0,0,6736.111111,410210
1,1,45.0,600,Kharghar,1,0,1,1,1,1,...,0,1,0,0,0,0,1,1,7500.0,410210
2,2,67.0,650,Kharghar,1,0,1,1,1,1,...,1,1,1,0,0,0,1,1,10307.692308,410210
3,3,45.0,650,Kharghar,1,0,0,1,1,1,...,0,0,1,1,0,0,0,0,6923.076923,410210
4,4,50.0,665,Kharghar,1,0,0,1,1,1,...,0,0,1,1,0,0,0,0,7518.796992,410210


In [27]:
# Only the location name and the listing price are needed for the map;
# every other column is discarded here.
df = df.loc[:, ['Location', 'Price']]

## Computing the median price for each location

In [28]:
# Collapse the listings to one row per location holding its median price.
# Selecting 'Price' and calling .median() directly (instead of .agg(['median']))
# keeps the result flat: two ordinary columns, 'Location' and 'Price'.
# The list-style aggregation produced two-level column labels, which surfaced
# later as a "(Price, median)" tuple column and forced a merge between frames
# with different column depths, something newer pandas versions reject.
df = df.groupby('Location', as_index=False)['Price'].median()

## Loading the latitude and longitude of each location from a GeoJSON file

In [29]:
import geopandas as gpd

# Read the point coordinates of every named location from the GeoJSON file
# that sits next to the notebook.
geo_df = gpd.read_file('location.geojson')

In [30]:
# Summarise the location frame: column names, non-null counts and dtypes.
geo_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 109 entries, 0 to 108
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   name      109 non-null    object  
 1   geometry  109 non-null    geometry
dtypes: geometry(1), object(1)
memory usage: 1.8+ KB


In [31]:
# Preview the first rows of the location frame (place name plus point geometry).
geo_df.head()

Unnamed: 0,name,geometry
0,Ambarnath,POINT (73.18220 19.18250)
1,Ambivali,POINT (73.17280 19.28460)
2,Andheri East,POINT (72.85680 19.11590)
3,Andheri West,POINT (72.83270 19.11720)
4,Asangaon,POINT (73.35410 19.41490)


In [32]:
# Relabel the two columns read from the GeoJSON ('name', 'geometry') so that the
# place-name column matches the 'Location' key used by the price table.
# NOTE(review): the export cell later passes geometry='Coordinates' explicitly,
# presumably because relabeling via .columns does not update the frame's
# active-geometry name; confirm before relying on geo_df.geometry.
geo_df.columns=['Location','Coordinates']

## Merging the geodata with the Price Location data

In [33]:
# Attach the median price to every mapped location. A left join keeps all
# locations from the coordinate table, so places without any listing stay in
# the result with a missing price.
merged_df = geo_df.merge(
    df,
    on='Location',
    how='left',
)

In [34]:
# Display the merged frame; locations with no matching price rows show an empty Price.
merged_df

Unnamed: 0,Location,Coordinates,"(Price, median)"
0,Ambarnath,POINT (73.18220 19.18250),28.180000
1,Ambivali,POINT (73.17280 19.28460),110.000000
2,Andheri East,POINT (72.85680 19.11590),170.000000
3,Andheri West,POINT (72.83270 19.11720),278.999995
4,Asangaon,POINT (73.35410 19.41490),20.175000
...,...,...,...
104,Virar West,POINT (72.84730 19.45570),37.570000
105,Wadala East,POINT (72.87000 19.02030),245.000000
106,Wadi Bunder,POINT (72.84410 18.96140),
107,Worli,POINT (72.81640 19.02290),735.000000


In [35]:
# Give the merged frame plain string labels, replacing the tuple-style
# "(Price, median)" label left by the aggregation with 'Price'.
# Labels are assigned by position, so the list order must match the column order.
merged_df.columns=['Location','Coordinates','Price']

## File output in geojson format

In [36]:
# Destination of the merged location/price table.
output_geojson_file_path = 'loc.geojson'

# Wrap the merged table as a GeoDataFrame, declaring 'Coordinates' as its
# geometry column, and write it out with the GeoJSON driver.
geo_data = gpd.GeoDataFrame(merged_df, geometry='Coordinates')
geo_data.to_file(output_geojson_file_path, driver='GeoJSON')

In [37]:
# Read back the file written by the export cell. Reusing the path variable
# defined there (rather than repeating the literal) guarantees that the file
# being read is the one that was just written.
loc_df = gpd.read_file(output_geojson_file_path)

In [38]:
# Check the re-loaded frame: column names, dtypes and how many rows carry a Price.
loc_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 109 entries, 0 to 108
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   Location  109 non-null    object  
 1   Price     90 non-null     float64 
 2   geometry  109 non-null    geometry
dtypes: float64(1), geometry(1), object(1)
memory usage: 2.7+ KB


In [39]:
# Display the re-loaded frame (Location, Price and point geometry).
loc_df

Unnamed: 0,Location,Price,geometry
0,Ambarnath,28.180000,POINT (73.18220 19.18250)
1,Ambivali,110.000000,POINT (73.17280 19.28460)
2,Andheri East,170.000000,POINT (72.85680 19.11590)
3,Andheri West,278.999995,POINT (72.83270 19.11720)
4,Asangaon,20.175000,POINT (73.35410 19.41490)
...,...,...,...
104,Virar West,37.570000,POINT (72.84730 19.45570)
105,Wadala East,245.000000,POINT (72.87000 19.02030)
106,Wadi Bunder,,POINT (72.84410 18.96140)
107,Worli,735.000000,POINT (72.81640 19.02290)


In [40]:
# Inspect the geometry column on its own before extracting coordinates from it.
loc_df['geometry']

0      POINT (73.18220 19.18250)
1      POINT (73.17280 19.28460)
2      POINT (72.85680 19.11590)
3      POINT (72.83270 19.11720)
4      POINT (73.35410 19.41490)
                 ...            
104    POINT (72.84730 19.45570)
105    POINT (72.87000 19.02030)
106    POINT (72.84410 18.96140)
107    POINT (72.81640 19.02290)
108    POINT (72.85680 19.23550)
Name: geometry, Length: 109, dtype: geometry

## Adding latitude and longitude columns to the dataset

In [41]:
# Split each point into plain numeric columns for plotting.
# The GeoSeries .y / .x accessors read all coordinates in one vectorised call
# instead of a per-row Python loop; y holds the latitude and x the longitude.
loc_df['Latitudes'] = loc_df['geometry'].y
loc_df['Longitudes'] = loc_df['geometry'].x

In [42]:
# Display the frame again to confirm the new Latitudes and Longitudes columns.
loc_df

Unnamed: 0,Location,Price,geometry,Latitudes,Longitudes
0,Ambarnath,28.180000,POINT (73.18220 19.18250),19.1825,73.1822
1,Ambivali,110.000000,POINT (73.17280 19.28460),19.2846,73.1728
2,Andheri East,170.000000,POINT (72.85680 19.11590),19.1159,72.8568
3,Andheri West,278.999995,POINT (72.83270 19.11720),19.1172,72.8327
4,Asangaon,20.175000,POINT (73.35410 19.41490),19.4149,73.3541
...,...,...,...,...,...
104,Virar West,37.570000,POINT (72.84730 19.45570),19.4557,72.8473
105,Wadala East,245.000000,POINT (72.87000 19.02030),19.0203,72.8700
106,Wadi Bunder,,POINT (72.84410 18.96140),18.9614,72.8441
107,Worli,735.000000,POINT (72.81640 19.02290),19.0229,72.8164


## Plotting the data on the map (colour coded by price)

In [43]:
# Build the plotting table directly from the frame prepared above.
# The previous version read 'loc.csv', a file that no cell in this notebook
# writes, so a fresh "Restart & Run All" depended on an external artefact.
# The geometry column is dropped because the map only needs the plain
# Location / Price / Latitudes / Longitudes columns.
df_plot_tmp = pd.DataFrame(loc_df.drop(columns='geometry'))

In [44]:
# Interactive map of the locations on an OpenStreetMap base layer.
# Each marker is coloured by its Price value and shows the location name on hover.
fig = px.scatter_mapbox(
    df_plot_tmp,
    lat="Latitudes",
    lon="Longitudes",
    color="Price",
    hover_name='Location',
    zoom=9,
    mapbox_style='open-street-map',
)
fig.show()