# Database of Aurangabad Neighbourhoods
### List of sources:

+ Neighbourhood Data:
    - [Aurangabad Municipal Corporation (AMC)][1]
    - [Government of India, Census][2] 

+ Latitude and Longitude Data: 
    - [Batch Geocoding][3]

+ Pricing data (Property rates) of Aurangabad Neighbourhoods: 
    - [Makaan][4]
    - [Housing][5]

[1]: http://rts.aurangabadmahapalika.org/RtsPortal/CitizenHome.html
[2]: https://censusindia.gov.in
[3]: https://www.geocod.io
[4]: https://www.makaan.com
[5]: https://housing.com/in/buy/aurangabad_maharashtra

### First, import the required libraries

In [1]:
# pandas does the CSV loading, merging and export in the cells below.
import pandas as pd
# NOTE(review): numpy is not referenced by any cell visible here — confirm
# whether a later part of the notebook needs it.
import numpy as np

# Fixed status message; it only signals that the imports above did not raise.
print('Libraries Imported..!!')

Libraries Imported..!!


### Let's load each `.csv` file into a dataframe and check its dimensions

In [2]:
# Per-neighbourhood census figures (population, houses, area, density).
census = pd.read_csv(filepath_or_buffer='Aurangabad Census.csv')
# Show (rows, columns) first, then preview the first five rows.
print(census.shape)
census.head(n=5)

(144, 9)


Unnamed: 0,Neighborhood,Ward,Total Population,Population Male,Population Female,Sex Ratio,No.of Houses,Area Hectare,Population Density
0,Harsul,1,11304,6548,4756,726,1257,2894.8,3
1,Bhagatsingh Nagar,2,16339,8015,8324,1039,2786,232.38,70
2,Mhasoba Nagar,2,15245,8658,6587,761,4275,365.5,41
3,Radhaswami Colony,3,10226,5374,4852,903,2238,295.12,34
4,Ambar Hill,4,21406,11586,9820,848,4002,458.7,46


In [3]:
# Geocoded coordinates (plus a sparsely filled post code) per neighbourhood.
# NOTE(review): the previewed Longitude values have differing numbers of
# decimals while Latitude is uniformly formatted, which suggests Longitude may
# be parsed as strings rather than floats — confirm with `lat_lon.dtypes`.
lat_lon = pd.read_csv(filepath_or_buffer='Aurangabad Neighbourhoods Geocodes.csv')
# Show (rows, columns) first, then preview the first five rows.
print(lat_lon.shape)
lat_lon.head(n=5)

(144, 4)


Unnamed: 0,Neighborhood,Latitude,Longitude,Post code
0,Aarati Nagar,19.904502,75.366124,
1,Ajabnagar,19.875788,75.3289894,
2,Altamash Colony,19.885573,75.350363,431003.0
3,Ambar Hill,19.930736,75.3241288,
4,Ambedkarnagar,19.896226,75.36477,


In [4]:
# Real-estate price summary (average, min/max per sqft, listing count)
# keyed by neighbourhood name.
property_rates = pd.read_csv(filepath_or_buffer='Aurangabad Real estate property rates.csv')
# Show (rows, columns) first, then preview the first five rows.
print(property_rates.shape)
property_rates.head(n=5)

(148, 5)


Unnamed: 0,Neighborhood,Avg. Price,SqftPrice Min,SqftPrice Max,Properties
0,Cidco N-1,27455,22410,32500,37
1,MIDC Chikalthana,32500,32500,32500,13
2,Ambedkarnagar,1255,721,1789,1
3,Pawan Nagar,3426,2871,3980,2
4,Mayur Nagar,2258,1258,3258,2


In [5]:
# Lookup table mapping each ward number to its borough code.
ward_borough = pd.read_csv(filepath_or_buffer='Aurangabad Neighbourhoods Ward.csv')
# Show (rows, columns) first, then preview the first five rows.
print(ward_borough.shape)
ward_borough.head(n=5)

(119, 2)


Unnamed: 0,Ward,Borough
0,1,N-A
1,2,N-A
2,3,N-A
3,4,N-A
4,5,NW-A


### Let's join these dataframes to create a single database

In [6]:
# Step 1: attach the borough code to every census row via its ward number.
df1 = census.merge(ward_borough, on='Ward', how='inner')
# The row count should still equal the census row count if every ward matched.
print(df1.shape)
df1.head(n=5)

(144, 10)


Unnamed: 0,Neighborhood,Ward,Total Population,Population Male,Population Female,Sex Ratio,No.of Houses,Area Hectare,Population Density,Borough
0,Harsul,1,11304,6548,4756,726,1257,2894.8,3,N-A
1,Bhagatsingh Nagar,2,16339,8015,8324,1039,2786,232.38,70,N-A
2,Mhasoba Nagar,2,15245,8658,6587,761,4275,365.5,41,N-A
3,Radhaswami Colony,3,10226,5374,4852,903,2238,295.12,34,N-A
4,Ambar Hill,4,21406,11586,9820,848,4002,458.7,46,N-A


In [7]:
# Step 2: attach coordinates (and post code) to each neighbourhood by name.
df2 = pd.merge(df1, lat_lon, how='inner', on='Neighborhood')

# An inner join silently discards every neighbourhood whose name has no exact
# match in the geocode table. Name the casualties explicitly so the loss is
# visible and can be fixed at the source (e.g. spelling differences), rather
# than leaving the reader to infer it from a smaller row count.
missing_geocodes = df1.loc[
    ~df1['Neighborhood'].isin(lat_lon['Neighborhood']), 'Neighborhood'
].tolist()
if missing_geocodes:
    print(f'Dropped {len(missing_geocodes)} neighbourhood(s) without geocodes: {missing_geocodes}')

print(df2.shape)
df2.head()

(141, 13)


Unnamed: 0,Neighborhood,Ward,Total Population,Population Male,Population Female,Sex Ratio,No.of Houses,Area Hectare,Population Density,Borough,Latitude,Longitude,Post code
0,Harsul,1,11304,6548,4756,726,1257,2894.8,3,N-A,19.917838,75.3405862,
1,Bhagatsingh Nagar,2,16339,8015,8324,1039,2786,232.38,70,N-A,19.919366,75.3558207,
2,Mhasoba Nagar,2,15245,8658,6587,761,4275,365.5,41,N-A,19.913759,75.3508881,
3,Radhaswami Colony,3,10226,5374,4852,903,2238,295.12,34,N-A,19.918989,75.3406173,
4,Ambar Hill,4,21406,11586,9820,848,4002,458.7,46,N-A,19.930736,75.3241288,


In [8]:
# Step 3: add the property-rate columns, matching on neighbourhood name.
df3 = df2.merge(property_rates, on='Neighborhood', how='inner')
# Compare with the previous frame's shape to spot rows lost by the inner join.
print(df3.shape)
df3.head(n=5)

(141, 17)


Unnamed: 0,Neighborhood,Ward,Total Population,Population Male,Population Female,Sex Ratio,No.of Houses,Area Hectare,Population Density,Borough,Latitude,Longitude,Post code,Avg. Price,SqftPrice Min,SqftPrice Max,Properties
0,Harsul,1,11304,6548,4756,726,1257,2894.8,3,N-A,19.917838,75.3405862,,4455,3076,5833,3
1,Bhagatsingh Nagar,2,16339,8015,8324,1039,2786,232.38,70,N-A,19.919366,75.3558207,,4854,3120,6587,5
2,Mhasoba Nagar,2,15245,8658,6587,761,4275,365.5,41,N-A,19.913759,75.3508881,,2932,2351,3512,3
3,Radhaswami Colony,3,10226,5374,4852,903,2238,295.12,34,N-A,19.918989,75.3406173,,2250,1258,3241,2
4,Ambar Hill,4,21406,11586,9820,848,4002,458.7,46,N-A,19.930736,75.3241288,,1299,854,1744,1


### Re-arrange the columns 

In [9]:
# Column order: identifiers and location first, then demographics, then pricing.
# `.copy()` gives `df` its own data, so later edits to `df` leave `df3` intact.
df = df3.loc[:, [
    'Neighborhood', 'Ward', 'Latitude', 'Longitude', 'Borough', 'Post code',
    'Total Population', 'Population Male', 'Population Female', 'Sex Ratio',
    'No.of Houses', 'Area Hectare', 'Population Density',
    'Avg. Price', 'SqftPrice Min', 'SqftPrice Max', 'Properties',
]].copy()
df

Unnamed: 0,Neighborhood,Ward,Latitude,Longitude,Borough,Post code,Total Population,Population Male,Population Female,Sex Ratio,No.of Houses,Area Hectare,Population Density,Avg. Price,SqftPrice Min,SqftPrice Max,Properties
0,Harsul,1,19.917838,75.3405862,N-A,,11304,6548,4756,726,1257,2894.80,3,4455,3076,5833,3
1,Bhagatsingh Nagar,2,19.919366,75.3558207,N-A,,16339,8015,8324,1039,2786,232.38,70,4854,3120,6587,5
2,Mhasoba Nagar,2,19.913759,75.3508881,N-A,,15245,8658,6587,761,4275,365.50,41,2932,2351,3512,3
3,Radhaswami Colony,3,19.918989,75.3406173,N-A,,10226,5374,4852,903,2238,295.12,34,2250,1258,3241,2
4,Ambar Hill,4,19.930736,75.3241288,N-A,,21406,11586,9820,848,4002,458.70,46,1299,854,1744,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,Satara Parisar,115,19.848305,75.2978043,S-A,,35995,18015,17980,998,5402,61.02,589,3638,1562,5714,23
137,Waluj,116,19.832670,75.19974,WAL,,31000,15550,15450,994,2340,32.00,969,5147,2462,7831,21
138,Aurangabad Cantonment,117,19.879064,75.2881003,CAN,,22000,11400,10600,930,120,38.20,576,4067,2345,5789,8
139,Dharmapur,118,19.855670,75.25695,WAL,,4657,2347,2310,984,1012,12.30,379,2981,2413,490,2


#### `Post code` data is not available for every neighbourhood, so let's drop the `Post code` column

In [10]:
# Remove the mostly-empty `Post code` column.
# Reassigning (instead of `inplace=True`) together with `errors='ignore'` keeps
# this cell idempotent: running it a second time is a no-op rather than a
# KeyError for the already-removed column.
df = df.drop(columns='Post code', errors='ignore')

In [11]:
# Sanity check: the column count should be one lower than before the drop,
# with the row count unchanged.
df.shape

(141, 16)

In [12]:
# Preview the first five rows of the final table.
df.head(n=5)

Unnamed: 0,Neighborhood,Ward,Latitude,Longitude,Borough,Total Population,Population Male,Population Female,Sex Ratio,No.of Houses,Area Hectare,Population Density,Avg. Price,SqftPrice Min,SqftPrice Max,Properties
0,Harsul,1,19.917838,75.3405862,N-A,11304,6548,4756,726,1257,2894.8,3,4455,3076,5833,3
1,Bhagatsingh Nagar,2,19.919366,75.3558207,N-A,16339,8015,8324,1039,2786,232.38,70,4854,3120,6587,5
2,Mhasoba Nagar,2,19.913759,75.3508881,N-A,15245,8658,6587,761,4275,365.5,41,2932,2351,3512,3
3,Radhaswami Colony,3,19.918989,75.3406173,N-A,10226,5374,4852,903,2238,295.12,34,2250,1258,3241,2
4,Ambar Hill,4,19.930736,75.3241288,N-A,21406,11586,9820,848,4002,458.7,46,1299,854,1744,1


### Write the final dataframe to a `.csv` file

In [13]:
# Persist the merged table; `index=False` leaves the positional row index out
# of the written file.
df.to_csv(path_or_buf='Aurangabad Ward-Wise Data.csv', index=False)

In [14]:
# Fixed completion message; it does not itself verify that the CSV was written.
print('Database Created.!')

Database Created.!
