## Shapefile Converter
A tool to convert Telkomsel's shapefile into GeoJSON format.    

**Writer** : ngakan.gandhi@dsanalytics.com  
**License** : MIT License.

In [1]:
# Import modules
import geopandas as gpd
import json 
import pandas as pd
import shapely
from pathlib import Path

In [2]:
# Input (shapefile) and output (GeoJSON) folders, both resolved against the
# directory the notebook is started from.
data_dir = f"{Path.cwd()}/PETA_V21"
out_dir = f"{Path.cwd()}/results"

In [3]:
def load_shapefile_into_geodataframe(shp_in):
    """Read a shapefile from disk.

    Args:
        shp_in: Location of the .shp file; passed unchanged to
            ``gpd.read_file``.

    Returns:
        The object produced by ``gpd.read_file(shp_in)``.
    """
    print("Reading shapefile input...")
    loaded = gpd.read_file(shp_in)
    return loaded

In [4]:
def subset_geodataframe(geodataframe, list_of_cols_to_keep):
    """Return a frame restricted to the requested columns.

    Args:
        geodataframe: Frame to select from.
        list_of_cols_to_keep: Column labels to retain, in the order they
            should appear in the result.

    Returns:
        The result of indexing ``geodataframe`` with the given column list.
    """
    return geodataframe[list_of_cols_to_keep]

In [5]:
def rename_geodataframe_columns(geodataframe, col_name_new):
    """Rename the columns of a frame positionally, leaving 'geometry' alone.

    The i-th existing column receives the i-th entry of ``col_name_new``.
    If ``col_name_new`` is shorter than the column list, the remaining
    columns keep their names. A column called 'geometry' is never renamed,
    even when a new name is supplied for its position.

    Args:
        geodataframe: Frame whose columns should be renamed. It is not
            modified; a renamed copy is returned instead.
        col_name_new: Iterable of new column names, aligned by position
            with ``geodataframe.columns``.

    Returns:
        A new frame of the same type with the renamed columns.
    """
    # Operate on the argument itself (the previous version reached for a
    # notebook-global `gdf`) and drop 'geometry' from the mapping so the
    # geometry column keeps its name as documented.
    col_name_dict = {
        old_name: new_name
        for old_name, new_name in zip(geodataframe.columns, col_name_new)
        if old_name != 'geometry'
    }
    return geodataframe.rename(columns=col_name_dict)

In [6]:
def create_gdf_hierarchy(geodataframe):
    """Add a 'hierarchy' column describing each row's administrative levels.

    For every row a list of four dicts is built, ordered from the finest
    level to the coarsest (kelurahan, kecamatan, kabupaten, province). Each
    dict has the keys 'id', 'level' and 'name'. The list is stored as its
    ``str()`` representation, i.e. a Python-literal string, not JSON.

    Args:
        geodataframe: Frame containing the columns ID_DESA, DESA, ID_KEC,
            KECAMATAN, ID_KAB, KABUPATEN, ID_PROV and PROVINSI.

    Returns:
        The same ``geodataframe`` object, which is modified in place by
        adding (or overwriting) the 'hierarchy' column.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    source_cols = ['ID_DESA', 'DESA', 'ID_KEC', 'KECAMATAN',
                   'ID_KAB', 'KABUPATEN', 'ID_PROV', 'PROVINSI']

    # Build one plain string per row. Returning a pd.Series from a row-wise
    # apply (as before) makes pandas assemble a DataFrame, which is slow and
    # breaks the column assignment when the frame has no rows.
    hierarchy = [
        str([{'id': id_desa, 'level': 'kelurahan', 'name': desa},
             {'id': id_kec, 'level': 'kecamatan', 'name': kecamatan},
             {'id': id_kab, 'level': 'kabupaten', 'name': kabupaten},
             {'id': id_prov, 'level': 'province', 'name': provinsi}])
        for (id_desa, desa, id_kec, kecamatan,
             id_kab, kabupaten, id_prov, provinsi)
        in geodataframe[source_cols].itertuples(index=False, name=None)
    ]
    geodataframe['hierarchy'] = hierarchy
    return geodataframe

In [7]:
def slice_geodataframe(geodataframe, start_idx, end_idx):
    """Cut out the rows ``start_idx:end_idx`` of a frame.

    The length of the slice is printed, and the ``crs`` attribute of the
    returned slice is set to ``None`` before it is handed back.

    Args:
        geodataframe: Frame to slice.
        start_idx: Start of the slice (inclusive).
        end_idx: End of the slice (exclusive).

    Returns:
        The sliced frame with ``crs`` set to ``None``.
    """
    window = geodataframe[start_idx:end_idx]
    print("Sliced GeoDataFrame has length of : {}".format(len(window)))
    window.crs = None
    return window

In [8]:
def write_geodataframe_into_geojson(geodataframe, geojson_path):
    """Export the 'geometry' and 'hierarchy' columns of a frame as GeoJSON.

    Side effects: the ``crs`` attribute of the passed-in ``geodataframe`` is
    set to ``None`` (the caller's object is modified), the parent folder of
    ``geojson_path`` is created if it does not exist yet, and progress
    messages are printed.

    Args:
        geodataframe: Frame with at least the columns 'geometry' and
            'hierarchy'.
        geojson_path: Destination file path for the GeoJSON output.

    Returns:
        None.
    """
    print("Writing GeoDataFrame to GeoJSON...")
    # Make sure the destination folder exists; nothing else creates it, so
    # a first run would otherwise depend on the folder being made by hand.
    Path(geojson_path).parent.mkdir(parents=True, exist_ok=True)
    # unset CRS
    geodataframe.crs = None
    # Only these two columns are exported; everything else is dropped.
    export_frame = geodataframe[['geometry', 'hierarchy']]
    export_frame.to_file(geojson_path, driver='GeoJSON')
    print("Done writing shapefile input to GeoJSON!")

In [9]:
def get_geodataframe_row_from_id_desa(geodataframe, id_desa):
    """Look up the index labels of all rows with a given ID_DESA.

    Args:
        geodataframe: Frame with an 'ID_DESA' column.
        id_desa: Identifier to search for; it is converted with ``str()``
            before being compared against the column.

    Returns:
        A list of index labels of the matching rows (empty if none match).
    """
    wanted = str(id_desa)
    is_match = geodataframe['ID_DESA'] == wanted
    return geodataframe.index[is_match].tolist()

In [10]:
def json_neater(raw_geojson_in, neat_geojson_out):
    """Re-write a JSON/GeoJSON file with 4-space indentation.

    The input is read as UTF-8 with undecodable bytes ignored and parsed
    with ``strict=False``; the parsed content is then dumped to the output
    path. Progress messages are printed before and after.

    Args:
        raw_geojson_in: Path of the existing (compact) GeoJSON file.
        neat_geojson_out: Path the indented copy is written to.

    Returns:
        None.
    """
    print("Loading raw GeoJSON file...")
    with open(raw_geojson_in, encoding='utf-8', errors='ignore') as raw_file:
        payload = json.load(raw_file, strict=False)
    with open(neat_geojson_out, 'w') as pretty_file:
        json.dump(payload, pretty_file, indent=4)
    print("Done prettify GeoJSON!")

In [11]:
# Locations of the input shapefile, the raw GeoJSON export and the
# pretty-printed GeoJSON for the full data set
shp_in = f"{data_dir}/BTS_DESA_NASIONAL_V21_0.shp"
geojson_path = f"{out_dir}/BTS_DESA_NASIONAL_V21_0.json"
final_json = f"{out_dir}/BTS_DESA_NASIONAL_V21_0_PRETTY.json"

In [12]:
# Read the shapefile at `shp_in` into the GeoDataFrame `gdf`
gdf = load_shapefile_into_geodataframe(shp_in)

# Preview the first two rows to check which columns were loaded
gdf.head(2)

Reading shapefile input...


Unnamed: 0,ID_DESA,DESA,ID_KEC,KECAMATAN,ID_KAB,KABUPATEN,ID_PROV,PROVINSI,ID_REG,REGION,...,ID_BRANCH,BRANCH,ID_SUBBRAN,SUBBRANCH,ID_CLUSTER,CLUSTER,MITRA_AD,LONGITUDE,LATITUDE,geometry
0,1101010001,LATIUNG,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,1,SUMBAGUT,...,50,BANDA ACEH,105,MEULABOH,113,MEULABOH,,96.4556,2.37651,"POLYGON ((96.48015 2.34382, 96.47920 2.34325, ..."
1,1101010002,LABUHAN BAJAU,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,1,SUMBAGUT,...,50,BANDA ACEH,105,MEULABOH,113,MEULABOH,,96.4729,2.39231,"POLYGON ((96.49244 2.38675, 96.49196 2.38556, ..."


In [13]:
# Create a 'hierarchy' column from the ID_*/name columns of every row.
# `gdf` is rebound to the returned frame, so all later cells see the
# version that carries the extra column.
gdf = create_gdf_hierarchy(gdf)

# Preview the first two rows; 'hierarchy' should now be the last column
gdf.head(2)

Unnamed: 0,ID_DESA,DESA,ID_KEC,KECAMATAN,ID_KAB,KABUPATEN,ID_PROV,PROVINSI,ID_REG,REGION,...,BRANCH,ID_SUBBRAN,SUBBRANCH,ID_CLUSTER,CLUSTER,MITRA_AD,LONGITUDE,LATITUDE,geometry,hierarchy
0,1101010001,LATIUNG,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,1,SUMBAGUT,...,BANDA ACEH,105,MEULABOH,113,MEULABOH,,96.4556,2.37651,"POLYGON ((96.48015 2.34382, 96.47920 2.34325, ...","[{'id': '1101010001', 'level': 'kelurahan', 'n..."
1,1101010002,LABUHAN BAJAU,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,1,SUMBAGUT,...,BANDA ACEH,105,MEULABOH,113,MEULABOH,,96.4729,2.39231,"POLYGON ((96.49244 2.38675, 96.49196 2.38556, ...","[{'id': '1101010002', 'level': 'kelurahan', 'n..."


In [14]:
# Filter columns: 'hierarchy' and 'geometry' are what
# write_geodataframe_into_geojson() exports, and 'ID_DESA' is needed by
# get_geodataframe_row_from_id_desa().
# NOTE: `gdf` is overwritten, so the columns create_gdf_hierarchy() reads
# are gone afterwards; re-running that earlier cell requires reloading `gdf`.
cols_to_keep = ['hierarchy', 'geometry', 'ID_DESA']
gdf = subset_geodataframe(gdf, cols_to_keep)

# Preview the reduced frame
gdf.head(2)

Unnamed: 0,hierarchy,geometry,ID_DESA
0,"[{'id': '1101010001', 'level': 'kelurahan', 'n...","POLYGON ((96.48015 2.34382, 96.47920 2.34325, ...",1101010001
1,"[{'id': '1101010002', 'level': 'kelurahan', 'n...","POLYGON ((96.49244 2.38675, 96.49196 2.38556, ...",1101010002


In [15]:
# Take the slice 0:50 of `gdf` as a small sample for testing the export.
# slice_geodataframe() also sets `crs` to None on the returned slice.
gdf_smaller = slice_geodataframe(gdf, 0, 50)

# Preview the sample
gdf_smaller.head()

Sliced GeoDataFrame has length of : 50


Unnamed: 0,hierarchy,geometry,ID_DESA
0,"[{'id': '1101010001', 'level': 'kelurahan', 'n...","POLYGON ((96.48015 2.34382, 96.47920 2.34325, ...",1101010001
1,"[{'id': '1101010002', 'level': 'kelurahan', 'n...","POLYGON ((96.49244 2.38675, 96.49196 2.38556, ...",1101010002
2,"[{'id': '1101010003', 'level': 'kelurahan', 'n...","POLYGON ((96.37330 2.34174, 96.37327 2.34161, ...",1101010003
3,"[{'id': '1101010004', 'level': 'kelurahan', 'n...","POLYGON ((96.47211 2.43549, 96.47207 2.43430, ...",1101010004
4,"[{'id': '1101010005', 'level': 'kelurahan', 'n...","POLYGON ((96.47117 2.44948, 96.46327 2.44910, ...",1101010005


In [16]:
# Point the output paths at "_smaller" files so the test export of the
# sample does not use the full-data file names set earlier
geojson_path = f"{out_dir}/BTS_DESA_NASIONAL_V21_0_smaller.json"
final_json = f"{out_dir}/BTS_DESA_NASIONAL_V21_0_PRETTY_smaller.json"

In [17]:
# Write the sample GeoDataFrame into a GeoJSON file at `geojson_path`.
# Only the 'geometry' and 'hierarchy' columns are exported, and the call
# sets `gdf_smaller.crs` to None as a side effect.
write_geodataframe_into_geojson(gdf_smaller, geojson_path)

Writing GeoDataFrame to GeoJSON...
Done writing shapefile input to GeoJSON!


In [18]:
# Make the GeoJSON neater: read the raw export at `geojson_path` and save
# an indented copy to `final_json`
json_neater(geojson_path, final_json)

Loading raw GeoJSON file...
Done prettify GeoJSON!


In [19]:
# Show the first rows of the sample again after the export; 'ID_DESA' is
# still present because the writer only subsets its own local copy
gdf_smaller.head()

Unnamed: 0,hierarchy,geometry,ID_DESA
0,"[{'id': '1101010001', 'level': 'kelurahan', 'n...","POLYGON ((96.48015 2.34382, 96.47920 2.34325, ...",1101010001
1,"[{'id': '1101010002', 'level': 'kelurahan', 'n...","POLYGON ((96.49244 2.38675, 96.49196 2.38556, ...",1101010002
2,"[{'id': '1101010003', 'level': 'kelurahan', 'n...","POLYGON ((96.37330 2.34174, 96.37327 2.34161, ...",1101010003
3,"[{'id': '1101010004', 'level': 'kelurahan', 'n...","POLYGON ((96.47211 2.43549, 96.47207 2.43430, ...",1101010004
4,"[{'id': '1101010005', 'level': 'kelurahan', 'n...","POLYGON ((96.47117 2.44948, 96.46327 2.44910, ...",1101010005


In [20]:
# Test get_geodataframe_row_from_id_desa: the integer id is converted to a
# string inside the function before it is compared with the 'ID_DESA'
# column, and the result is a list of matching index labels
row_idx = get_geodataframe_row_from_id_desa(gdf_smaller, 1101010005)
print(row_idx)

[4]
