## Shapefile Converter
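A tool that converts Telkomsel's shapefiles into GeoJSON format: it loads a `.shp` file, keeps and renames a chosen set of columns, and writes the result as a GeoJSON file.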

**Author** : ngakan.gandhi@dsanalytics.com  
**License** : MIT License.

In [1]:
# Import modules
import geopandas as gpd
import json 
import pandas as pd
import shapely
from pathlib import Path

In [2]:
# Input and output folders, both resolved against the current working directory
data_dir = f"{Path.cwd()}/PETA_V21"
out_dir = f"{Path.cwd()}/results"

In [3]:
def load_shapefile_into_geodataframe(shp_in):
    """Read the file at ``shp_in`` with geopandas and return the result.

    Parameters
    ----------
    shp_in : path of the .shp file, passed unchanged to ``gpd.read_file``.

    Returns
    -------
    Whatever ``gpd.read_file`` returns for that path (a GeoDataFrame).

    A progress message is printed before the read starts.
    """
    print("Reading shapefile input...")
    loaded = gpd.read_file(shp_in)
    return loaded

In [4]:
def subset_geodataframe(geodataframe, list_of_cols_to_keep):
    """Return only the requested columns of ``geodataframe``.

    Parameters
    ----------
    geodataframe : the frame to select from.
    list_of_cols_to_keep : list of column labels; the result has these
        columns in the order given here.

    Returns
    -------
    The result of indexing ``geodataframe`` with the list of labels.
    """
    return geodataframe[list_of_cols_to_keep]

In [5]:
def rename_geodataframe_columns(geodataframe, col_name_new):
    """Return ``geodataframe`` with its columns renamed by position.

    The i-th existing column receives the i-th entry of ``col_name_new``.
    A column called 'geometry' is never renamed, whatever name is supplied
    at its position.

    Parameters
    ----------
    geodataframe : the frame whose columns should be renamed. It is not
        modified; a renamed frame is returned instead.
    col_name_new : sequence of new column names, in the same order as the
        frame's current columns. If it is shorter than the number of
        columns, the remaining columns keep their current names.

    Returns
    -------
    A new frame of the same type with the renamed columns.
    """
    col_name_orig = list(geodataframe.columns)
    # zip() pairs old and new names positionally and stops at the shorter
    # sequence; 'geometry' is left out of the mapping so it stays untouched.
    col_name_dict = {
        old_name: new_name
        for old_name, new_name in zip(col_name_orig, col_name_new)
        if old_name != 'geometry'
    }
    # Operate on the argument (the original referenced the global `gdf`) and
    # return a renamed frame instead of mutating in place.
    return geodataframe.rename(columns=col_name_dict)

In [6]:
def write_geodataframe_into_geojson(geodataframe, geojson_path):
    """Write ``geodataframe`` to ``geojson_path`` using the GeoJSON driver.

    Parameters
    ----------
    geodataframe : the frame to export. Its ``crs`` attribute is set to
        ``None`` before writing, so the caller's object is modified.
    geojson_path : destination path handed to ``to_file``.

    Returns
    -------
    None. Progress messages are printed before and after the write.
    """
    print("Writing GeoDataFrame to GeoJSON...")
    # Unset the CRS on the frame being written (the original cleared the
    # global `gdf` instead of the argument).
    # NOTE(review): presumably done so the output carries no CRS metadata -
    # confirm this is still wanted.
    geodataframe.crs = None
    geodataframe.to_file(geojson_path, driver='GeoJSON')
    print("Done writing shapefile input to GeoJSON!")

In [7]:
def json_neater(raw_geojson_in, neat_geojson_out):
    """Re-write a GeoJSON document with 4-space indentation.

    Parameters
    ----------
    raw_geojson_in : path of an existing GeoJSON file to read. For backward
        compatibility, a value that is not an existing file is parsed as
        JSON text directly.
    neat_geojson_out : path of the indented file to create or overwrite.

    Returns
    -------
    None. Progress messages are printed before loading and after writing.

    Raises
    ------
    json.JSONDecodeError
        If the file content (or the text passed in) is not valid JSON. This
        is also what a mistyped, non-existent path produces, because such a
        value is treated as JSON text.
    """
    print("Loading raw GeoJSON file...")
    # Decide whether the argument names a file; values that cannot be a path
    # at all (e.g. a very long JSON string, bytes) are treated as JSON text.
    try:
        source_is_file = Path(raw_geojson_in).is_file()
    except (OSError, TypeError, ValueError):
        source_is_file = False

    if source_is_file:
        # The original passed the path itself to json.loads(), which fails.
        with open(raw_geojson_in, 'r', encoding='utf-8') as infile:
            parsed = json.load(infile)
    else:
        parsed = json.loads(raw_geojson_in)

    with open(neat_geojson_out, 'w', encoding='utf-8') as outfile:
        json.dump(parsed, outfile, indent=4)
    print("Done prettify GeoJSON!")

In [8]:
# Destination of the converted GeoJSON file, inside out_dir
geojson_path = f"{out_dir}/BTS_DESA_NASIONAL_V21_0.json"

In [9]:
# Build the shapefile path inside data_dir and load it into a GeoDataFrame
shp_in = f"{data_dir}/BTS_DESA_NASIONAL_V21_0.shp"
gdf = load_shapefile_into_geodataframe(shp_in)

# Preview the first two rows
gdf.head(2)

Reading shapefile input...


Unnamed: 0,ID_DESA,DESA,ID_KEC,KECAMATAN,ID_KAB,KABUPATEN,ID_PROV,PROVINSI,ID_REG,REGION,...,ID_BRANCH,BRANCH,ID_SUBBRAN,SUBBRANCH,ID_CLUSTER,CLUSTER,MITRA_AD,LONGITUDE,LATITUDE,geometry
0,1101010001,LATIUNG,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,1,SUMBAGUT,...,50,BANDA ACEH,105,MEULABOH,113,MEULABOH,,96.4556,2.37651,"POLYGON ((96.48015 2.34382, 96.47920 2.34325, ..."
1,1101010002,LABUHAN BAJAU,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,1,SUMBAGUT,...,50,BANDA ACEH,105,MEULABOH,113,MEULABOH,,96.4729,2.39231,"POLYGON ((96.49244 2.38675, 96.49196 2.38556, ..."


In [10]:
# Keep only the ID/name column pairs listed below plus the geometry
cols_to_keep = [
    'ID_DESA',
    'DESA',
    'ID_KEC',
    'KECAMATAN',
    'ID_KAB',
    'KABUPATEN',
    'ID_PROV',
    'PROVINSI',
    'geometry',
]
gdf = subset_geodataframe(gdf, cols_to_keep)

# Preview the first two rows of the reduced frame
gdf.head(2)

Unnamed: 0,ID_DESA,DESA,ID_KEC,KECAMATAN,ID_KAB,KABUPATEN,ID_PROV,PROVINSI,geometry
0,1101010001,LATIUNG,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,"POLYGON ((96.48015 2.34382, 96.47920 2.34325, ..."
1,1101010002,LABUHAN BAJAU,1101010,TEUPAH SELATAN,1101,SIMEULUE,11,NANGGROE ACEH DARUSSALAM,"POLYGON ((96.49244 2.38675, 96.49196 2.38556, ..."


In [14]:
# New column names, listed in the same order as the columns currently in gdf
# (renaming is positional); 'geometry' keeps its name
new_cols_names = [
    'id_kelurahan',
    'kelurahan',
    'id_kecamatan',
    'kecamatan',
    'id_kabupaten',
    'kabupaten',
    'id_province',
    'province',
    'geometry',
]

gdf = rename_geodataframe_columns(gdf, new_cols_names)

In [15]:
# Export the renamed GeoDataFrame to the GeoJSON file at geojson_path
write_geodataframe_into_geojson(geodataframe=gdf, geojson_path=geojson_path)

Writing GeoDataFrame to GeoJSON...
Done writing shapefile input to GeoJSON!
