SET GEOJSON DKI JAKARTA

INSTALL LIBRARY

In [1]:
!pip install geopandas matplotlib folium scikit-learn openpyxl pandas

Collecting geopandas
  Downloading geopandas-1.1.1-py3-none-any.whl.metadata (2.3 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting folium
  Downloading folium-0.20.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting pandas
  Downloading pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting numpy>=1.24 (from geopandas)
  Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Downloading pyogrio-0.11.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (5.3 kB)
Collecting pyproj>=3.5.0 (from geopandas)
  Downloading pyproj-3.7.1-cp310-cp310-manyli

IMPORT LIBRARY

In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score

LOAD DATASET

In [9]:
df = pd.read_excel("/root/penelitian/dataset.xlsx")

Data Preprocessing
Checking for missing values

In [10]:
df.isnull().sum()

periode_data                                   0
wilayah                                        0
kecamatan                                      0
jenis_kelamin                                  0
jumlah_estimasi_penderita                      0
jumlah_yang_mendapatkan_pelayanan_kesehatan    0
persentase                                     0
dtype: int64

Data Cleaning: Remove rows with missing target values

In [11]:
df.dropna(subset=['persentase'], inplace=True)

Encoding categorical data (Wilayah, Kecamatan, Jenis Kelamin)

In [12]:
df_encoded = pd.get_dummies(df[['wilayah', 'kecamatan', 'jenis_kelamin']], drop_first=True)

Joining encoded data with numerical columns

In [13]:
df_final = pd.concat([df[['jumlah_estimasi_penderita', 'jumlah_yang_mendapatkan_pelayanan_kesehatan', 'persentase']], df_encoded], axis=1)

Feature selection

In [14]:
X = df_final.drop('persentase', axis=1)
y = df_final['persentase']

Split the data into train and test sets

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Training the RandomForestRegressor model

In [16]:
model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


Predicting on the test set

In [17]:
y_pred = model.predict(X_test)

Evaluating the model

In [18]:
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae}")

Mean Absolute Error (MAE): 8.940944237531486


Cross-validation

In [19]:
cv_scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_absolute_error')
print(f"Cross-validation MAE scores: {cv_scores}")

Cross-validation MAE scores: [ -7.31002303 -12.32374799  -6.60079735  -7.08402065 -12.72071159]


Create a map centered around Jakarta

In [None]:
m = folium.Map(location=[-6.186486, 106.834091], zoom_start=12)

Adding a choropleth map for the 'persentase' column

In [23]:
folium.Choropleth(
    geo_data='penelitian.geojson',  # GeoJSON file for the districts
    name='choropleth',
    data=df,
    columns=['kecamatan', 'persentase'],
    key_on='feature.properties.NAME_3',  # Ensure this matches your GeoJSON properties
    fill_color='YlOrRd',
    legend_name='Persentase Pelayanan Hipertensi'
).add_to(m)

<folium.features.Choropleth at 0x7f2a4c834a30>

Adding marker clusters for each district with 'persentase'

In [24]:
marker_cluster = MarkerCluster().add_to(m)

Iterate over GeoJSON features to add markers at centroid of each district

In [28]:
import folium
from folium.plugins import MarkerCluster
import geopandas as gpd

# Load the GeoJSON file for Jakarta districts
geojson_path = 'penelitian.geojson'
geojson_data = gpd.read_file(geojson_path)

# Create a map centered around Jakarta
m = folium.Map(location=[-6.186486, 106.834091], zoom_start=12)

# Add the choropleth map for 'persentase' column using GeoJSON
folium.Choropleth(
    geo_data=geojson_data,  # The GeoJSON data from the file
    name='choropleth',
    data=df,  # Ensure this has 'kecamatan' and 'persentase' columns
    columns=['kecamatan', 'persentase'],
    key_on='feature.properties.NAME_3',  # Ensure this matches your GeoJSON property name
    fill_color='YlOrRd',
    legend_name='Persentase Pelayanan Hipertensi'
).add_to(m)

# Add marker clusters for each district with 'persentase'
marker_cluster = MarkerCluster().add_to(m)

# Iterate over GeoJSON features to add markers at centroid of each district
for _, row in geojson_data.iterrows():
    # Get the centroid of the district polygon
    centroid = row['geometry'].centroid
    lat, lon = centroid.y, centroid.x
    
    # Check if kecamatan exists in the dataset
    kecamatan_name = row['NAME_3']
    
    # Match kecamatan with the dataset
    matching_data = df[df['kecamatan'].str.strip() == kecamatan_name.strip()]
    
    if not matching_data.empty:
        persentase = matching_data['persentase'].values[0]
        folium.Marker(
            location=[lat, lon],
            popup=f"{kecamatan_name}: {persentase}%",
        ).add_to(marker_cluster)

# Save the map to an HTML file
html_output_path = 'peta_hipertensi_with_markers.html'
m.save(html_output_path)

# Output path for downloading
html_output_path

'peta_hipertensi_with_markers.html'

Save the map to an HTML file

In [29]:
html_output_path = 'peta_hipertensi_with_markers.html'
m.save(html_output_path)