In [8]:
import pandas as pd

# Load the per-kecamatan density table and the combined facility records.
kepadatan_data = pd.read_csv('./datasets/cleaned_data_Kepadatan_menurut_kecamatan.csv')
combined_data = pd.read_csv('./datasets/combined_complete_data_only.csv')

# Build a case- and whitespace-insensitive join key on both frames.
# The helper column is added in place, so the shapes printed below include it.
kepadatan_data['kecamatan_normalized'] = kepadatan_data['kecamatan'].str.lower().str.strip()
combined_data['kecamatan_normalized'] = combined_data['kecamatan'].str.lower().str.strip()

# Left-join the density columns onto every facility row.
# - validate='many_to_one' raises pandas.errors.MergeError if a normalized
#   kecamatan appears more than once in kepadatan_data; without it, a duplicate
#   key would silently multiply facility rows in the saved CSV.
# - indicator=True adds a temporary '_merge' column so rows without a density
#   match can be reported instead of silently carrying NaN values.
merged_data = pd.merge(
    combined_data,
    kepadatan_data,
    on='kecamatan_normalized',
    how='left',
    suffixes=('', '_kepadatan'),
    validate='many_to_one',
    indicator=True,
)

# Rows present in combined_data whose kecamatan has no density record.
unmatched_mask = merged_data['_merge'] == 'left_only'

# Drop the helper columns that only existed for matching and diagnostics.
merged_data = merged_data.drop(columns=['kecamatan_normalized', '_merge'])

# Display info about the merged dataset
print(f"Combined data shape: {combined_data.shape}")
print(f"Kepadatan data shape: {kepadatan_data.shape}")
print(f"Merged data shape: {merged_data.shape}")
print(f"\nColumns in merged dataset: {list(merged_data.columns)}")

# Surface unmatched rows (only when there are any) so missing density values
# in the output file are a known, visible condition rather than a surprise.
if unmatched_mask.any():
    unmatched_names = sorted(
        merged_data.loc[unmatched_mask, 'kecamatan'].dropna().astype(str).unique()
    )
    print(
        f"\nWarning: {int(unmatched_mask.sum())} rows have no matching kepadatan record; "
        f"unmatched kecamatan: {unmatched_names}"
    )

merged_data.to_csv('./datasets/kepadatan_data_combined_with_faspub.csv', index=False)

Combined data shape: (5439, 9)
Kepadatan data shape: (30, 5)
Merged data shape: (5439, 12)

Columns in merged dataset: ['nama', 'alamat', 'kecamatan', 'type', 'latitude', 'longitude', 'tahun', 'source_file', 'kecamatan_kepadatan', 'Jumlah Penduduk', 'Luas Wilayah (km²)', 'Kepadatan (jiwa/km²)']
