## Implementasi pembacaan data dengan Python

In [3]:
import pandas as pd
import geopandas as gpd

file_geojson = "/content/drive/MyDrive/Colab Notebooks/TeamProject2/data_geojson.geojson"
file_emisi = "/content/drive/MyDrive/Colab Notebooks/TeamProject2/emisi_karbon_web.csv"
file_luas = "/content/drive/MyDrive/Colab Notebooks/TeamProject2/luas_kebakaran_web (1).csv"

def read_data():
    df_geo = gpd.read_file(file_geojson)
    df_emisi = pd.read_csv(file_emisi)
    df_luas = pd.read_csv(file_luas)

    return df_geo, df_emisi, df_luas

df_geo, df_emisi, df_luas = read_data()

print("\n Data GeoJSON:")
print(df_geo.head())
print("\n Data Emisi Karbon:")
print(df_emisi.head())
print("\n Data Luas Kebakaran Hutan:")
print(df_luas.head())



 Data GeoJSON:
   ADM0_CODE  ADM0_NAME  ADM1_CODE        ADM1_NAME DISP_AREA  EXP1_YEAR  \
0        116  Indonesia      73615           Maluku        NO       3000   
1        116  Indonesia      73616     Maluku Utara        NO       3000   
2        116  Indonesia      73609  Bangka Belitung        NO       3000   
3        116  Indonesia      73610           Banten        NO       3000   
4        116  Indonesia      73611        Gorontalo        NO       3000   

   Luas_Hutan_ha  Luas_Lahan Basah_ha  Luas_Lahan Kritis_ha  \
0   3.228349e+06         21802.989529         172360.739294   
1   3.654637e+06         21303.918667         170691.106706   
2   1.281995e+06        114593.018863          45021.504000   
3   6.521884e+05         17209.494078          26054.859098   
4   1.064263e+06          6681.906667          27370.067255   

   Luas_Padang Rumput_ha  Luas_Perairan_ha  Luas_Perkotaan_ha  \
0          149114.669098               0.0        6385.349686   
1          157336.

## Analisis struktur data menggunakan pandas

In [4]:
def struktur_data(df, name):
    print(f"\n--- Struktur Data: {name} ---")
    print(df.info())

struktur_data(df_geo, "GeoJSON")
struktur_data(df_emisi, "Emisi Karbon")
struktur_data(df_luas, "Luas Kebakaran Hutan")



--- Struktur Data: GeoJSON ---
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   ADM0_CODE              33 non-null     int32   
 1   ADM0_NAME              33 non-null     object  
 2   ADM1_CODE              33 non-null     int32   
 3   ADM1_NAME              33 non-null     object  
 4   DISP_AREA              33 non-null     object  
 5   EXP1_YEAR              33 non-null     int32   
 6   Luas_Hutan_ha          33 non-null     float64 
 7   Luas_Lahan Basah_ha    33 non-null     float64 
 8   Luas_Lahan Kritis_ha   33 non-null     float64 
 9   Luas_Padang Rumput_ha  33 non-null     float64 
 10  Luas_Perairan_ha       33 non-null     float64 
 11  Luas_Perkotaan_ha      33 non-null     float64 
 12  Luas_Pertanian_ha      33 non-null     float64 
 13  Luas_Semak Belukar_ha  33 non-null     float64 
 14  Prov

## Membuat data profiling sederhana

In [6]:
!pip install ydata-profiling

Collecting ydata-profiling
  Downloading ydata_profiling-4.16.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting visions<0.8.2,>=0.7.5 (from visions[type_image_path]<0.8.2,>=0.7.5->ydata-profiling)
  Downloading visions-0.8.1-py3-none-any.whl.metadata (11 kB)
Collecting htmlmin==0.1.12 (from ydata-profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0.13,>=0.11.1 (from ydata-profiling)
  Downloading phik-0.12.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata-profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.1 (from ydata-profiling)
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting dacite>=1.8 (from ydata-profiling)
  Downloading dacite-1.9.2-py3-none-any.whl.metadata (17 kB)
Collecting puremagic (from visions<0.8.2,>=0.7.5->visions[type_image_path]<0.8.2,>=0.7.5->

In [8]:
import pandas as pd
import geopandas as gpd
from ydata_profiling import ProfileReport
import os

df_geo_attr = df_geo.drop(columns="geometry")

profile_emisi = ProfileReport(df_emisi, explorative=True)
profile_luas = ProfileReport(df_luas, explorative=True)
profile_geo = ProfileReport(df_geo_attr, explorative=True)

profile_emisi.to_file(os.path.join(SAVE_PATH, "profiling_emisi_karbon.html"))
profile_luas.to_file(os.path.join(SAVE_PATH, "profiling_luas_kebakaran.html"))
profile_geo.to_file(os.path.join(SAVE_PATH, "profiling_geojson.html"))

print(f"Data profiling selesai. Laporan tersimpan di: {SAVE_PATH}")


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


100%|██████████| 7/7 [00:00<00:00, 145.20it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


100%|██████████| 7/7 [00:00<00:00, 76.42it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


  0%|          | 0/19 [00:00<?, ?it/s][A
 26%|██▋       | 5/19 [00:00<00:00, 47.03it/s][A
100%|██████████| 19/19 [00:00<00:00, 68.21it/s]
  discretized_df.loc[:, column] = self._discretize_column(
  discretized_df.loc[:, column] = self._discretize_column(
  discretized_df.loc[:, column] = self._discretize_column(
  discretized_df.loc[:, column] = self._discretize_column(


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Data profiling selesai. Laporan tersimpan di: /content/drive/MyDrive/Colab Notebooks/TeamProject2


## Implementasi struktur data dengan Python

In [9]:
import pandas as pd
import geopandas as gpd

print("Struktur Data GeoJSON (GeoDataFrame):")
print(df_geo.info(), "\n")

print("Struktur Data Emisi Karbon:")
print(df_emisi.info(), "\n")

print("Struktur Data Luas Kebakaran:")
print(df_luas.info(), "\n")


Struktur Data GeoJSON (GeoDataFrame):
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   ADM0_CODE              33 non-null     int32   
 1   ADM0_NAME              33 non-null     object  
 2   ADM1_CODE              33 non-null     int32   
 3   ADM1_NAME              33 non-null     object  
 4   DISP_AREA              33 non-null     object  
 5   EXP1_YEAR              33 non-null     int32   
 6   Luas_Hutan_ha          33 non-null     float64 
 7   Luas_Lahan Basah_ha    33 non-null     float64 
 8   Luas_Lahan Kritis_ha   33 non-null     float64 
 9   Luas_Padang Rumput_ha  33 non-null     float64 
 10  Luas_Perairan_ha       33 non-null     float64 
 11  Luas_Perkotaan_ha      33 non-null     float64 
 12  Luas_Pertanian_ha      33 non-null     float64 
 13  Luas_Semak Belukar_ha  33 non-null     float64 
 14

## Membuat fungsi-fungsi untuk transformasi data

In [10]:
df_luas.columns = df_luas.columns.str.lower().str.replace(" ", "_")
df_emisi.columns = df_emisi.columns.str.lower().str.replace(" ", "_")
df_geo.columns = df_geo.columns.str.lower().str.replace(" ", "_")

if 'luas_kebakaran' in df_luas.columns:
    df_luas['luas_kebakaran_km2'] = df_luas['luas_kebakaran']

if 'emisi_karbon' in df_emisi.columns:
    df_emisi['emisi_ton'] = df_emisi['emisi_karbon'] * 1000

if 'provinsi' in df_geo.columns:
    df_geo['provinsi'] = df_geo['provinsi'].str.title()

df_luas.to_csv("/content/drive/MyDrive/Colab Notebooks/TeamProject2/luas_kebakaran_transformed.csv", index=False)
df_emisi.to_csv("/content/drive/MyDrive/Colab Notebooks/TeamProject2/emisi_karbon_transformed.csv", index=False)
df_geo.to_file("/content/drive/MyDrive/Colab Notebooks/TeamProject2/data_geojson_transformed.geojson", driver="GeoJSON")

print("Transformasi selesai! Data tersimpan di Google Drive.")


Transformasi selesai! Data tersimpan di Google Drive.


## Validasi hasil transformasi

In [None]:
import pandas as pd
import geopandas as gpd

file_luas_transformed = "/content/drive/MyDrive/Colab Notebooks/TeamProject2/luas_kebakaran_transformed.csv"
file_emisi_transformed = "/content/drive/MyDrive/Colab Notebooks/TeamProject2/emisi_karbon_transformed.csv"
file_geo_transformed = "/content/drive/MyDrive/Colab Notebooks/TeamProject2/data_geojson_transformed.geojson"

df_luas = pd.read_csv(file_luas_transformed)
df_emisi = pd.read_csv(file_emisi_transformed)
df_geo = gpd.read_file(file_geo_transformed)

print("Cek Missing Values:")
print("Luas Kebakaran:\n", df_luas.isnull().sum())
print("Emisi Karbon:\n", df_emisi.isnull().sum())
print("GeoJSON:\n", df_geo.isnull().sum())

print("\n Statistik Data Luas Kebakaran:")
print(df_luas.describe())
print("\n Statistik Data Emisi Karbon:")
print(df_emisi.describe())

print("\nCek Tipe Data Setelah Transformasi:")
print("Luas Kebakaran:", df_luas.dtypes)
print("Emisi Karbon:", df_emisi.dtypes)
print("GeoJSON:", df_geo.dtypes)

if 'luas_kebakaran_km2' in df_luas.columns:
    print("\n Luas Kebakaran berhasil dikonversi ke km².")
if 'emisi_ton' in df_emisi.columns:
    print("Emisi Karbon berhasil dikonversi ke ton.")
if 'provinsi' in df_geo.columns and df_geo['provinsi'].str.istitle().all():
    print("Nama provinsi sudah terformat dengan baik.")

print("\n Validasi Selesai! Pastikan tidak ada anomali pada data.")


Cek Missing Values:
Luas Kebakaran:
 no                           0
provinsi                     0
luas_area_terbakar_(ha)      0
luas_area_terbakar_(ha).1    0
luas_area_terbakar_(ha).2    0
luas_area_terbakar_(ha).3    0
luas_area_terbakar_(ha).4    0
dtype: int64
Emisi Karbon:
 no          0
provinsi    0
2018        0
2019        0
2020        0
2021        0
2022        0
dtype: int64
GeoJSON:
 adm0_code                0
adm0_name                0
adm1_code                0
adm1_name                0
disp_area                0
exp1_year                0
luas_hutan_ha            0
luas_lahan_basah_ha      0
luas_lahan_kritis_ha     0
luas_padang_rumput_ha    0
luas_perairan_ha         0
luas_perkotaan_ha        0
luas_pertanian_ha        0
luas_semak_belukar_ha    0
provinsi                 0
status                   0
str1_year                0
shape_area               0
shape_leng               0
geometry                 0
dtype: int64

 Statistik Data Luas Kebakaran:
       luas