In [6]:
import os
import shutil
import ftplib
import datetime
import requests
import rasterio
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd

from rasterio.mask import mask
from shapely.geometry import Point
from rasterio.transform import from_origin
from netCDF4 import Dataset, num2date

In [7]:
# Directory where the NetCDF files downloaded from the FTP server are stored.
path_nc_file = "../data/row"
# Output directory for the rewritten (interval-rainfall) copy of the NetCDF file.
path_nc_row = "../repository/pre-processing/row"
# Output directory for the trimmed 3-D NetCDF file that the WMS/WFS cells read.
path_modified = "../repository/pre-processing/result-row"
# Excel workbook loaded with pd.read_excel; supplies the grid indices and region columns.
path_pch_tabular = "../data/tabular/data_pch_balai.xlsx"
# GeoJSON file whose geometries are used to mask the generated GeoTIFFs.
mask_pulau = "../data/geojson/pulau.geojson"

In [None]:
# FTP connection settings, read from the process environment (None when unset).
# NOTE(review): many shells already export USER as the OS login name - confirm
# that HOST / USER / PASSWORD really carry the FTP server's values here.
ftp_host = os.getenv("HOST")
ftp_user = os.getenv("USER")
ftp_password = os.getenv("PASSWORD")
# Cycle tag embedded in the data file names; a later cell branches on cycle == '00'.
cycle = "12"

def connect_ftp():
    """Open an FTP session to ``ftp_host``, log in and move to the root directory.

    Returns:
        The connected ``ftplib.FTP`` object. Connection or login failures
        propagate as exceptions raised by ``ftplib``.
    """
    session = ftplib.FTP(ftp_host)
    session.login(user=ftp_user, passwd=ftp_password)
    session.cwd("/")
    return session

def download_file_from_ftp(ftp, filename):
    """Download ``filename`` from the FTP working directory into ``path_nc_file``.

    The transfer is written to a temporary ``.part`` file and only renamed to
    its final name once it has completed. An interrupted download therefore
    never leaves a truncated file behind that a later run would mistake for a
    valid, already-downloaded file.

    Args:
        ftp: Connected ``ftplib.FTP`` session.
        filename: Name of the remote file to fetch.

    Returns:
        The local path of the file (freshly downloaded or already present), or
        ``None`` when the file is not listed on the server or the transfer
        failed.
    """
    local_file_path = os.path.join(path_nc_file, filename)
    partial_path = local_file_path + ".part"
    try:
        if filename not in ftp.nlst():
            return None

        if os.path.exists(local_file_path):
            print(f"File {filename} is available")
            return local_file_path

        with open(partial_path, "wb") as local_file:
            ftp.retrbinary(f"RETR {filename}", local_file.write)
        # Rename only after a complete transfer.
        os.replace(partial_path, local_file_path)
        print(f"Download successfully {filename}")
        return local_file_path
    except Exception as exc:
        # Best effort: drop the incomplete download so it is retried next time.
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError as cleanup_error:
            print(f"Could not remove partial download {partial_path}: {cleanup_error}")
        print("File is corrupted, and there is nothing that can be done.")
        print(f"Reason: {exc}")
        return None

def download_latest_file_from_ftp(ftp):
    """Download the remote file whose name sorts last in the FTP listing.

    Args:
        ftp: Connected ``ftplib.FTP`` session.

    Returns:
        Whatever ``download_file_from_ftp`` returns for that file, or ``None``
        when the listing is empty.
    """
    remote_names = ftp.nlst()
    if not remote_names:
        return None
    # The lexicographically greatest name is the last one in sorted order.
    newest_name = max(remote_names)
    return download_file_from_ftp(ftp, newest_name)

# The file requested is the run dated yesterday.
today = datetime.date.today() - datetime.timedelta(days=1)
filename = f"ECMWF.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"
print("Downloading:", filename)

# Bound up front so the availability check below can never raise NameError
# when the connection fails.
local_file_path = None

# connect_ftp() raises on failure; convert that into the "no connection" branch.
try:
    ftp = connect_ftp()
except ftplib.all_errors as exc:
    ftp = None
    print(f"FTP error: {exc}")

if ftp:
    try:
        # Prefer the requested file, otherwise fall back to the newest one listed.
        local_file_path = download_file_from_ftp(ftp, filename) or download_latest_file_from_ftp(ftp)
    finally:
        # Always release the connection, even when the download raised.
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()
else:
    print("Cannot connect to FTP server")

if local_file_path is None:
    print("File is currently unavailable for download.")
    exit()

# When the fallback file was used, keep `filename` (later stored as the data
# provenance) in sync with what was actually downloaded.
# NOTE(review): `today`, which names the output files, is NOT adjusted here.
downloaded_name = os.path.basename(local_file_path)
if downloaded_name != filename:
    print(f"Requested file not found, using latest available file: {downloaded_name}")
    filename = downloaded_name

In [None]:
# Read the downloaded .nc file
data = Dataset(local_file_path)

lat = data.variables['lat'][:]
lon = data.variables['lon'][:]
prec = data.variables['tp'][:,0,:,:]
time = data.variables['time'][:]

dates = num2date(time, data.variables['time'].units)
print((dates[7]+datetime.timedelta(hours=7)).strftime('%Y-%m-%d %H:%M:%S WIB'))
print((dates[15]+datetime.timedelta(hours=7)).strftime('%Y-%m-%d %H:%M:%S WIB'))

# NOTE: xrain is an alias of prec (no copy), so the clean-up below also
# modifies prec and this comparison is always True.
xrain = prec
print(np.array_equal(xrain,prec))

# Clean the accumulated rainfall, one time step at a time:
#  * first step: non-positive values become 0 (the original line used `==`,
#    a comparison, so nothing was ever assigned);
#  * later steps: a value lower than the previous step's (including negative
#    values) is replaced by the previous step's value, which keeps the
#    accumulation non-decreasing.
# np.ma.filled(..., False) leaves masked cells untouched, as the scalar loop did.
xrain[0, np.ma.filled(xrain[0] <= 0, False)] = 0
for step in range(1, len(dates)):
    dropped = np.ma.filled(xrain[step] < xrain[step-1], False)
    xrain[step, dropped] = xrain[step-1, dropped]

# Convert accumulated rainfall to per-interval rainfall:
# the first step is kept, every later step is the difference to its predecessor.
hjn = np.empty((len(dates),len(lat),len(lon)))
hjn[0,:,:] = xrain[0,:,:]
hjn[1:,:,:] = xrain[1:,:,:]-xrain[:-1,:,:]

# Reshape 3-D to 4-D so it matches the dimensions of 'tp' in the source file
hjn2 = hjn.reshape(len(dates), 1, len(lat), len(lon))

# Re-open the source with xarray, replace 'tp' and shift the time axis by 7 hours
ds = xr.open_dataset(local_file_path)
ds['tp'].values = hjn2
ds = ds.assign_coords(time=("time",ds['time'].values + np.timedelta64(7,'h')))

# Write the rewritten dataset to a new NetCDF file
output_rewrite = f"ECMWF_new.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"
output_path = os.path.join(path_nc_row, output_rewrite)
ds.to_netcdf(output_path)
print (ds)

In [None]:
result_file_name = f"ECMWF_new_3d.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"

# Join the output directory with the file name
file_path = os.path.join(path_modified, result_file_name)

# Coordinate and time values copied from the source dataset (first 48 steps only)
latitude = data.variables["lat"][:]
longitude = data.variables["lon"][:]
time_k = data.variables['time'][:48]

# Create the new NetCDF file. The context manager closes it even when one of
# the writes below fails.
with Dataset(file_path, 'w', format='NETCDF4') as f:
    print(f"File {result_file_name} berhasil dibuat")
    print (data.variables)

    tempgrp = f.createGroup('Rain_data')

    # Create dimensions
    f.createDimension('lon', len(longitude))
    f.createDimension('lat', len(latitude))
    f.createDimension('time', len(time_k))

    # Create variables. The handles get their own names so the notebook-level
    # arrays `lon`, `lat` and `time` are not replaced by handles of a file
    # that is closed at the end of this cell.
    lon_var = f.createVariable('lon', 'f4', 'lon')
    lat_var = f.createVariable('lat', 'f4', 'lat')
    rain_var = f.createVariable('rain', 'f4', ('time', 'lat', 'lon'))
    time_var = f.createVariable('time', 'i4', 'time')

    # Write the data ("[:]" on the left-hand side is required to assign values)
    lon_var[:] = longitude[:]
    lat_var[:] = latitude[:]
    rain_var[:,:,:] = hjn[:48,:,:]
    # NOTE(review): the +7 offset presumably converts to WIB and assumes the
    # source time axis is expressed in hours - confirm against the source units.
    time_var[:] = time_k+7

    print (dates[0].strftime('%Y-%m-%d ')+str(cycle)+":00:00")

    # Global attributes
    f.description = "ECMWF from BMKG modified by Jhon doe"
    f.history = "Created " + today.strftime("%d/%m/%y")

    # Variable attributes
    lon_var.units = 'degree_east'
    lat_var.units = 'degree_north'
    time_var.units = 'hours since '+(dates[0]+datetime.timedelta(hours=7)).strftime('%Y-%m-%d ')+str(cycle)+":00:00"
    rain_var.units = 'mm'

In [None]:
# Build the path with os.path.join instead of a hard-coded backslash: "\E" is
# an invalid escape sequence, and on non-Windows systems the backslash would
# become part of the file name instead of acting as a separator.
data_path = os.path.join(
    path_modified,
    f"ECMWF_new_3d.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc",
)

# Offset added to every time index below; only the '00' cycle is shifted.
n = 4 if cycle == '00' else 0

data = Dataset(data_path)
print(data.variables)

PROSES UNTUK WMS

In [None]:
# Check the time axis of the data
time = data.variables['time'][:]
dates = num2date(time, data.variables['time'].units)
print((dates[10+n]).strftime('%Y-%m-%d %H:%M:%S WIB'))
print((dates[16+n]).strftime('%Y-%m-%d %H:%M:%S WIB'))

lon_wms = np.array(data.variables["lon"][:])
lat_wms = np.array(data.variables["lat"][:])
prec_wms = np.array(data.variables["rain"][:])

# Mesh grid built from the arrays read in THIS cell (the original used
# `lon`/`lat`, which an earlier cell rebinds to handles of a closed file).
xx, yy = np.meshgrid(lon_wms, lat_wms)
start_date = dates[11+n - 1].strftime('%m%d%Y')

input_hour_dir = f"../repository/post-processing/nc_to_tiff/pch_hour_{start_date}/"
input_day_dir = f"../repository/post-processing/nc_to_tiff/pch_day_{start_date}/"
output_masked_hour_dir = f"../repository/post-processing/wms/hour/pch_hour_{start_date}/"
output_masked_day_dir = f"../repository/post-processing/wms/day/pch_day_{start_date}/"

os.makedirs(input_hour_dir, exist_ok=True)
os.makedirs(input_day_dir, exist_ok=True)
os.makedirs(output_masked_hour_dir, exist_ok=True)
os.makedirs(output_masked_day_dir, exist_ok=True)

# The georeferencing does not depend on the time step, so compute it once.
# NOTE(review): the pixel size divides the extent by the number of points
# (not points - 1) and from_origin() treats row 0 as the northern edge -
# confirm grid spacing and latitude ordering of the source data.
resolution_lon = (lon_wms.max() - lon_wms.min()) / lon_wms.shape[0]
resolution_lat = (lat_wms.max() - lat_wms.min()) / lat_wms.shape[0]
transform = from_origin(lon_wms.min(), lat_wms.max(), resolution_lon, resolution_lat)
wgs84 = rasterio.crs.CRS.from_proj4("+proj=longlat +datum=WGS84 +no_defs")

# Running total of the interval rainfall over the exported steps.
accumulation = np.zeros_like(prec_wms[0])

for k in range(11+n, 19+n):
    # Use the interval rainfall read from the 'rain' variable. The original
    # read `prec`, the cumulative field left over from the pre-processing cell.
    hour_data = prec_wms[k, :, :]
    accumulation += hour_data

    start_hour = dates[k - 1].strftime('%H%M')

    tiff_filename_hour = os.path.join(input_hour_dir, f"pch_hour_{start_date}_{start_hour}.tif")

    with rasterio.open(
        tiff_filename_hour,
        'w',
        driver='GTiff',
        height=hour_data.shape[0],
        width=hour_data.shape[1],
        count=1,
        dtype=hour_data.dtype,
        crs=wgs84,
        transform=transform
    ) as dst:
        dst.write(hour_data, 1)

    print(f"Successfully convert 3H netCDF to tiff: {tiff_filename_hour}")

tiff_filename_day = os.path.join(input_day_dir, f"pch_day_{start_date}_2200.tif")

with rasterio.open(
    tiff_filename_day,
    'w',
    driver='GTiff',
    height=accumulation.shape[0],
    width=accumulation.shape[1],
    count=1,
    dtype=accumulation.dtype,
    crs=wgs84,
    transform=transform
) as dst:
    dst.write(accumulation, 1)

print(f"Successfully convert 1D netCDF to tiff: {tiff_filename_day}")

def masked_data(input_tiff, mask_file, output_masked_dir):
    """Clip a GeoTIFF to the geometries of a vector file and save the result.

    Args:
        input_tiff: Path of the raster to clip.
        mask_file: Path of the vector file read with geopandas.
        output_masked_dir: Directory that receives the clipped raster, saved
            under the same base name as ``input_tiff``.
    """
    shapes = list(gpd.read_file(mask_file).geometry)

    with rasterio.open(input_tiff) as src:
        clipped, clipped_transform = mask(src, shapes, crop=True)
        profile = src.meta.copy()

    # The clipped array is (bands, rows, cols); refresh size and georeferencing.
    profile.update(
        driver="GTiff",
        height=clipped.shape[1],
        width=clipped.shape[2],
        transform=clipped_transform,
    )

    destination = os.path.join(output_masked_dir, os.path.basename(input_tiff))

    with rasterio.open(destination, "w", **profile) as dst:
        dst.write(clipped)

    print(f"Result masked: {destination}")

# Mask every GeoTIFF of the hourly folder first, then of the daily folder.
for source_dir, target_dir in (
    (input_hour_dir, output_masked_hour_dir),
    (input_day_dir, output_masked_day_dir),
):
    for file in os.listdir(source_dir):
        if not file.endswith(".tif"):
            continue
        input_tiff = os.path.join(source_dir, file)
        masked_data(input_tiff, mask_pulau, target_dir)

def delete_nc_to_tif(folder_path_for_tif):
    """Remove a folder and everything in it, reporting the outcome on stdout.

    A missing folder is reported, not treated as an error; any exception raised
    while deleting is caught and printed.

    Args:
        folder_path_for_tif: Path of the folder to delete.
    """
    try:
        if not os.path.exists(folder_path_for_tif):
            print(f"not found or previously deleted {folder_path_for_tif}")
            return
        shutil.rmtree(folder_path_for_tif)
        print(f"Successfully delete folder {folder_path_for_tif}")
    except Exception as e:
        print(f"Failed to delete folder {folder_path_for_tif}: {e}")

# Drop the unmasked intermediate GeoTIFF folders (hourly first, then daily).
for intermediate_dir in (input_hour_dir, input_day_dir):
    delete_nc_to_tif(intermediate_dir)

PROSES UNTUK WFS

In [None]:
# Check the time axis of the data
nc_vars = data.variables
time = nc_vars['time'][:]
dates = num2date(time, nc_vars['time'].units)
for probe in (10+n, 16+n):
    print((dates[probe]).strftime('%Y-%m-%d %H:%M:%S WIB'))

# Coordinates and rainfall used by the WFS (tabular) processing below
lat_wfs = nc_vars["lat"][:]
lon_wfs = nc_vars["lon"][:]
prec_wfs = nc_vars['rain'][:,:,:]

In [None]:
# Show every column when a frame is displayed. Use the real option key: the
# original "display.max.columns" only worked because pandas treats the key as
# a regular expression in which "." also matches "_".
pd.set_option("display.max_columns", None)
# Load the tabular grid definition.
grid = pd.read_excel(path_pch_tabular)
print (grid)

In [None]:
# Pull the grid indices (as NumPy arrays) and the descriptive columns out of the table
grid_long, grid_lat = (grid[col].to_numpy() for col in ('idx_long', 'idx_lat'))
longitude_r, latitude_r = grid['long_data'], grid['lat_data']
latitude_prod, longitude_prod = grid['lat_prod'], grid['long_prod']
pulau, balai, kode_balai = (grid[col] for col in ('pulau', 'balai', 'kode_balai'))
ws, das = grid['wilayah_sungai'], grid['das']
prov, kota, wilayah = grid["provinsi"], grid['kabkot'], grid['wilayah']
# Number of grid rows to iterate over
latshape = len(grid_lat)
latshape

In [None]:
# Forecasting 1 day ahead
# For each of the 8 steps of the first window, store the rainfall accumulated
# from the start of the window up to and including that step in a global named
# 'hujanharian_<YYYYmmddHH>' (read back by the alert-table cell).
# The original `if k == 19+n` branch was removed: range() excludes 19+n, so
# that branch could never run.
day1_start = 11+n
for k in range(day1_start, day1_start+8):
    idx_t = (dates[k]).strftime("%Y%m%d%H")
    print(idx_t)
    globals()['hujanharian_'+(idx_t)] = prec_wfs[day1_start:k+1,:,:].sum(axis=0)

In [None]:
# Forecasting 2 days ahead
# Same running accumulation as the first window, shifted by 8 steps; results
# are stored in globals named 'hujanharian_<YYYYmmddHH>'.
# The original `if k == 19+8+n` branch was removed: range() excludes that
# value, so the branch could never run.
day2_start = 11+8+n
for k in range(day2_start, day2_start+8):
    idx_t = (dates[k]).strftime("%Y%m%d%H")
    print(idx_t)
    globals()['hujanharian_'+(idx_t)] = prec_wfs[day2_start:k+1,:,:].sum(axis=0)

In [None]:
kolom = ['long_prod', 'lat_prod', 'tanggal', 'longitude','latitude','pulau', 'kode_balai', 'balai','das','provinsi','kabkot','wilayah']


def _klasifikasi_hujan(total):
    """Map a window rainfall total (mm) to the rain class code "1".."5".

    Totals of 20 or less (and values that compare false, e.g. NaN) fall into
    class "1", so a class is always returned. The original chain left the
    variable unset, or stale from the previous row, for totals not above 0.5.
    """
    if total > 150:
        return "5"  # extreme rain
    if total > 100:
        return "4"  # very heavy rain
    if total > 50:
        return "3"  # heavy rain
    if total > 20:
        return "2"  # moderate rain
    return "1"      # light rain


def _status_akhir(total):
    """Map a window rainfall total (mm) to the alert level code "1".."4"."""
    if total > 100:
        return "4"  # AWAS
    if total > 75:
        return "3"  # SIAGA
    if total > 50:
        return "2"  # WASPADA
    return "1"      # AMAN


# One dict per grid point that reaches the threshold; the frame is built once
# at the end instead of concatenating row by row.
records = []
for tab in range (latshape) :
    gridlat = grid_lat[tab]
    gridlon = grid_long[tab]
    for k in range (11+n,27+n):
        # Find the first step at which the running accumulation reaches 0.5
        idx_t = (dates[k]).strftime("%Y%m%d%H")
        idx_h = (dates[k]).strftime("%H:00")
        hujan_cek = globals()['hujanharian_'+(idx_t)]
        if not (hujan_cek[gridlat,gridlon]>=0.5):
            continue

        # First index of the 8-step window that contains step k
        i_idx = 11+n if k<19+n else 19+n
        tanggal = (dates[i_idx]).strftime("%d %B %Y")

        record = {
            'long_prod':longitude_prod[tab], 'lat_prod':latitude_prod[tab],
            'tanggal':tanggal,
            'longitude':longitude_r[tab], 'latitude':latitude_r[tab],
            'pulau':pulau[tab], 'kode_balai':kode_balai[tab], 'balai':balai[tab],
            'das':das[tab], 'provinsi':prov[tab], 'kabkot':kota[tab],
            'wilayah':wilayah[tab],
            'waktu_mulai':idx_h,
        }

        # Running accumulation at every step of the window
        for i in range (i_idx,i_idx+8):
            step_t = (dates[i]).strftime("%Y%m%d%H")
            step_h = (dates[i]).strftime("%H:00")
            window_total = globals()['hujanharian_'+(step_t)][gridlat,gridlon]
            record['ch_'+step_h] = window_total

        # Both codes are derived from the accumulation at the LAST step of the
        # window (the original read it through the loop's leftover idx_t).
        record["klasifikasi_hujan"] = _klasifikasi_hujan(window_total)
        record["status_akhir"] = _status_akhir(window_total)

        records.append(record)
        break  # only the first qualifying step of each grid point is recorded

df_dasWaspada = pd.DataFrame(records) if records else pd.DataFrame(columns=kolom)

df = df_dasWaspada.sort_values(by="tanggal")
df = df.set_index("tanggal")
print (df)

In [None]:
# Write the accumulation table to Excel (XlsxWriter engine), then read it back.
print('Create accumulated data...')

# Build the path once so the write and the read-back cannot drift apart.
accum_dir = '../repository/pre-processing/accumulation/'
accum_path = (accum_dir + 'accum_'
              + (dates[11+n]).strftime('%m%d%Y') + '_'
              + (dates[26+n]).strftime('%m%d%Y') + '.xlsx')
os.makedirs(accum_dir, exist_ok=True)

# The context manager saves and closes the workbook even if writing fails.
with pd.ExcelWriter(accum_path, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Akumulasi Berjalan')

print('Data accumulation has been completed...')

print('Read accumulation data...')
# NOTE: from here on `data` is the accumulation DataFrame, no longer the NetCDF dataset.
data = pd.read_excel(accum_path)
print("accumulation data value :")
print(data)

In [None]:
# NOTE(review): this import rebinds the notebook-level name `datetime` from the
# module (imported in the first cell) to the class. Later code calls
# `datetime.now()` and relies on that; earlier cells that call
# `datetime.date.today()` / `datetime.timedelta` fail if re-run after this cell.
from datetime import datetime, timedelta

# "Tomorrow" is taken from the wall clock, not from the forecast time axis.
tanggal_hari_ini = datetime.now()
tanggal_besok = tanggal_hari_ini + timedelta(days=1)

# Same "%d %B %Y" format as the 'tanggal' values written by the alert-table cell.
tanggal_besok_str = tanggal_besok.strftime("%d %B %Y")
print("Tomorrow's date:", tanggal_besok_str)

# Keep only the rows dated tomorrow; the last expression displays the result.
data_tanggal_besok = data[data['tanggal'] == tanggal_besok_str]
data_tanggal_besok

In [None]:
print("Result sorting descanding:", tanggal_besok_str)
# Highest 'ch_01:00' value first; the trailing expression displays the frame.
data_tanggal_besok_sorted = data_tanggal_besok.sort_values('ch_01:00', ascending=False)
data_tanggal_besok_sorted

In [None]:
def create_remove_duplicate_tabular_data(data, subset_columns, output_folder, file_prefix, index_column):
    """De-duplicate a table, save it to Excel and print a class-count pivot.

    Args:
        data: DataFrame to de-duplicate (the first row of each duplicate group is kept).
        subset_columns: Columns that identify a duplicate.
        output_folder: Folder receiving ``<file_prefix>_<mmddYYYY>.xlsx``; it is
            created when missing, so a fresh checkout does not fail on write.
        file_prefix: Prefix of the output file name; its capitalised form is
            used as the sheet name.
        index_column: Column used as the row index of the printed pivot table.

    The date in the file name comes from the notebook globals ``dates`` and ``n``.
    """
    print(f'Create tabular data for {file_prefix}...')

    data_filtered = data.drop_duplicates(subset=subset_columns, keep='first')

    os.makedirs(output_folder, exist_ok=True)
    remove_duplicate_tabular_data_path = os.path.join(output_folder, f"{file_prefix}_{(dates[11+n]).strftime('%m%d%Y')}.xlsx")

    with pd.ExcelWriter(remove_duplicate_tabular_data_path, engine='xlsxwriter') as writer:
        data_filtered.to_excel(writer, sheet_name=file_prefix.capitalize())

    print(f'Tabular data for {file_prefix} has been completed and saved')

    # Number of 'kabkot' entries per rain class, per index_column value.
    pivot = pd.pivot_table(
        data_filtered,
        index=index_column,
        columns='klasifikasi_hujan',
        values='kabkot',
        aggfunc='count',
        fill_value=0
    )

    print(pivot)

# De-duplicate tomorrow's rows per balai first, then per pulau.
for dedup_subset, dedup_folder, dedup_prefix, dedup_index in (
    (['balai', 'kabkot'], "../repository/pre-processing/remove-duplicate/balai/", "remdup_balai", "balai"),
    (['kabkot'], "../repository/pre-processing/remove-duplicate/pulau/", "remdup_pulau", "pulau"),
):
    create_remove_duplicate_tabular_data(
        data=data_tanggal_besok_sorted,
        subset_columns=dedup_subset,
        output_folder=dedup_folder,
        file_prefix=dedup_prefix,
        index_column=dedup_index,
    )

In [None]:
def prediction_data(remove_duplicate_path, get_columns, group_by, output_prediction, uniq_file_name):
    """Aggregate class counts per wilayah and per group, then write one CSV row per group.

    Args:
        remove_duplicate_path: Excel file produced by the de-duplication step.
        get_columns: Columns to keep from that file; must include 'wilayah',
            'klasifikasi_hujan', 'status_akhir' and ``group_by``.
        group_by: Grouping column (e.g. "balai" or "pulau").
        output_prediction: Output folder for the CSV (created when missing).
        uniq_file_name: Prefix of the CSV file name.

    Added columns, in this order: total_kl_1..5 and total_kg_1..4 (counts per
    'wilayah'), kelas_kl_1..5 and kelas_kg_1..4 (counts per ``group_by``),
    last_data (the notebook global ``filename``) and last_updt (current time).
    The date in the file name comes from the notebook globals ``dates`` and ``n``.
    """
    print(f'Processing data for {uniq_file_name}...')

    data_for_prediction = pd.read_excel(remove_duplicate_path)
    # Work on an independent copy so the column assignments below do not
    # write into a slice of the loaded frame (SettingWithCopyWarning).
    prediction_data_filtered = data_for_prediction[get_columns].copy()

    def _add_class_counts(prefix, key, value_col, n_classes):
        """Add '<prefix>_<i>': rows of the same `key` whose `value_col` equals i."""
        for i in range(1, n_classes + 1):
            hits = prediction_data_filtered[value_col].eq(i)
            per_key = prediction_data_filtered.loc[hits, key].value_counts()
            prediction_data_filtered[f'{prefix}_{i}'] = (
                prediction_data_filtered[key].map(per_key).fillna(0).astype(int)
            )

    # Counts per wilayah
    _add_class_counts('total_kl', 'wilayah', 'klasifikasi_hujan', 5)
    _add_class_counts('total_kg', 'wilayah', 'status_akhir', 4)
    # Counts per group (balai or pulau)
    _add_class_counts('kelas_kl', group_by, 'klasifikasi_hujan', 5)
    _add_class_counts('kelas_kg', group_by, 'status_akhir', 4)

    # Provenance columns. pd.Timestamp is used so this does not depend on what
    # the notebook-level name `datetime` is bound to (module or class).
    prediction_data_filtered['last_data'] = filename
    prediction_data_filtered['last_updt'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')

    # Keep one row per group (balai or pulau)
    prediction_data_filtered = prediction_data_filtered.drop_duplicates(subset=[group_by])

    # Output path
    os.makedirs(output_prediction, exist_ok=True)
    output_file = os.path.join(output_prediction, f"{uniq_file_name}_{(dates[11+n]).strftime('%m%d%Y')}_2200.csv")

    # Save to CSV
    prediction_data_filtered.to_csv(output_file, index=False)

    print(f'Data processing for {uniq_file_name} completed')


# Columns shared by both prediction runs; the group-specific ones are spliced in below.
_leading_columns = ['tanggal', 'long_prod', 'lat_prod', 'longitude', 'latitude', 'wilayah']
_trailing_columns = ['ch_01:00', 'klasifikasi_hujan', 'status_akhir']

# Process the balai data
prediction_data(
    f"../repository/pre-processing/remove-duplicate/balai/remdup_balai_{(dates[11+n]).strftime('%m%d%Y')}.xlsx",
    _leading_columns + ['kode_balai', 'balai'] + _trailing_columns,
    "balai",
    "../repository/processing/day/balai/",
    "balai_pch_day",
)

# Process the pulau data
prediction_data(
    f"../repository/pre-processing/remove-duplicate/pulau/remdup_pulau_{(dates[11+n]).strftime('%m%d%Y')}.xlsx",
    _leading_columns + ['pulau'] + _trailing_columns,
    "pulau",
    "../repository/processing/day/pulau/",
    "pulau_pch_day",
)


In [None]:
def csv_to_shp(csv_file, output_wfs_dir, output_shp):
    """Convert a CSV with longitude/latitude columns to a point shapefile (EPSG:4326).

    Args:
        csv_file: Path of the CSV to read.
        output_wfs_dir: Folder to create (if missing) before writing.
        output_shp: Path of the shapefile to write.

    The columns 'ch_01:00', 'klasifikasi_hujan' and 'status_akhir' are dropped
    when present. Nothing is written when 'latitude' or 'longitude' is missing.
    """
    os.makedirs(output_wfs_dir, exist_ok=True)

    data = pd.read_csv(csv_file)

    # errors='ignore' already skips columns that are not in the file.
    data = data.drop(columns=['ch_01:00', 'klasifikasi_hujan', 'status_akhir'], errors='ignore')

    if not {'latitude', 'longitude'}.issubset(data.columns):
        print("Column 'latitude' or 'longitude' was not found in the CSV file")
        return

    # Vectorised point construction: also works for an empty table, where the
    # row-wise apply() used before does not return a usable geometry column.
    gdf = gpd.GeoDataFrame(
        data,
        geometry=gpd.points_from_xy(data['longitude'], data['latitude']),
        crs="EPSG:4326",
    )
    gdf.to_file(output_shp, driver='ESRI Shapefile')

    print(f"Successfully created vektor data {output_shp}")

# Date stamp shared by the CSV inputs and the shapefile outputs
start_date = (dates[11+n]).strftime('%m%d%Y')

output_balai_dir = f"../repository/post-processing/wfs/balai/pch_balai_{start_date}/"
output_pulau_dir = f"../repository/post-processing/wfs/pulau/pch_pulau_{start_date}/"

# Convert the balai CSV first, then the pulau CSV
for shp_source_csv, shp_target_dir, shp_target_file in (
    (
        f"../repository/processing/day/balai/balai_pch_day_{start_date}_2200.csv",
        output_balai_dir,
        f"{output_balai_dir}balai_pch_day_{start_date}_2200.shp",
    ),
    (
        f"../repository/processing/day/pulau/pulau_pch_day_{start_date}_2200.csv",
        output_pulau_dir,
        f"{output_pulau_dir}pulau_pch_day_{start_date}_2200.shp",
    ),
):
    csv_to_shp(
        csv_file=shp_source_csv,
        output_wfs_dir=shp_target_dir,
        output_shp=shp_target_file,
    )

In [None]:
# Folders whose .shp / .tif files are published to GeoServer
shp_dirs = [f"../repository/post-processing/wfs/balai/pch_balai_{start_date}/", f"../repository/post-processing/wfs/pulau/pch_pulau_{start_date}/"]
tif_dirs = [f"../repository/post-processing/wms/day/pch_day_{start_date}/", f"../repository/post-processing/wms/hour/pch_hour_{start_date}/"]
# The endpoint embeds credentials, so allow it to come from the environment.
# The fallback keeps the previous hard-coded value so existing runs are unaffected.
# NOTE(review): move the default credentials out of the notebook.
geoserver_endpoint = os.getenv("GEOSERVER_ENDPOINT", "http://admin:geoserver@127.0.0.1:8080/geoserver")
workspace = "demo_simadu"

def upload_to_geoserver(data_path, store_name, geoserver_endpoint, workspace, auth=None, timeout=60):
    """Register a local .shp or .tif file in GeoServer as an external store.

    Args:
        data_path: Path of the file; the extension selects the store type.
        store_name: Name of the data store / coverage store.
        geoserver_endpoint: Base URL of GeoServer.
        workspace: Target workspace.
        auth: Optional ``(user, password)`` tuple. Defaults to the environment
            variables GEOSERVER_USER / GEOSERVER_PASSWORD, falling back to the
            previously hard-coded ("admin", "geoserver").
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when GeoServer answers 200 or 201; False for an unsupported file
        type, any other status code, or a network error.
    """
    file_extension = os.path.splitext(data_path)[1].lower()
    if file_extension == ".shp":
        file_type = "shp"
        store_type = "datastores"
    elif file_extension == ".tif":
        file_type = "geotiff"
        store_type = "coveragestores"
    else:
        print(f"File type {file_extension} not supported for upload.")
        return False

    if auth is None:
        auth = (
            os.getenv("GEOSERVER_USER", "admin"),
            os.getenv("GEOSERVER_PASSWORD", "geoserver"),
        )

    # The request body is a file:// URL pointing at the absolute local path.
    absolute_path = os.path.abspath(data_path).replace("\\", "/")
    url = f"{geoserver_endpoint}/rest/workspaces/{workspace}/{store_type}/{store_name}/external.{file_type}"

    headers = {"Content-type": "text/plain"}
    try:
        response = requests.put(
            url,
            data=f"file://{absolute_path}",
            headers=headers,
            auth=auth,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # A dead or slow server should fail this upload, not the whole batch.
        print(f"Failed to upload {data_path} to geoserver. Error: {exc}")
        return False

    if response.status_code in [200, 201]:
        print(f"Successfully uploaded {data_path} to geoserver")
        return True
    else:
        print(f"Failed to upload {data_path} to geoserver. Status code: {response.status_code}")
        return False

def process_and_upload_to_geoserver(shp_dirs, tif_dirs, geoserver_endpoint, workspace):
    """Upload every shapefile and GeoTIFF found in the given folders.

    All folders in ``shp_dirs`` are scanned for ``.shp`` files first, then all
    folders in ``tif_dirs`` for ``.tif`` files. Each file is uploaded under a
    store named after the file (without its extension).

    Args:
        shp_dirs: Folders containing shapefiles.
        tif_dirs: Folders containing GeoTIFFs.
        geoserver_endpoint: Base URL of GeoServer.
        workspace: Target workspace.
    """
    batches = [(folder, '.shp') for folder in shp_dirs]
    batches += [(folder, '.tif') for folder in tif_dirs]

    for folder, suffix in batches:
        matching_names = [name for name in os.listdir(folder) if name.endswith(suffix)]
        for name in matching_names:
            store_name, _ = os.path.splitext(name)
            upload_to_geoserver(os.path.join(folder, name), store_name, geoserver_endpoint, workspace)

# Run the upload for all shapefile and GeoTIFF folders
process_and_upload_to_geoserver(
    shp_dirs=shp_dirs,
    tif_dirs=tif_dirs,
    geoserver_endpoint=geoserver_endpoint,
    workspace=workspace,
)