In [1]:
import os
import shutil
import ftplib
import datetime
import requests
import rasterio
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd

from rasterio.mask import mask
from shapely.geometry import Point
from rasterio.transform import from_origin
from netCDF4 import Dataset, num2date

In [2]:
path_nc_file = "../data/row"
path_nc_row = "../repository/pre-processing/row"
path_modified = "../repository/pre-processing/result-row"
path_pch_tabular = "../data/tabular/data_pch_balai_01212025.xlsx"
mask_pulau = "../data/geojson/pulau.geojson"

In [3]:
ftp_host = os.getenv("HOST")
ftp_user = os.getenv("USER")
ftp_password = os.getenv("PASSWORD")
cycle = "12"

def connect_ftp():
    ftp = ftplib.FTP(ftp_host)
    ftp.login(ftp_user, ftp_password)
    ftp.cwd("/")
    return ftp

def download_file_from_ftp(ftp, filename):
    try:
        file_list = ftp.nlst()
        if filename in file_list:
            local_file_path = os.path.join(path_nc_file, filename)
            if not os.path.exists(local_file_path):
                with open(local_file_path, "wb") as local_file:
                    ftp.retrbinary(f"RETR {filename}", local_file.write)
                print(f"Download successfully {filename}")
            else:
                print(f"File {filename} is available")
            return local_file_path
    except Exception:
        print("File is corrupted, and there is nothing that can be done.")
        return None

def download_latest_file_from_ftp(ftp):
    file_list = ftp.nlst()
    if file_list:
        latest_file = sorted(file_list)[-1]
        return download_file_from_ftp(ftp, latest_file)
    return None

today = datetime.date.today() - datetime.timedelta(days=1)
filename = f"ECMWF.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"
print("Downloading:", filename)

ftp = connect_ftp()
if ftp:
    local_file_path = download_file_from_ftp(ftp, filename) or download_latest_file_from_ftp(ftp)
    ftp.quit()
else:
    print("Cannot connect to FTP server")

if local_file_path is None:
    print("File is currently unavailable for download.")
    exit()

Downloading: ECMWF.0125.202503041200.PREC.nc
Download successfully ECMWF.0125.202503041200.PREC.nc


In [None]:
data = Dataset(local_file_path)

lat = data.variables['lat'][:]
lon = data.variables['lon'][:]
prec = data.variables['tp'][:,0,:,:]
time = data.variables['time'][:]

dates = num2date(time, data.variables['time'].units)
print((dates[7]+datetime.timedelta(hours=7)).strftime('%Y-%m-%d %H:%M:%S WIB'))
print((dates[15]+datetime.timedelta(hours=7)).strftime('%Y-%m-%d %H:%M:%S WIB'))

xrain = prec
print(np.array_equal(xrain,prec))

for time in range (len(dates)):
    for lati in range(len(lat)) :
        for loni in range (len(lon)) :
            if (time<=0) :
                if (xrain[time,lati,loni]<=0) :
                    xrain[time,lati,loni] == 0
            elif(time>0) :
                if (xrain[time,lati,loni]<0) :
                    xrain[time,lati,loni] = xrain[time-1,lati,loni]
                if (xrain[time,lati,loni]-xrain[time-1,lati,loni]<0) :
                    xrain[time,lati,loni] = xrain[time-1,lati,loni]

hjn = np.empty((len(dates),len(lat),len(lon)))
hjn[0,:,:] = xrain[0,:,:]
for i in range (1,len(dates)) :
    hjn[i,:,:] = xrain[i,:,:]-xrain[i-1,:,:]

hjn2 = hjn.reshape(len(dates), 1, len(lat), len(lon))

ds = xr.open_dataset(local_file_path)
ds['tp'].values = hjn2
ds = ds.assign_coords(time=("time",ds['time'].values + np.timedelta64(7,'h')))

output_rewrite = f"ECMWF_new.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"
output_path = os.path.join(path_nc_row, output_rewrite)
ds.to_netcdf(output_path)
print (ds)

2025-03-05 16:00:00 WIB
2025-03-06 16:00:00 WIB
True
<xarray.Dataset> Size: 43MB
Dimensions:  (lat: 185, lon: 449, lev: 1, time: 65)
Coordinates:
  * lat      (lat) float64 1kB 9.0 8.875 8.75 8.625 ... -13.75 -13.88 -14.0
  * lon      (lon) float64 4kB 92.0 92.12 92.25 92.38 ... 147.8 147.9 148.0
  * lev      (lev) float64 8B 1.013e+03
  * time     (time) datetime64[ns] 520B 2025-03-04T19:00:00 ... 2025-03-14T19...
Data variables:
    tp       (time, lev, lat, lon) float64 43MB 0.0 0.0 0.0 ... 0.6562 0.9375
Attributes:
    title:        IFS Precipitation
    conventions:  COARDS
    datatype:     Grid
    cachesize:    626240 bytes


In [None]:
result_file_name = f"ECMWF_new_3d.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"

file_path = os.path.join(path_modified, result_file_name)

f = Dataset(file_path, 'w', format='NETCDF4')
print(f"File {result_file_name} berhasil dibuat")
print (data.variables)

rain = hjn[:48,:,:]
latitude = data.variables["lat"][:]
longitude = data.variables["lon"][:]
time_k = data.variables['time'][:48]

tempgrp = f.createGroup('Rain_data')

f.createDimension('lon', len(longitude))
f.createDimension('lat', len(latitude))
f.createDimension('time', len(time_k))

lon = f.createVariable('lon', 'f4', 'lon')
lat = f.createVariable('lat', 'f4', 'lat')  
rain = f.createVariable('rain', 'f4', ('time', 'lat', 'lon'))
time = f.createVariable('time', 'i4', 'time')

lon[:] = longitude[:]
lat[:] = latitude[:]
rain[:,:,:] = hjn[:48,:,:]
time[:] = time_k+7

print (dates[0].strftime('%Y-%m-%d ')+str(cycle)+":00:00")

f.description = "ECMWF from BMKG modified by Jhon doe"
f.history = "Created " + today.strftime("%d/%m/%y")

lon.units = 'degree_east'
lat.units = 'degree_north'
time.units = 'hours since '+(dates[0]+datetime.timedelta(hours=7)).strftime('%Y-%m-%d ')+str(cycle)+":00:00"
rain.units = 'mm'

f.close()

File ECMWF_new_3d.0125.202503041200.PREC.nc berhasil dibuat
{'lat': <class 'netCDF4.Variable'>
float64 lat(lat)
    grads_dim: y
    grads_mapping: linear
    grads_size: 185
    units: degrees_north
    long_name: latitude
    minimum: -14.0
    maximum: 9.0
    resolution: -0.125
unlimited dimensions: 
current shape = (185,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'lon': <class 'netCDF4.Variable'>
float64 lon(lon)
    grads_dim: x
    grads_mapping: linear
    grads_size: 449
    units: degrees_east
    long_name: longitude
    minimum: 92.0
    maximum: 148.0
    resolution: 0.125
unlimited dimensions: 
current shape = (449,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'lev': <class 'netCDF4.Variable'>
float64 lev(lev)
    grads_dim: z
    grads_mapping: levels
    units: millibar
    long_name: altitude
unlimited dimensions: 
current shape = (1,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'time': <class 'netCDF4.Variable'>


In [6]:
data_path = f"../repository/pre-processing/result-row\ECMWF_new_3d.0125.{today.strftime('%Y%m%d')}{cycle}00.PREC.nc"

n = 0

data = Dataset(data_path)
print(data.variables)

{'lon': <class 'netCDF4.Variable'>
float32 lon(lon)
    units: degree_east
unlimited dimensions: 
current shape = (449,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'lat': <class 'netCDF4.Variable'>
float32 lat(lat)
    units: degree_north
unlimited dimensions: 
current shape = (185,)
filling on, default _FillValue of 9.969209968386869e+36 used, 'rain': <class 'netCDF4.Variable'>
float32 rain(time, lat, lon)
    units: mm
unlimited dimensions: 
current shape = (48, 185, 449)
filling on, default _FillValue of 9.969209968386869e+36 used, 'time': <class 'netCDF4.Variable'>
int32 time(time)
    units: hours since 2025-03-04 12:00:00
unlimited dimensions: 
current shape = (48,)
filling on, default _FillValue of -2147483647 used}


PROSES UNTUK WMS

In [None]:
time = data.variables['time'][:]
dates = num2date(time, data.variables['time'].units)
print((dates[10+n]).strftime('%Y-%m-%d %H:%M:%S WIB'))
print((dates[19+n]).strftime('%Y-%m-%d %H:%M:%S WIB'))

lon_wms = np.array(data.variables["lon"][:])
lat_wms = np.array(data.variables["lat"][:])
prec_wms = np.array(data.variables["rain"][:])

xx, yy = np.meshgrid(lon_wms, lat_wms)
accumulation = 0
start_date = dates[11+n - 1].strftime('%m%d%Y')

input_hour_dir = f"../repository/post-processing/nc_to_tiff/pch_hour_{start_date}/"
input_day_dir = f"../repository/post-processing/nc_to_tiff/pch_day_{start_date}/"
output_masked_hour_dir = f"../repository/post-processing/wms/hour/pch_hour_{start_date}/"
output_masked_day_dir = f"../repository/post-processing/wms/day/pch_day_{start_date}/"

os.makedirs(input_hour_dir, exist_ok=True)
os.makedirs(input_day_dir, exist_ok=True)
os.makedirs(output_masked_hour_dir, exist_ok=True)
os.makedirs(output_masked_day_dir, exist_ok=True)

accumulation=0

for k in range(11+n, 19+n):
    hour_data = prec_wms[k, :, :]
    accumulation += hour_data

np.max(accumulation)

def classify_rainfall(data):
    classified = np.zeros_like(data, dtype=np.uint8)  
    classified[(data > 0.5) & (data <= 20)] = 1       
    classified[(data > 20) & (data <= 50)] = 2        
    classified[(data > 50) & (data <= 100)] = 3       
    classified[(data > 100) & (data <= 150)] =4       
    classified[data > 150] = 5                        
    return classified

classified_rain = classify_rainfall(accumulation)

resolution_lon = (lon_wms.max() - lon_wms.min()) / lon_wms.shape[0]
resolution_lat = (lat_wms.max() - lat_wms.min()) / lat_wms.shape[0]

transform = from_origin(lon_wms.min(), lat_wms.max(), resolution_lon, resolution_lat)

tiff_filename_hour = os.path.join(input_day_dir, f"pch_day_{start_date}.tif")

with rasterio.open(
    tiff_filename_hour,
    'w',
    driver='GTiff',
    height=classified_rain.shape[0],
    width=classified_rain.shape[1],
    count=1,
    dtype=classified_rain.dtype,
    crs=rasterio.crs.CRS.from_proj4("+proj=longlat +datum=WGS84 +no_defs"),
    transform=transform
) as dst:
    dst.write(classified_rain, 1)
print(f"Successfully convert 1D netCDF to tiff: {tiff_filename_hour}")

def classify_rainfall3h(data):
    classified = np.zeros_like(data, dtype=np.uint8)  
    classified[(data > 1) & (data <= 5)] = 1            
    classified[(data > 5) & (data <= 10)] = 2           
    classified[(data > 10) & (data <= 20)] = 3          
    classified[data > 20] = 4                           
    return classified

for k in range(11+n, 19+n):
    hour_data = prec_wms[k, :, :]
    
    classified_rain3h = classify_rainfall3h(hour_data)
    
    start_hour = dates[k - 1].strftime('%H%M')
    
    resolution_lon = (lon_wms.max() - lon_wms.min()) / lon_wms.shape[0]
    resolution_lat = (lat_wms.max() - lat_wms.min()) / lat_wms.shape[0]
    transform = from_origin(lon_wms.min(), lat_wms.max(), resolution_lon, resolution_lat)

    tiff_filename_hour = os.path.join(input_hour_dir, f"pch_hour_{start_date}_{start_hour}_22.00.tif")

    with rasterio.open(
        tiff_filename_hour,
        'w',
        driver='GTiff',
        height=classified_rain3h.shape[0],
        width=classified_rain3h.shape[1],
        count=1,
        dtype=classified_rain3h.dtype,
        crs=rasterio.crs.CRS.from_proj4("+proj=longlat +datum=WGS84 +no_defs"),
        transform=transform
    ) as dst:
        dst.write(classified_rain3h, 1)

    print(f"Successfully convert 3H netCDF to tiff: {tiff_filename_hour}")

def masked_data(input_tiff, mask_file, output_masked_dir):
    gdf = gpd.read_file(mask_file)
    geometries = [geom for geom in gdf.geometry]

    with rasterio.open(input_tiff) as src:
        out_image, out_transform = mask(src, geometries, crop=True)
        out_meta = src.meta.copy()

    out_meta.update({
        "driver": "GTiff",
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform
    })

    file_name = os.path.basename(input_tiff)
    output_masked_tiff = os.path.join(output_masked_dir, file_name)

    with rasterio.open(output_masked_tiff, "w", **out_meta) as dst:
        dst.write(out_image)

    print(f"Result masked: {output_masked_tiff}")

for file in os.listdir(input_hour_dir):
    if file.endswith(".tif"):
        input_tiff = os.path.join(input_hour_dir, file)
        masked_data(input_tiff, mask_pulau, output_masked_hour_dir)

for file in os.listdir(input_day_dir):
    if file.endswith(".tif"):
        input_tiff = os.path.join(input_day_dir, file)
        masked_data(input_tiff, mask_pulau, output_masked_day_dir)

def delete_nc_to_tif(folder_path_for_tif):
    try:
        if os.path.exists(folder_path_for_tif):
            shutil.rmtree(folder_path_for_tif)
            print(f"Successfully delete folder {folder_path_for_tif}")
        else:
            print(f"not found or previously deleted {folder_path_for_tif}")
    except Exception as e:
        print(f"Failed to delete folder {folder_path_for_tif}: {e}")

delete_nc_to_tif(input_hour_dir)
delete_nc_to_tif(input_day_dir)

2025-03-06 01:00:00 WIB
2025-03-07 04:00:00 WIB
Successfully convert 1D netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_day_03062025/pch_day_03062025.tif
Successfully convert 3H netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_hour_03062025/pch_hour_03062025_0100_22.00.tif
Successfully convert 3H netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_hour_03062025/pch_hour_03062025_0400_22.00.tif
Successfully convert 3H netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_hour_03062025/pch_hour_03062025_0700_22.00.tif
Successfully convert 3H netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_hour_03062025/pch_hour_03062025_1000_22.00.tif
Successfully convert 3H netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_hour_03062025/pch_hour_03062025_1300_22.00.tif
Successfully convert 3H netCDF to tiff: ../repository/post-processing/nc_to_tiff/pch_hour_03062025/pch_hour_03062025_1600_22.00.tif
Successfully convert 3H netCDF to tiff: .

PROSES UNTUK WFS

In [None]:
time = data.variables['time'][:]
dates = num2date(time, data.variables['time'].units)
print((dates[10+n]).strftime('%Y-%m-%d %H:%M:%S WIB'))
print((dates[16+n]).strftime('%Y-%m-%d %H:%M:%S WIB'))

lat_wfs = data.variables["lat"][:]
lon_wfs = data.variables["lon"][:]
prec_wfs = data.variables['rain'][:,:,:]

2025-03-06 01:00:00 WIB
2025-03-06 19:00:00 WIB


In [9]:
pd.set_option("display.max.columns",None)
grid = pd.read_excel(path_pch_tabular)
print (grid)

       lat_prod   long_prod  lat_data  long_data  idx_lat  idx_long  wilayah  \
0     -7.475905  111.498259    -7.500     110.50      132       148        2   
1     -7.475905  111.498259    -7.500     110.50      132       148        2   
2     -7.475905  111.498259    -7.750     110.75      134       150        2   
3     -7.475905  111.498259    -7.750     110.75      134       150        2   
4     -7.475905  111.498259    -7.750     110.50      134       148        2   
...         ...         ...       ...        ...      ...       ...      ...   
46990 -8.183800  113.727000    -8.125     123.75      137       254        2   
46991 -8.183800  113.727000    -8.125     123.75      137       254        2   
46992 -8.183800  113.727000    -8.125     123.75      137       254        2   
46993 -8.183800  113.727000    -8.125     123.75      137       254        2   
46994 -8.183800  113.727000    -8.125     123.75      137       254        2   

                         wilayah_sungai

In [None]:
grid_long = grid['idx_long'].to_numpy()
grid_lat = grid['idx_lat'].to_numpy()
longitude_r = grid['long_data']
latitude_r = grid['lat_data']
latitude_prod = grid['lat_prod']
longitude_prod = grid['long_prod']
pulau = grid['pulau']
balai = grid['balai']
kode_balai = grid['kode_balai']
ws = grid ['wilayah_sungai']
das = grid['das']
prov = grid["provinsi"]
kota = grid['kabkot']
wilayah = grid['wilayah']
latshape = grid_lat.shape[0]
latshape

46995

In [None]:
for k in range (11+n,19+n):
    print((dates[k]).strftime("%Y%m%d%H"))
    idx_t=(dates[k]).strftime("%Y%m%d%H")
    if (k==19+n):
        globals()['hujanharian_'+(idx_t)] = prec_wfs[11+n,:,:]
    else:
        globals()['hujanharian_'+(idx_t)] = prec_wfs[11+n:k+1,:,:].sum(axis=0)

2025030604
2025030607
2025030610
2025030613
2025030616
2025030619
2025030622
2025030701


In [None]:
for k in range (11+8+n,19+8+n):
    print((dates[k]).strftime("%Y%m%d%H"))
    idx_t=(dates[k]).strftime("%Y%m%d%H")
    if (k==19+8+n):
        globals()['hujanharian_'+(idx_t)] = prec_wfs[11+8+n,:,:]
    else:
        globals()['hujanharian_'+(idx_t)] = prec_wfs[11+8+n:k+1,:,:].sum(axis=0)

2025030704
2025030707
2025030710
2025030713
2025030716
2025030719
2025030722
2025030801


In [None]:
kolom = ['long_prod', 'lat_prod', 'tanggal', 'longitude','latitude','pulau', 'kode_balai', 'balai','das','provinsi','kabkot','wilayah']
df_dasWaspada = pd.DataFrame(columns=kolom)
for tab in range (latshape) :
    gridlat = grid_lat[tab]
    gridlon = grid_long[tab]
    for k in range (11+n,27+n):
        idx_t = (dates[k]).strftime("%Y%m%d%H")
        idx_h = (dates[k]).strftime("%H:00")
        hujan_cek = globals()['hujanharian_'+(idx_t)]
        
        i_idx = 11+n if k<19+n else 19+n
        tanggal = (dates[i_idx]).strftime("%d %B %Y")
        
        if (hujan_cek[gridlat,gridlon]>=0.5):
            df = pd.DataFrame([{'tanggal':tanggal, 'long_prod':longitude_prod[tab], 'lat_prod':latitude_prod[tab], 'longitude':longitude_r[tab],'latitude':latitude_r[tab], 'pulau':pulau[tab], 'kode_balai':kode_balai[tab], 'balai':balai[tab],\
                             'das':das[tab],'provinsi':prov[tab],'kabkot':kota[tab],'wilayah':wilayah[tab]\
                              ,'waktu_mulai':idx_h}])
            i_idx = (11+n) if k<(19+n) else (19+n)
            for i in range (i_idx,i_idx+8):
                idx_t = (dates[i]).strftime("%Y%m%d%H")
                idx_h = (dates[i]).strftime("%H:00")
                df['ch_'+idx_h] = globals()['hujanharian_'+(idx_t)][gridlat,gridlon]
            
            kelas=globals()['hujanharian_'+(idx_t)][gridlat,gridlon]
            if (0.5<kelas<=20):
                status="1" #HUJAN RINGAN
            elif(20<kelas<=50):
                status="2" #HUJAN SEDANG
            elif(50<kelas<=100):
                status="3" #HUJAN LEBAT
            elif(100<kelas<=150):
                status="4" #HUJAN SANGAT LEBAT
            elif(kelas>150):
                status="5" #HUJAN EKSTREM
                
            df["klasifikasi_hujan"] = status
            
            status_cek = globals()['hujanharian_'+(idx_t)][gridlat,gridlon]
            if (0.5<status_cek<=50):
                status_1="1" #AMAN
            elif(50<status_cek<=75):
                status_1="2" #WASPADA
            elif(75<status_cek<=100):
                status_1="3" #SIAGA
            elif(status_cek>100):
                status_1="4" #AWAS
            
            df["status_akhir"] = status_1
            
            df_dasWaspada = pd.concat([df_dasWaspada,df])
            break
        else:
            continue

df = df_dasWaspada.sort_values(by="tanggal")
df = df.set_index("tanggal")
print (df)

  df_dasWaspada = pd.concat([df_dasWaspada,df])


                long_prod  lat_prod  longitude  latitude       pulau  \
tanggal                                                                
06 March 2025  111.498259 -7.475905    110.500    -7.500        JAWA   
06 March 2025  102.365436 -3.519631    103.375    -4.375    SUMATERA   
06 March 2025  102.365436 -3.519631    103.375    -4.375    SUMATERA   
06 March 2025  102.365436 -3.519631    103.625    -4.500    SUMATERA   
06 March 2025  102.365436 -3.519631    103.500    -4.500    SUMATERA   
...                   ...       ...        ...       ...         ...   
07 March 2025  115.132146 -8.369793    115.125    -8.750  BALI & NTT   
07 March 2025  115.132146 -8.369793    115.125    -8.750  BALI & NTT   
07 March 2025  115.132146 -8.369793    115.625    -8.875  BALI & NTT   
07 March 2025  102.381755 -1.658707    101.500    -1.875    SUMATERA   
07 March 2025  100.260480 -0.619902    101.000    -1.500    SUMATERA   

               kode_balai               balai                 d

In [None]:
print('Create accumulated data...')

writer = pd.ExcelWriter('../repository/pre-processing/accumulation/accum_'+\
                        (dates[11+n]).strftime('%m%d%Y')+'_'
                        +(dates[26+n]).strftime('%m%d%Y')+'.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Akumulasi Berjalan')
writer.close()

print('Data accumulation has been completed...')

print('Read accumulation data...')
data = pd.read_excel(
    f"../repository/pre-processing/accumulation/accum_{(dates[11+n]).strftime('%m%d%Y')}_{(dates[26+n]).strftime('%m%d%Y')}.xlsx"
)
print("accumulation data value :")
print(data)

Create accumulated data...
Data accumulation has been completed...
Read accumulation data...
accumulation data value :
             tanggal   long_prod  lat_prod  longitude  latitude       pulau  \
0      06 March 2025  111.498259 -7.475905    110.500    -7.500        JAWA   
1      06 March 2025  102.365436 -3.519631    103.375    -4.375    SUMATERA   
2      06 March 2025  102.365436 -3.519631    103.375    -4.375    SUMATERA   
3      06 March 2025  102.365436 -3.519631    103.625    -4.500    SUMATERA   
4      06 March 2025  102.365436 -3.519631    103.500    -4.500    SUMATERA   
...              ...         ...       ...        ...       ...         ...   
46574  07 March 2025  115.132146 -8.369793    115.125    -8.750  BALI & NTT   
46575  07 March 2025  115.132146 -8.369793    115.125    -8.750  BALI & NTT   
46576  07 March 2025  115.132146 -8.369793    115.625    -8.875  BALI & NTT   
46577  07 March 2025  102.381755 -1.658707    101.500    -1.875    SUMATERA   
46578  07 Ma

In [15]:
from datetime import datetime, timedelta

tanggal_hari_ini = datetime.now()
tanggal_besok = tanggal_hari_ini + timedelta(days=1)

tanggal_besok_str = tanggal_besok.strftime("%d %B %Y")
print("Tomorrow's date:", tanggal_besok_str)

data_tanggal_besok = data[data['tanggal'] == tanggal_besok_str]
data_tanggal_besok

Tomorrow's date: 06 March 2025


Unnamed: 0,tanggal,long_prod,lat_prod,longitude,latitude,pulau,kode_balai,balai,das,provinsi,kabkot,wilayah,waktu_mulai,ch_04:00,ch_07:00,ch_10:00,ch_13:00,ch_16:00,ch_19:00,ch_22:00,ch_01:00,klasifikasi_hujan,status_akhir
0,06 March 2025,111.498259,-7.475905,110.500,-7.500,JAWA,1.0,BBWS BENGAWAN SOLO,BENGAWAN SOLO,Daerah Istimewa Yogyakarta,Sleman,2,10:00,0.000000,0.078125,1.164062,4.085938,8.597656,9.574219,9.574219,9.574219,1,1
1,06 March 2025,102.365436,-3.519631,103.375,-4.375,SUMATERA,49.0,BWS SUMATERA VII,PADANG GUCI,Sumatera Selatan,Muara Enim,1,04:00,2.382812,2.832031,5.417969,11.308594,20.726562,22.687500,22.687500,22.722656,2,1
2,06 March 2025,102.365436,-3.519631,103.375,-4.375,SUMATERA,49.0,BWS SUMATERA VII,LUAS,Sumatera Selatan,Ogan Komering Ulu Selatan,1,04:00,2.382812,2.832031,5.417969,11.308594,20.726562,22.687500,22.687500,22.722656,2,1
3,06 March 2025,102.365436,-3.519631,103.625,-4.500,SUMATERA,49.0,BWS SUMATERA VII,LUAS,Sumatera Selatan,Ogan Komering Ulu Selatan,1,07:00,0.253906,2.632812,5.742188,5.863281,7.835938,8.324219,8.324219,8.324219,1,1
4,06 March 2025,102.365436,-3.519631,103.500,-4.500,SUMATERA,49.0,BWS SUMATERA VII,LUAS,Sumatera Selatan,Ogan Komering Ulu Selatan,1,04:00,1.027344,3.234375,3.992188,4.066406,9.031250,11.460938,11.460938,11.496094,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45501,06 March 2025,116.854937,3.014629,118.000,2.375,KALIMANTAN,53.0,BWS KALIMANTAN V,,Kalimantan Utara,Bulungan,1,04:00,3.347656,3.464844,3.632812,4.304688,5.152344,7.980469,8.070312,8.480469,1,1
45502,06 March 2025,115.956516,0.275581,115.125,1.875,KALIMANTAN,52.0,BWS KALIMANTAN IV,KAYAN,Kalimantan Utara,Malinau,1,10:00,0.000000,0.000000,5.949219,15.699219,17.367188,19.601562,21.640625,21.828125,2,1
45503,06 March 2025,116.854937,3.014629,117.750,2.500,KALIMANTAN,53.0,BWS KALIMANTAN V,,Kalimantan Utara,Bulungan,1,04:00,3.210938,3.261719,3.710938,10.093750,20.398438,21.996094,21.996094,22.007812,2,1
45504,06 March 2025,116.854937,3.014629,117.625,2.500,KALIMANTAN,53.0,BWS KALIMANTAN V,KAYAN,Kalimantan Utara,Bulungan,1,04:00,1.359375,1.433594,1.917969,2.828125,15.683594,15.992188,16.000000,16.082031,1,1


In [16]:
print("Result sorting descanding:", tanggal_besok_str)
data_tanggal_besok_sorted = data_tanggal_besok.sort_values(by='ch_01:00', ascending=False)
data_tanggal_besok_sorted

Result sorting descanding: 06 March 2025


Unnamed: 0,tanggal,long_prod,lat_prod,longitude,latitude,pulau,kode_balai,balai,das,provinsi,kabkot,wilayah,waktu_mulai,ch_04:00,ch_07:00,ch_10:00,ch_13:00,ch_16:00,ch_19:00,ch_22:00,ch_01:00,klasifikasi_hujan,status_akhir
40723,06 March 2025,114.792207,-1.317482,115.250,-2.125,KALIMANTAN,11.0,BWS KALIMANTAN III,BARITO,Kalimantan Tengah,Barito Timur,1,10:00,0.085938,0.109375,1.738281,4.050781,7.500000,13.976562,28.156250,93.375000,3,3
42737,06 March 2025,114.792207,-1.317482,115.250,-2.125,KALIMANTAN,11.0,BWS KALIMANTAN III,BARITO,Kalimantan Selatan,Tabalong,1,10:00,0.085938,0.109375,1.738281,4.050781,7.500000,13.976562,28.156250,93.375000,3,3
40753,06 March 2025,114.792207,-1.317482,115.250,-2.250,KALIMANTAN,11.0,BWS KALIMANTAN III,BARITO,Kalimantan Tengah,Barito Timur,1,13:00,0.015625,0.039062,0.421875,3.449219,8.726562,16.585938,30.390625,81.871094,3,3
42742,06 March 2025,114.792207,-1.317482,115.250,-2.250,KALIMANTAN,11.0,BWS KALIMANTAN III,BARITO,Kalimantan Selatan,Tabalong,1,13:00,0.015625,0.039062,0.421875,3.449219,8.726562,16.585938,30.390625,81.871094,3,3
42886,06 March 2025,114.792207,-1.317482,114.875,-2.750,KALIMANTAN,11.0,BWS KALIMANTAN III,BARITO,Kalimantan Selatan,Tapin,1,10:00,0.164062,0.246094,3.660156,3.941406,6.796875,19.562500,31.511719,74.417969,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5039,06 March 2025,113.727000,-8.183800,100.875,-1.875,SUMATERA,999.0,NON-B/BWS,PUNGGASAN,Sumatera Barat,Pesisir Selatan,1,19:00,0.417969,0.417969,0.496094,0.496094,0.496094,0.500000,0.500000,0.500000,1,1
5038,06 March 2025,113.727000,-8.183800,100.875,-1.875,SUMATERA,999.0,NON-B/BWS,PELANGAI,Sumatera Barat,Pesisir Selatan,1,19:00,0.417969,0.417969,0.496094,0.496094,0.496094,0.500000,0.500000,0.500000,1,1
5037,06 March 2025,113.727000,-8.183800,100.875,-1.875,SUMATERA,999.0,NON-B/BWS,INDRAPURA,Sumatera Barat,Pesisir Selatan,1,19:00,0.417969,0.417969,0.496094,0.496094,0.496094,0.500000,0.500000,0.500000,1,1
5036,06 March 2025,113.727000,-8.183800,100.875,-1.875,SUMATERA,999.0,NON-B/BWS,AIR HAJI,Sumatera Barat,Pesisir Selatan,1,19:00,0.417969,0.417969,0.496094,0.496094,0.496094,0.500000,0.500000,0.500000,1,1


In [17]:
def create_remove_duplicate_tabular_data(data, subset_columns, output_folder, file_prefix, index_column):
    print(f'Create tabular data for {file_prefix}...')

    data_filtered = data.drop_duplicates(subset=subset_columns, keep='first')

    remove_duplicate_tabular_data_path = os.path.join(output_folder, f"{file_prefix}_{(dates[11+n]).strftime('%m%d%Y')}.xlsx")

    with pd.ExcelWriter(remove_duplicate_tabular_data_path, engine='xlsxwriter') as writer:
        data_filtered.to_excel(writer, sheet_name=file_prefix.capitalize())

    print(f'Tabular data for {file_prefix} has been completed and saved')

    pivot = pd.pivot_table(
        data_filtered,
        index=index_column,
        columns='klasifikasi_hujan',
        values='kabkot',
        aggfunc='count',
        fill_value=0
    )

    print(pivot)

create_remove_duplicate_tabular_data(
    data=data_tanggal_besok_sorted,
    subset_columns=['balai', 'kabkot'],
    output_folder="../repository/pre-processing/remove-duplicate/balai/",
    file_prefix="remdup_balai",
    index_column="balai"
)

create_remove_duplicate_tabular_data(
    data=data_tanggal_besok_sorted,
    subset_columns=['kabkot'],
    output_folder="../repository/pre-processing/remove-duplicate/pulau/",
    file_prefix="remdup_pulau",
    index_column="pulau"
)

Create tabular data for remdup_balai...
Tabular data for remdup_balai has been completed and saved
klasifikasi_hujan                1    2   3
balai                                      
BBWS BENGAWAN SOLO              17   11   0
BBWS BRANTAS                    10   13   0
BBWS CIDANAU-CIUJUNG-CIDURIAN    5    2   0
BBWS CILIWUNG-CISADANE          12    5   0
BBWS CIMANUK-CISANGGARUNG        5    8   0
BBWS CITANDUY                   10    1   0
BBWS CITARUM                     5    9   0
BBWS MESUJI SEKAMPUNG            8    9   0
BBWS NT I                       11    0   0
BBWS NT II                      14    0   0
BBWS PEMALI JUANA                9    7   0
BBWS POMPENGAN JENEBERANG        3   26   5
BBWS SERAYU OPAK                 9   16   0
BBWS SUMATERA II                11    9   0
BBWS SUMATERA VIII              18   16   0
BWS BALI PENIDA                  9    0   0
BWS BANGKA BELITUNG              2    4   0
BWS KALIMANTAN I                 0   17   1
BWS KALIMANTAN II    

In [None]:
def prediction_data(remove_duplicate_path, get_columns, group_by, output_prediction, uniq_file_name):
    print(f'Processing data for {uniq_file_name}...')

    data_for_prediction = pd.read_excel(remove_duplicate_path)
    prediction_data_filtered = data_for_prediction[get_columns]

    add_columns = [
        'total_kl_1', 'total_kl_2', 'total_kl_3', 'total_kl_4', 'total_kl_5',
        'total_kg_1', 'total_kg_2', 'total_kg_3', 'total_kg_4',
        'kelas_kl_1', 'kelas_kl_2', 'kelas_kl_3', 'kelas_kl_4', 'kelas_kl_5',
        'kelas_kg_1', 'kelas_kg_2', 'kelas_kg_3', 'kelas_kg_4',
        'last_data', 'last_updt'
    ]

    for col in add_columns:
        prediction_data_filtered[col] = 0

    for wilayah in prediction_data_filtered['wilayah'].unique():
        wilayah_data = prediction_data_filtered[prediction_data_filtered['wilayah'] == wilayah]
        for i in range(1, 6): 
            prediction_data_filtered.loc[prediction_data_filtered['wilayah'] == wilayah, f'total_kl_{i}'] = wilayah_data[wilayah_data['klasifikasi_hujan'] == i].shape[0]

    for wilayah in prediction_data_filtered['wilayah'].unique():
        wilayah_data = prediction_data_filtered[prediction_data_filtered['wilayah'] == wilayah]
        for i in range(1, 5): 
            prediction_data_filtered.loc[prediction_data_filtered['wilayah'] == wilayah, f'total_kg_{i}'] = wilayah_data[wilayah_data['status_akhir'] == i].shape[0]

    for group in prediction_data_filtered[group_by].unique():
        group_data = prediction_data_filtered[prediction_data_filtered[group_by] == group]
        for i in range(1, 6): 
            prediction_data_filtered.loc[prediction_data_filtered[group_by] == group, f'kelas_kl_{i}'] = group_data[group_data['klasifikasi_hujan'] == i].shape[0]

    for group in prediction_data_filtered[group_by].unique():
        group_data = prediction_data_filtered[prediction_data_filtered[group_by] == group]
        for i in range(1, 5): 
            prediction_data_filtered.loc[prediction_data_filtered[group_by] == group, f'kelas_kg_{i}'] = group_data[group_data['status_akhir'] == i].shape[0]

    prediction_data_filtered['last_data'] = filename
    prediction_data_filtered['last_updt'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    prediction_data_filtered = prediction_data_filtered.drop_duplicates(subset=[group_by])

    output_file = os.path.join(output_prediction, f"{uniq_file_name}_{(dates[11+n]).strftime('%m%d%Y')}_2200.csv")

    prediction_data_filtered.to_csv(output_file, index=False)

    print(f'Data processing for {uniq_file_name} completed')


prediction_data(
    remove_duplicate_path=f"../repository/pre-processing/remove-duplicate/balai/remdup_balai_{(dates[11+n]).strftime('%m%d%Y')}.xlsx",
    get_columns=[
        'tanggal', 'long_prod', 'lat_prod', 'longitude', 'latitude', 'wilayah', 'kode_balai', 'balai', 'ch_01:00',
        'klasifikasi_hujan', 'status_akhir'
    ],
    group_by="balai",
    output_prediction="../repository/processing/day/balai/",
    uniq_file_name="balai_pch_day"
)

prediction_data(
    remove_duplicate_path=f"../repository/pre-processing/remove-duplicate/pulau/remdup_pulau_{(dates[11+n]).strftime('%m%d%Y')}.xlsx",
    get_columns=[
        'tanggal', 'long_prod', 'lat_prod', 'longitude', 'latitude', 'wilayah', 'pulau', 'ch_01:00',
        'klasifikasi_hujan', 'status_akhir'
    ],
    group_by="pulau",
    output_prediction="../repository/processing/day/pulau/",
    uniq_file_name="pulau_pch_day"
)


Processing data for balai_pch_day...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data_filtered[col] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data_filtered[col] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data_filtered[col] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_index

Data processing for balai_pch_day completed
Processing data for pulau_pch_day...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data_filtered[col] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data_filtered[col] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data_filtered[col] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_index

Data processing for pulau_pch_day completed


In [None]:
def csv_to_shp(csv_file, output_wfs_dir, output_shp):

    os.makedirs(output_wfs_dir, exist_ok=True)

    data = pd.read_csv(csv_file)

    columns_to_drop = ['ch_01:00', 'klasifikasi_hujan', 'status_akhir']
    data = data.drop(columns=[col for col in columns_to_drop if col in data.columns], errors='ignore')

    if 'latitude' in data.columns and 'longitude' in data.columns:
        data['geometry'] = data.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)

        gdf = gpd.GeoDataFrame(data, geometry='geometry')
        gdf.set_crs(epsg=4326, inplace=True)
        gdf.to_file(output_shp, driver='ESRI Shapefile')

        print(f"Successfully created vektor data {output_shp}")
    else:
        print("Column 'latitude' or 'longitude' was not found in the CSV file")

start_date = (dates[11+n]).strftime('%m%d%Y')

output_balai_dir = f"../repository/post-processing/wfs/balai/pch_balai_{start_date}/"
output_pulau_dir = f"../repository/post-processing/wfs/pulau/pch_pulau_{start_date}/"

csv_to_shp(
    csv_file=f"../repository/processing/day/balai/balai_pch_day_{start_date}_2200.csv",
    output_wfs_dir=output_balai_dir,
    output_shp=f"{output_balai_dir}balai_pch_day_{start_date}_2200.shp"
)

csv_to_shp(
    csv_file=f"../repository/processing/day/pulau/pulau_pch_day_{start_date}_2200.csv",
    output_wfs_dir=output_pulau_dir,
    output_shp=f"{output_pulau_dir}pulau_pch_day_{start_date}_2200.shp"
)

Successfully created vektor data ../repository/post-processing/wfs/balai/pch_balai_03062025/balai_pch_day_03062025_2200.shp
Successfully created vektor data ../repository/post-processing/wfs/pulau/pch_pulau_03062025/pulau_pch_day_03062025_2200.shp


In [None]:
shp_dirs = [f"../repository/post-processing/wfs/balai/pch_balai_{start_date}/", f"../repository/post-processing/wfs/pulau/pch_pulau_{start_date}/"]
tif_dirs = [f"../repository/post-processing/wms/day/pch_day_{start_date}/", f"../repository/post-processing/wms/hour/pch_hour_{start_date}/"]
geoserver_endpoint = "http://admin:geoserver@127.0.0.1:8080/geoserver"
workspace = "demo_simadu"

def upload_to_geoserver(data_path, store_name, geoserver_endpoint, workspace):
    file_extension = os.path.splitext(data_path)[1].lower()
    if file_extension == ".shp":
        file_type = "shp"
        store_type = "datastores"
    elif file_extension == ".tif":
        file_type = "geotiff"
        store_type = "coveragestores"
    else:
        print(f"File type {file_extension} not supported for upload.")
        return False

    absolute_path = os.path.abspath(data_path).replace("\\", "/")
    url = f"{geoserver_endpoint}/rest/workspaces/{workspace}/{store_type}/{store_name}/external.{file_type}"

    headers = {"Content-type": "text/plain"}
    response = requests.put(url, data=f"file://{absolute_path}", headers=headers, auth=("admin", "geoserver"))

    if response.status_code in [200, 201]:
        print(f"Successfully uploaded {data_path} to geoserver")
        return True
    else:
        print(f"Failed to upload {data_path} to geoserver. Status code: {response.status_code}")
        return False

def process_and_upload_to_geoserver(shp_dirs, tif_dirs, geoserver_endpoint, workspace):
    for shp_dir in shp_dirs:
        shp_files = [os.path.join(shp_dir, file) for file in os.listdir(shp_dir) if file.endswith('.shp')]
        for shp_file in shp_files:
            store_name = os.path.splitext(os.path.basename(shp_file))[0]
            upload_to_geoserver(shp_file, store_name, geoserver_endpoint, workspace)

    for tif_dir in tif_dirs:
        tif_files = [os.path.join(tif_dir, file) for file in os.listdir(tif_dir) if file.endswith('.tif')]
        for tif_file in tif_files:
            store_name = os.path.splitext(os.path.basename(tif_file))[0]
            upload_to_geoserver(tif_file, store_name, geoserver_endpoint, workspace)

process_and_upload_to_geoserver(
    shp_dirs,
    tif_dirs,
    geoserver_endpoint,
    workspace,
)

Successfully uploaded ../repository/post-processing/wfs/balai/pch_balai_03062025/balai_pch_day_03062025_2200.shp to geoserver
Successfully uploaded ../repository/post-processing/wfs/pulau/pch_pulau_03062025/pulau_pch_day_03062025_2200.shp to geoserver
Successfully uploaded ../repository/post-processing/wms/day/pch_day_03062025/pch_day_03062025.tif to geoserver
Successfully uploaded ../repository/post-processing/wms/hour/pch_hour_03062025/pch_hour_03062025_0100_22.00.tif to geoserver
Successfully uploaded ../repository/post-processing/wms/hour/pch_hour_03062025/pch_hour_03062025_0400_22.00.tif to geoserver
Successfully uploaded ../repository/post-processing/wms/hour/pch_hour_03062025/pch_hour_03062025_0700_22.00.tif to geoserver
Successfully uploaded ../repository/post-processing/wms/hour/pch_hour_03062025/pch_hour_03062025_1000_22.00.tif to geoserver
Successfully uploaded ../repository/post-processing/wms/hour/pch_hour_03062025/pch_hour_03062025_1300_22.00.tif to geoserver
Successfully