In [40]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import rasterio
from rasterio.sample import sample_gen

In [41]:
## Get Data 

df_ = pd.read_csv("data_compile_agro_climate.csv", sep=';')
df_ = df_[['idsubsegmen_repair', 'latitude', 'longitude', 'obs', 'tahun', 'bulan',
       'kdprov', 'idsubfinal', 'idkab', 'idkec', 'idsegmen', 'is_puso']]
df_['geometry'] = df_.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)
gdf_ = gpd.GeoDataFrame(df_, geometry='geometry')
print(gdf_.shape)
gdf_.head(2)

KeyError: "None of [Index(['idsubsegmen_repair', 'latitude', 'longitude', 'obs', 'tahun', 'bulan',\n       'kdprov', 'idsubfinal', 'idkab', 'idkec', 'idsegmen', 'is_puso'],\n      dtype='object')] are in the [columns]"

### Get Sentinel 1 Pixel Value

In [7]:
gdf_['year'] = "20" + gdf_['tahun'].astype("str")
gdf_['year'] = gdf_['year'].astype("int")
gdf_['index'] = 12*(gdf_.year-2001)+gdf_.bulan
gdf_.head(2)

Unnamed: 0,idsubsegmen_repair,latitude,longitude,obs,tahun,bulan,kdprov,idsubfinal,idkab,idkec,idsegmen,is_puso,geometry,year,index
0,320104008A1,-6.582085,106.646608,6.0,23,9,32,320104008A1239,3201,3201040,320104008,1,POINT (106.64661 -6.58209),2023,273
1,320104008A2,-6.582958,106.646563,6.0,23,9,32,320104008A2239,3201,3201040,320104008,1,POINT (106.64656 -6.58296),2023,273


In [6]:
# # Load Get Value From Raster

raster_path = "chirp/Chirp_CH_montly_Jawa_2001-2023.tif" 
with rasterio.open(raster_path) as src:
    coordinates = [(geom.x, geom.y) for geom in gdf_.geometry]
    pixel_values = list(src.sample(coordinates))
     
df_bands = pd.DataFrame(pixel_values)
df_bands.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,266,267,268,269,270,271,272,273,274,275
0,576,365,373,508,363,232,278,90,464,754,...,334,387,248,246,109,113,43,174,689,197
1,576,365,373,508,363,232,278,90,464,754,...,334,387,248,246,109,113,43,174,689,197


In [9]:
prec_list = [f"PREC_{i}" for i in range(23, -1, -1)]
df_prec = pd.DataFrame(index=gdf_.index, columns = prec_list)
print(df_prec.shape)
df_prec.head(2)

(5399, 24)


Unnamed: 0,PREC_23,PREC_22,PREC_21,PREC_20,PREC_19,PREC_18,PREC_17,PREC_16,PREC_15,PREC_14,...,PREC_9,PREC_8,PREC_7,PREC_6,PREC_5,PREC_4,PREC_3,PREC_2,PREC_1,PREC_0
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,


In [10]:
for i,r in gdf_.iterrows():
    ind = r['index']
    df_prec.iloc[i,:] = df_bands.iloc[i,ind-24:ind]
    # break

In [21]:
gdf_padi_ = gdf_[['idsubsegmen_repair', 'latitude', 'longitude', 'obs', 'tahun', 'bulan',
       'kdprov', 'idsubfinal', 'idkab', 'idkec', 'idsegmen', 'is_puso', 'geometry',
        'index']].join(df_prec)

In [34]:
gdf_padi_.drop(['latitude','longitude','geometry'], axis='columns', inplace=True)
gdf_padi_.to_csv("data_compile_agro_CHIRP.csv")

In [23]:
# # Load Get Value From Raster ANOMALY

raster_path = "chirp/Chirp_CH_montly_Jawa_2001-2023_Anomali.tif" 
with rasterio.open(raster_path) as src:
    coordinates = [(geom.x, geom.y) for geom in gdf_.geometry]
    pixel_values = list(src.sample(coordinates))
     
df_bands = pd.DataFrame(pixel_values)
df_bands.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,266,267,268,269,270,271,272,273,274,275
0,212,-25,41,73,29,20,106,-91,147,339,...,2,-48,-86,34,-63,-68,-274,-241,135,-210
1,212,-25,41,73,29,20,106,-91,147,339,...,2,-48,-86,34,-63,-68,-274,-241,135,-210


In [24]:
prec_list = [f"PREC_{i}" for i in range(23, -1, -1)]
df_prec = pd.DataFrame(index=gdf_.index, columns = prec_list)
print(df_prec.shape)
df_prec.head(2)

(5399, 24)


Unnamed: 0,PREC_23,PREC_22,PREC_21,PREC_20,PREC_19,PREC_18,PREC_17,PREC_16,PREC_15,PREC_14,...,PREC_9,PREC_8,PREC_7,PREC_6,PREC_5,PREC_4,PREC_3,PREC_2,PREC_1,PREC_0
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,


In [25]:
for i,r in gdf_.iterrows():
    ind = r['index']
    df_prec.iloc[i,:] = df_bands.iloc[i,ind-24:ind]
    # break

In [30]:
gdf_[['idsubsegmen_repair', 'latitude', 'longitude', 'obs', 'tahun', 'bulan',
       'kdprov', 'idsubfinal', 'idkab', 'idkec', 'idsegmen', 'is_puso', 'geometry',
        'index']].join(df_prec)

In [36]:
gdf_padi_.drop(['latitude','longitude','geometry'], axis='columns', inplace=True)
gdf_padi_.to_csv("data_compile_agro_CHIRP_Anomali.csv")

In [37]:
gdf_padi_.columns

Index(['idsubsegmen_repair', 'obs', 'tahun', 'bulan', 'kdprov', 'idsubfinal',
       'idkab', 'idkec', 'idsegmen', 'is_puso', 'index', 'PREC_23', 'PREC_22',
       'PREC_21', 'PREC_20', 'PREC_19', 'PREC_18', 'PREC_17', 'PREC_16',
       'PREC_15', 'PREC_14', 'PREC_13', 'PREC_12', 'PREC_11', 'PREC_10',
       'PREC_9', 'PREC_8', 'PREC_7', 'PREC_6', 'PREC_5', 'PREC_4', 'PREC_3',
       'PREC_2', 'PREC_1', 'PREC_0'],
      dtype='object')

In [49]:
df_puso.obs[1]


np.float64(6.0)

#### Get Samples


In [53]:
import pandas as pd

df_puso = pd.read_csv("/data/raw/Workshop_PUSO.csv", sep=';')
sampling_rules = {
    6.0: 2,
    1.0: 1,
    3.0: 1,
    5.0: 1
}
df_puso['obs'] = pd.to_numeric(df_puso['obs'], errors='coerce')

sampled_data = []

for obs_value, sample_size in sampling_rules.items():
    group = df_puso[df_puso['obs'] == obs_value]
    if len(group) >= sample_size:  # Ensure there are enough rows to sample
        sampled_data.append(group.sample(n=sample_size, random_state=42))  # Random seed for reproducibility
    else:
        print(f"Not enough samples for obs={obs_value}. Available: {len(group)}, Requested: {sample_size}")
df_sampled = pd.concat(sampled_data).reset_index(drop=True)
df_sampled.to_csv("sampel_puso_data.csv")

## Catatan

In [26]:
gdf_[['idsubsegmen_repair', 'latitude', 'longitude', 'obs', 'tahun', 'bulan',
       'kdprov', 'idsubfinal', 'idkab', 'idkec', 'idsegmen', 'is_puso', 'geometry',
        'index']].join(df_prec).iloc[2603:2605]

Unnamed: 0,idsubsegmen_repair,latitude,longitude,obs,tahun,bulan,kdprov,idsubfinal,idkab,idkec,...,PREC_9,PREC_8,PREC_7,PREC_6,PREC_5,PREC_4,PREC_3,PREC_2,PREC_1,PREC_0
2603,320319003C1,-6.83507,107.164869,6.0,23,12,32,320319003C12312,3203,3203190,...,-14,19,-100,-37,-27,-35,-184,-131,13,-194
2604,320505088B2,-7.45701,107.655826,6.0,23,12,32,320505088B22312,3205,3205050,...,-10,99,-46,-68,-59,-26,-120,-253,-96,-236


In [27]:
df_bands.iloc[2603:2605,]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,266,267,268,269,270,271,272,273,274,275
2603,187,-12,75,95,-6,41,7,-42,90,262,...,-14,19,-100,-37,-27,-35,-184,-131,13,-194
2604,89,-40,90,148,-26,42,-63,-34,-50,669,...,-10,99,-46,-68,-59,-26,-120,-253,-96,-236


In [28]:
df_prec.iloc[2603:2605,]

Unnamed: 0,PREC_23,PREC_22,PREC_21,PREC_20,PREC_19,PREC_18,PREC_17,PREC_16,PREC_15,PREC_14,...,PREC_9,PREC_8,PREC_7,PREC_6,PREC_5,PREC_4,PREC_3,PREC_2,PREC_1,PREC_0
2603,-91,-79,22,20,65,50,55,105,290,163,...,-14,19,-100,-37,-27,-35,-184,-131,13,-194
2604,4,-65,31,78,27,140,103,68,288,329,...,-10,99,-46,-68,-59,-26,-120,-253,-96,-236


In [29]:
gdf_.loc[(gdf_.bulan == 12) & (gdf_.tahun == 23)]

Unnamed: 0,idsubsegmen_repair,latitude,longitude,obs,tahun,bulan,kdprov,idsubfinal,idkab,idkec,idsegmen,is_puso,geometry,year,index
2603,320319003C1,-6.835070,107.164869,6.0,23,12,32,320319003C12312,3203,3203190,320319003,1,POINT (107.16487 -6.83507),2023,276
2604,320505088B2,-7.457010,107.655826,6.0,23,12,32,320505088B22312,3205,3205050,320505088,1,POINT (107.65583 -7.45701),2023,276
2605,320714005B2,-7.388961,108.475277,6.0,23,12,32,320714005B22312,3207,3207140,320714005,1,POINT (108.47528 -7.38896),2023,276
2606,320714005C1,-7.388132,108.476177,6.0,23,12,32,320714005C12312,3207,3207140,320714005,1,POINT (108.47618 -7.38813),2023,276
2607,321103115B1,-6.872500,107.788132,6.0,23,12,32,321103115B12312,3211,3211031,321103115,1,POINT (107.78813 -6.8725),2023,276
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,327312102C1,-6.921879,107.691642,4.0,23,12,32,327312102C12312,3273,3273121,327312102,0,POINT (107.69164 -6.92188),2023,276
2796,327703010A3,-6.864777,107.537640,8.0,23,12,32,327703010A32312,3277,3277030,327703010,0,POINT (107.53764 -6.86478),2023,276
2797,327805003A2,-7.344957,108.208097,8.0,23,12,32,327805003A22312,3278,3278050,327805003,0,POINT (108.2081 -7.34496),2023,276
2798,327808001C1,-7.316709,108.242268,8.0,23,12,32,327808001C12312,3278,3278080,327808001,0,POINT (108.24227 -7.31671),2023,276
