In [1]:
import pandas as pd
import pystac_client
from odc.stac import stac_load
import planetary_computer as pc
# pc.settings.set_subscription_key('********************************')
from tqdm import tqdm


In [2]:
# Labeled training points: each row pairs a "(lat, long)" string with its land class.
crop_presence_data = pd.read_csv(
    filepath_or_buffer="Crop_Location_Data_20221201.csv"
)
crop_presence_data.head(n=5)


Unnamed: 0,Latitude and Longitude,Class of Land
0,"(10.323727047081501, 105.2516346045924)",Rice
1,"(10.322364360592521, 105.27843410554115)",Rice
2,"(10.321455902933202, 105.25254306225168)",Rice
3,"(10.324181275911162, 105.25118037576274)",Rice
4,"(10.324635504740822, 105.27389181724476)",Rice


In [3]:
# Submission template: the `id` column holds the "(lat, long)" strings to classify.
no_label_data = pd.read_csv(
    filepath_or_buffer="challenge_1_submission_template_correct_columns_fixed.csv"
)
no_label_data.head(n=5)


Unnamed: 0,id,target
0,"(10.18019073690894, 105.32022315786804)",
1,"(10.561107033461816, 105.12772097986661)",
2,"(10.623790611954897, 105.13771401411867)",
3,"(10.583364246115156, 105.23946127195805)",
4,"(10.20744446668854, 105.26844107128906)",


In [4]:
def get_sentinel_data(latlong, time_slice):
    """Fetch the Sentinel-1 RTC VH/VV time series around a single location.

    Parameters
    ----------
    latlong : str
        Coordinate pair formatted as ``"(latitude, longitude)"``, e.g.
        ``"(10.323727047081501, 105.2516346045924)"``.
    time_slice : str
        Date range forwarded as the ``datetime`` argument of the STAC search,
        e.g. ``"2021-01-01/2022-12-31"``.

    Returns
    -------
    pandas.DataFrame
        One row per matching STAC item, indexed by acquisition day and sorted
        by that index, with columns ``vh`` and ``vv`` holding the mean of each
        band over the bounding box. An empty frame with the same two columns
        is returned when the search matches nothing.
    """
    parts = latlong.replace("(", "").replace(")", "").replace(" ", "").split(",")
    latitude, longitude = float(parts[0]), float(parts[1])
    # The Sentinel-1 mission uses C-band radar at 10-meter resolution
    # 10 meters is about 0.0001 degree in latitude and longitude
    bbox_of_interest = (  # a bounding box of 3×3 pixels, (min_lon, min_lat, max_lon, max_lat)
        longitude - 0.0001,
        latitude - 0.0001,
        longitude + 0.0001,
        latitude + 0.0001,
    )
    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1"
    )
    search = catalog.search(
        collections=["sentinel-1-rtc"], bbox=bbox_of_interest, datetime=time_slice
    )
    items = list(search.item_collection())

    # Collect one single-row frame per item and concatenate once at the end.
    # Growing a frame with pd.concat inside the loop is quadratic, and seeding
    # it with an empty object-dtype frame makes the result's dtypes depend on
    # pandas' deprecated handling of empty entries in concat.
    frames = []
    for item in items:
        data = stac_load([item], patch_url=pc.sign, bbox=bbox_of_interest).isel(time=0)
        # use the average value of the surrounding pixels
        vh = data["vh"].astype("float").mean().values.item()
        vv = data["vv"].astype("float").mean().values.item()
        # truncate the acquisition timestamp to day precision
        time = data["time"].astype("datetime64[D]").values
        frames.append(pd.DataFrame([[vh, vv]], index=[time], columns=["vh", "vv"]))

    if not frames:
        # No acquisitions matched: keep the original empty-result shape.
        return pd.DataFrame(columns=["vh", "vv"])

    return pd.concat(frames).sort_index()


Fetch the Sentinel-1 VH and VV time series for every labeled location.

In [5]:
# Date range passed to the STAC search for every location.
time_slice = "2021-01-01/2022-12-31"
all_location = []
keys = []
for index, row in tqdm(
    crop_presence_data.iterrows(), total=crop_presence_data.shape[0]
):
    single_location_time_series = get_sentinel_data(
        row["Latitude and Longitude"], time_slice
    )
    all_location.append(single_location_time_series)
    # Access by label: integer keys on a string-labelled Series rely on a
    # deprecated positional fallback in recent pandas releases.
    keys.append((row["Latitude and Longitude"], row["Class of Land"]))

# Stack the per-location series; the keys become the first two index levels,
# which reset_index() then turns into ordinary columns.
whole = pd.concat(
    all_location, keys=keys, names=["lat_and_long", "type", "time"]
).reset_index()


100%|██████████| 600/600 [5:28:55<00:00, 32.89s/it]  


In [6]:
# Persist the labeled time series (without the row index), then show the frame.
whole.to_csv(path_or_buf="all_data.csv", index=False)
whole


Unnamed: 0,lat_and_long,type,time,vh,vv
0,"(10.323727047081501, 105.2516346045924)",Rice,2021-01-02,0.007281,0.064617
1,"(10.323727047081501, 105.2516346045924)",Rice,2021-01-03,0.012100,0.100834
2,"(10.323727047081501, 105.2516346045924)",Rice,2021-01-08,0.008796,0.302948
3,"(10.323727047081501, 105.2516346045924)",Rice,2021-01-14,0.013753,0.219833
4,"(10.323727047081501, 105.2516346045924)",Rice,2021-01-15,0.013327,0.317204
...,...,...,...,...,...
88826,"(10.012126069934741, 105.67361318732796)",Non Rice,2022-11-30,0.035776,0.229859
88827,"(10.012126069934741, 105.67361318732796)",Non Rice,2022-12-11,0.061688,0.285414
88828,"(10.012126069934741, 105.67361318732796)",Non Rice,2022-12-12,0.106474,0.210555
88829,"(10.012126069934741, 105.67361318732796)",Non Rice,2022-12-23,0.068176,0.339369


Fetch the Sentinel-1 VH and VV time series for every unlabeled location in the submission template.

In [7]:
# Date range passed to the STAC search for every location.
time_slice = "2021-01-01/2022-12-31"
all_location = []
keys = []
for index, row in tqdm(no_label_data.iterrows(), total=no_label_data.shape[0]):
    single_location_time_series = get_sentinel_data(row["id"], time_slice)
    all_location.append(single_location_time_series)
    # Access by label: integer keys on a string-labelled Series rely on a
    # deprecated positional fallback in recent pandas releases.
    keys.append(row["id"])

# NOTE: this rebinds `no_label_data` from the submission template to the
# fetched time series, so the cell that loads the template must be re-run
# before this cell can be executed again.
no_label_data = pd.concat(
    all_location, keys=keys, names=["lat_and_long", "time"]
).reset_index()
# Add an empty "type" column so the layout matches the labeled data.
no_label_data.insert(1, "type", "")


100%|██████████| 250/250 [2:12:15<00:00, 31.74s/it]  


In [None]:
# Persist the unlabeled time series (without the row index), then show the frame.
no_label_data.to_csv(path_or_buf="no_label_data.csv", index=False)
no_label_data


Unnamed: 0,lat_and_long,type,time,vh,vv
0,"(10.18019073690894, 105.32022315786804)",,2021-01-02,0.019622,0.201935
1,"(10.18019073690894, 105.32022315786804)",,2021-01-03,0.009204,0.132878
2,"(10.18019073690894, 105.32022315786804)",,2021-01-08,0.029937,0.110144
3,"(10.18019073690894, 105.32022315786804)",,2021-01-14,0.033573,0.044857
4,"(10.18019073690894, 105.32022315786804)",,2021-01-15,0.015343,0.096504
...,...,...,...,...,...
36995,"(10.574733898351617, 105.10410108072531)",,2022-11-30,0.032845,0.101729
36996,"(10.574733898351617, 105.10410108072531)",,2022-12-11,0.092445,0.299581
36997,"(10.574733898351617, 105.10410108072531)",,2022-12-12,0.061081,0.191966
36998,"(10.574733898351617, 105.10410108072531)",,2022-12-23,0.040190,0.169888
