In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model


# Load model generated by `build_model.ipynb`

In [2]:
# Path of the trained Keras model file to restore for inference.
model_path = "best_model.h5"
model = load_model(model_path)


# Read the data generated by `get_data.ipynb` and apply the same data preparation as `build_model.ipynb`

In [3]:
# Load the unlabeled observations and convert the "time" column to datetimes
# so it can later serve as a time-aware column axis.
no_label_data = pd.read_csv("no_label_data.csv").assign(
    time=lambda frame: pd.to_datetime(frame["time"])
)


In [4]:
# Build one wide table per band: one row per ("lat_and_long", "type") pair,
# one column per timestamp, holding that band's value.
pivoted_vh_non_labeled, pivoted_vv_non_labeled = (
    no_label_data.pivot(
        index=["lat_and_long", "type"], columns="time", values=band
    )
    for band in ("vh", "vv")
)


Fill missing values: interpolate along the time axis within each row, then back-fill any gaps that remain.

In [5]:
def _fill_missing_timesteps(pivoted: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``pivoted`` with missing readings filled in row by row.

    Gaps are first filled by time-weighted interpolation across the columns
    (``method="time"``, ``axis=1``); any values still missing afterwards are
    back-filled from the next available column.

    ``DataFrame.bfill`` is used instead of ``fillna(method="bfill")`` because
    the ``method`` argument of ``fillna`` is deprecated in recent pandas
    releases; the filled values are the same.
    """
    return pivoted.interpolate(method="time", axis=1).bfill(axis=1)


pivoted_vh_non_labeled = _fill_missing_timesteps(pivoted_vh_non_labeled)
pivoted_vv_non_labeled = _fill_missing_timesteps(pivoted_vv_non_labeled)


In [6]:
# Turn the ("lat_and_long", "type") index levels back into ordinary columns and
# clear the name left on the column axis by the pivot.
# NOTE(review): this reassigns the same names, so re-running this cell on its
# own would presumably reset the index a second time - run from the top.
pivoted_vv_non_labeled = pivoted_vv_non_labeled.reset_index()
pivoted_vv_non_labeled.columns.name = None
pivoted_vh_non_labeled = pivoted_vh_non_labeled.reset_index()
pivoted_vh_non_labeled.columns.name = None

# Show the size and first rows of each table: vh first, then vv, with a
# separator line between the two previews.
preview_tables = [
    ("Non-labeled locations' vh:", pivoted_vh_non_labeled),
    ("Non-labeled locations' vv:", pivoted_vv_non_labeled),
]
for table_number, (heading, table) in enumerate(preview_tables):
    if table_number > 0:
        print("--------------------")
    print(heading)
    print(f"Size: {table.shape}")
    display(table.head())


Non-labeled locations' vh:
Size: (250, 150)


Unnamed: 0,lat_and_long,type,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-08 00:00:00,2021-01-14 00:00:00,2021-01-15 00:00:00,2021-01-20 00:00:00,2021-01-26 00:00:00,2021-01-27 00:00:00,...,2022-11-05 00:00:00,2022-11-06 00:00:00,2022-11-17 00:00:00,2022-11-18 00:00:00,2022-11-29 00:00:00,2022-11-30 00:00:00,2022-12-11 00:00:00,2022-12-12 00:00:00,2022-12-23 00:00:00,2022-12-24 00:00:00
0,"(10.15475392244798, 105.29932863170458)",,0.023868,0.026939,0.018105,0.015798,0.016411,0.020235,0.022775,0.013857,...,0.00696,0.002811,0.006922,0.003684,0.006552,0.005159,0.010108,0.011453,0.013497,0.007426
1,"(10.157025066596281, 105.31250126776416)",,0.025415,0.017338,0.019064,0.019012,0.019914,0.015031,0.017,0.013068,...,0.004506,0.002526,0.004634,0.003789,0.00854,0.013305,0.00771,0.004034,0.014997,0.003909
2,"(10.159296210744582, 105.31204703893451)",,0.01545,0.018538,0.037371,0.027198,0.023866,0.018861,0.018824,0.042241,...,0.00403,0.003524,0.005063,0.004555,0.006431,0.008836,0.009223,0.004337,0.015337,0.010332
3,"(10.160204668403901, 105.29524057223784)",,0.013442,0.024051,0.013149,0.023387,0.017419,0.015627,0.026167,0.023012,...,0.003406,0.003859,0.006067,0.003352,0.006592,0.004965,0.01065,0.006456,0.015204,0.013864
4,"(10.16111312606322, 105.29433211457857)",,0.02326,0.026049,0.018701,0.016434,0.016148,0.017184,0.016675,0.01623,...,0.006414,0.003783,0.003618,0.002465,0.00599,0.005196,0.007769,0.006742,0.009835,0.011362


--------------------
Non-labeled locations' vv:
Size: (250, 150)


Unnamed: 0,lat_and_long,type,2021-01-02 00:00:00,2021-01-03 00:00:00,2021-01-08 00:00:00,2021-01-14 00:00:00,2021-01-15 00:00:00,2021-01-20 00:00:00,2021-01-26 00:00:00,2021-01-27 00:00:00,...,2022-11-05 00:00:00,2022-11-06 00:00:00,2022-11-17 00:00:00,2022-11-18 00:00:00,2022-11-29 00:00:00,2022-11-30 00:00:00,2022-12-11 00:00:00,2022-12-12 00:00:00,2022-12-23 00:00:00,2022-12-24 00:00:00
0,"(10.15475392244798, 105.29932863170458)",,0.080928,0.053877,0.060698,0.034161,0.049538,0.033195,0.025789,0.037179,...,0.015904,0.00712,0.007181,0.013616,0.011907,0.015958,0.056372,0.082392,0.145097,0.075872
1,"(10.157025066596281, 105.31250126776416)",,0.107377,0.060672,0.068771,0.060683,0.064201,0.025616,0.035393,0.051404,...,0.015706,0.009185,0.008095,0.006441,0.01947,0.037045,0.024528,0.013496,0.093859,0.109441
2,"(10.159296210744582, 105.31204703893451)",,0.131175,0.099296,0.14582,0.085775,0.135281,0.058612,0.100506,0.085229,...,0.009712,0.008768,0.006172,0.011334,0.033422,0.027526,0.016441,0.077979,0.06856,0.119217
3,"(10.160204668403901, 105.29524057223784)",,0.045972,0.118725,0.034699,0.033722,0.027856,0.028253,0.06209,0.044625,...,0.011176,0.009826,0.009054,0.013271,0.01751,0.017737,0.048195,0.082171,0.172153,0.26278
4,"(10.16111312606322, 105.29433211457857)",,0.038526,0.076737,0.067264,0.034099,0.021747,0.020173,0.025992,0.041317,...,0.022214,0.010598,0.008824,0.014992,0.014938,0.00908,0.111897,0.079921,0.212132,0.22155


In [7]:
# Columns 0 and 1 hold "lat_and_long" and "type"; everything from column 2
# onwards is one reading per timestamp.
timestamp_columns = slice(2, None)
vh_data_non_labeled = pivoted_vh_non_labeled.iloc[:, timestamp_columns].to_numpy()
vv_data_non_labeled = pivoted_vv_non_labeled.iloc[:, timestamp_columns].to_numpy()


Aggregate features to form model input.

In [8]:
def aggregate_features(*feature_timeseries: np.ndarray):
    """Combine per-feature time series into a single 3-D array.

    Each positional argument is reshaped to ``(shape[0], shape[1], 1)`` and the
    reshaped arrays are joined along the last axis. Passing ``k`` arrays of
    shape ``(num_samples, num_timestamps)`` therefore returns an array of shape
    ``(num_samples, num_timestamps, k)``, with the features laid out along the
    last axis in the order they were given.
    """
    as_channels = []
    for series in feature_timeseries:
        num_samples, num_timestamps = series.shape[0], series.shape[1]
        # Add a trailing length-1 axis so the arrays can be joined feature-wise.
        as_channels.append(series.reshape(num_samples, num_timestamps, 1))
    return np.concatenate(as_channels, axis=-1)


# Feature order along the last axis is vh, then vv.
# NOTE(review): presumably this must match the order used when the model was
# trained in `build_model.ipynb` - confirm.
feature_channels = (vh_data_non_labeled, vv_data_non_labeled)
X_need_classify = aggregate_features(*feature_channels)
X_need_classify.shape  # (num_samples, num_timestamps, num_features)


(250, 148, 2)

# Make predictions and generate the submission CSV file

In [9]:
final_predictions_prob = model.predict(X_need_classify)

# Label a sample "Rice" only when its second score is strictly greater than
# its first; ties and everything else become "Non Rice".
final_predictions = [
    "Rice" if scores[0] < scores[1] else "Non Rice"
    for scores in final_predictions_prob
]


In [10]:
final_prediction_series = pd.Series(final_predictions)

# Pair each location identifier with its predicted label, row by row.
submission_columns = {
    "id": pivoted_vv_non_labeled["lat_and_long"].values,
    "target": final_prediction_series.values,
}
submission_df = pd.DataFrame(submission_columns)

# Write the result without the positional index, then display it.
submission_df.to_csv("prediction_result.csv", index=False)
submission_df


Unnamed: 0,id,target
0,"(10.15475392244798, 105.29932863170458)",Rice
1,"(10.157025066596281, 105.31250126776416)",Rice
2,"(10.159296210744582, 105.31204703893451)",Rice
3,"(10.160204668403901, 105.29524057223784)",Rice
4,"(10.16111312606322, 105.29433211457857)",Rice
...,...,...
245,"(10.823197068175638, 105.20403142324611)",Non Rice
246,"(10.823651297005297, 105.20312296558683)",Non Rice
247,"(10.823651297005297, 105.20357719441648)",Non Rice
248,"(10.823651297005297, 105.20403142324611)",Non Rice
