# Prediction in Table Format and Required Model Features

## Preparations

Install requirements

In [None]:
# Install the dependencies listed in ../requirements.txt into the environment of the running kernel.
%pip install -r ../requirements.txt

Import libraries

In [2]:
import pandas as pd
from catboost import CatBoostClassifier

Define crop classes with their corresponding IDs

In [3]:
# Lookup table: crop class ID -> crop name.
# The IDs are consecutive integers starting at 1, so they are generated by
# enumerating the names in ID order.
class_names = dict(enumerate(
    (
        'winter wheat',
        'spring oats',
        'spring barley',
        'spring rye',
        'corn',
        'soybean',
        'sunflower',
        'sugar beet',
        'rapeseed',
        'sorghum',
        'potato',
        'cotton',
        'spring wheat',
        'winter oats',
        'winter barley',
        'winter rye',
    ),
    start=1,
))

Load the input dataset with meteorological, spectral, and phenological features. The dataset contains the `field_id` column (the field identifier) and all predictor variables.

In [4]:
# Read the pre-computed feature table from Parquet and display it.
input_path = '../data/processed/input_data_for_model.parquet'
df = pd.read_parquet(input_path)
df

Unnamed: 0,field_id,wrdvi_wNDVI,wrdvi_mNDVI,wrdvi_S,wrdvi_A,wrdvi_mS,wrdvi_mA,wrdvi_doy_max,wrdvi_max,wrdvi_start_of_growth,...,median_prec_10,median_prec_6,sum_prec_7,median_prec_9,sum_prec_8,sum_prec_10,median_prec_5,sum_t_6,sum_t_10,median_t_6
0,6,-0.770454,-0.212571,115.470013,144.104936,0.372099,0.070134,124.63982,-0.343853,85.672336,...,0.000139,0.000498,0.019522,6.409595e-07,0.03852,0.085601,0.003177,8780.852101,8803.422937,292.584293
1,12,-0.781273,-0.578835,117.140097,175.106576,0.50615,0.058058,127.189095,-0.591881,89.86043,...,0.000124,0.000895,0.020659,6.409595e-07,0.042608,0.087821,0.0025,8772.371632,8798.792973,292.252506
2,9,-0.75942,-0.539249,110.664507,163.327541,0.095795,0.465224,151.407204,-0.544461,86.582791,...,0.000136,0.000404,0.019922,6.409595e-07,0.032824,0.081442,0.002767,8790.043263,8808.059656,292.849102
3,0,-0.783048,-0.539082,123.84396,173.288268,0.0562,0.082034,149.222111,-0.616081,82.030515,...,0.000119,0.00059,0.01906,6.409595e-07,0.044382,0.089953,0.003027,8765.082895,8793.843592,292.058374
4,5,-0.771126,-0.622494,17.95577,161.649957,0.658932,1.0,71.287144,-0.622494,14.474737,...,0.000119,0.00059,0.01906,6.409595e-07,0.044382,0.089953,0.003027,8765.082895,8793.843592,292.058374
5,13,-0.781634,-0.560841,116.591929,174.64498,0.786737,0.10721,125.732366,-0.562167,95.687344,...,0.000119,0.00059,0.01906,6.409595e-07,0.044382,0.089953,0.003027,8765.082895,8793.843592,292.058374
6,14,-0.761041,-0.49056,128.79572,170.279089,0.139118,0.034909,148.857929,-0.593104,58.358679,...,0.000119,0.00059,0.01906,6.409595e-07,0.044382,0.089953,0.003027,8765.082895,8793.843592,292.058374


## Inference

### CropGRM-large

Select predictors for the CropGRM-large model

In [5]:
# Column-name prefixes of the spectral band / vegetation-index feature families
# that are passed to the model.
indices = (
    'red', 'nir', 'swir1', 'swir2', 'green', 'blue', 'wrdvi', 'ndre', 'ndyi',
    'median_red', 'median_nir', 'median_swir1', 'median_swir2', 'median_blue', 'median_green',
)
# Every column whose name starts with one of the prefixes; the boolean mask keeps
# the columns in the order they have in `df`.
matched_cols = df.loc[:, df.columns.str.startswith(indices)]

# Meteorological predictors: sum_/median_ aggregates of `t` and `prec` with
# suffixes 4..10 (presumably calendar months - confirm against the data pipeline).
cols_to_select = [
    f'{prefix}_{suffix}'
    for prefix in ('sum_t', 'sum_prec', 'median_t', 'median_prec')
    for suffix in range(4, 11)
]
# Meteorological columns first, then the matched spectral columns.
cols_to_select.extend(matched_cols.columns)

# Keep only rows with a complete feature set. The explicit .copy() makes `sample`
# an independent frame, so adding prediction columns to it later does not raise
# pandas' SettingWithCopyWarning.
sample = df.dropna(subset=cols_to_select).copy()
pred_features = sample[cols_to_select]

Load the CropGRM-large model

In [6]:
# Create an empty classifier, then populate it from the saved CropGRM-large model file.
model = CatBoostClassifier()
model.load_model('../models/CropGRM-large.cbm')

Run the prediction and save the results to a CSV file

In [7]:
# Predict one class ID per row of `pred_features`.
# ravel() guarantees a flat 1-D array: CatBoost can return multiclass labels as an
# (n, 1) column vector - TODO confirm the shape for this model.
predicted_ids = model.predict(pred_features).ravel()

# assign() returns a new frame instead of writing columns into the dropna() result,
# which avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
# 'class' is a Python keyword, so it is passed through a dict.
sample = sample.assign(**{'class': predicted_ids})
# IDs that are missing from class_names end up as NaN in class_name.
sample = sample.assign(class_name=sample['class'].map(class_names))

# Write the field identifier and the predicted crop name, without the index column.
sample[['field_id', 'class_name']].to_csv('../data/final/CropGRM-large_predictions.csv', index=False)

### CropGRM-optimized

Select predictors for the CropGRM-optimized model

In [8]:
# Meteorological predictors: sum_/median_ aggregates of `t` and `prec` with
# suffixes 4..10, generated in the order sum_t_*, sum_prec_*, median_t_*, median_prec_*.
meteo_cols = [
    f'{prefix}_{suffix}'
    for prefix in ('sum_t', 'sum_prec', 'median_t', 'median_prec')
    for suffix in range(4, 11)
]

# Spectral / phenological predictors, one feature family per line.
# The order of the names is kept exactly as the model's feature list requires.
spectral_cols = [
    'wrdvi_wNDVI', 'wrdvi_S', 'wrdvi_A', 'wrdvi_mS', 'wrdvi_mA', 'wrdvi_max', 'wrdvi_end_of_growth',
    'ndre_wNDVI', 'ndre_S', 'ndre_A', 'ndre_mS', 'ndre_mA', 'ndre_max',
    'ndyi_doy_max', 'ndyi_max',
    'red_min', 'red_doy_min',
    'median_red_fitted_4', 'median_red_fitted_5', 'median_red_fitted_6', 'median_red_fitted_7',
    'median_red_fitted_8', 'median_red_fitted_9', 'median_red_fitted_10',
    'nir_max',
    'median_nir_fitted_4', 'median_nir_fitted_5', 'median_nir_fitted_6', 'median_nir_fitted_7',
    'median_nir_fitted_8', 'median_nir_fitted_9', 'median_nir_fitted_10',
    'median_blue_fitted_5', 'median_blue_fitted_7', 'median_blue_fitted_8', 'median_blue_fitted_9',
    'median_swir1_fitted_5', 'median_swir1_fitted_6', 'median_swir1_fitted_7',
    'median_swir1_fitted_8', 'median_swir1_fitted_9',
    'median_green_fitted_5', 'median_green_fitted_6', 'median_green_fitted_7',
    'median_green_fitted_8', 'median_green_fitted_9',
    'swir2_min',
    'median_swir2_fitted_4', 'median_swir2_fitted_5', 'median_swir2_fitted_6', 'median_swir2_fitted_7',
    'median_swir2_fitted_8', 'median_swir2_fitted_9', 'median_swir2_fitted_10',
]

cols_to_select = meteo_cols + spectral_cols

# Keep only rows with a complete feature set. The explicit .copy() makes `sample`
# an independent frame, so adding prediction columns to it later does not raise
# pandas' SettingWithCopyWarning.
sample = df.dropna(subset=cols_to_select).copy()
pred_features = sample[cols_to_select]

Load the CropGRM-optimized model

In [9]:
# Create an empty classifier, then populate it from the saved CropGRM-optimized model file.
model = CatBoostClassifier()
model.load_model('../models/CropGRM-optimized.cbm')

Run the prediction and save the results to a CSV file

In [10]:
# Predict one class ID per row of `pred_features`.
# ravel() guarantees a flat 1-D array: CatBoost can return multiclass labels as an
# (n, 1) column vector - TODO confirm the shape for this model.
predicted_ids = model.predict(pred_features).ravel()

# assign() returns a new frame instead of writing columns into the dropna() result,
# which avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
# 'class' is a Python keyword, so it is passed through a dict.
sample = sample.assign(**{'class': predicted_ids})
# IDs that are missing from class_names end up as NaN in class_name.
sample = sample.assign(class_name=sample['class'].map(class_names))

# Write the field identifier and the predicted crop name, without the index column.
sample[['field_id', 'class_name']].to_csv('../data/final/CropGRM-optimized_predictions.csv', index=False)

### CropGRM-small

Select predictors for the CropGRM-small model

In [11]:
# Predictors of the CropGRM-small model, one feature family per line.
# The order of the names is kept exactly as the model's feature list requires.
cols_to_select = [
    'sum_t_4', 'sum_t_5', 'sum_t_6', 'sum_t_7', 'sum_t_8', 'sum_t_9', 'sum_t_10',
    'sum_prec_4', 'sum_prec_6', 'sum_prec_10',
    'median_t_4', 'median_t_6', 'median_t_9', 'median_t_10',
    'ndre_S',
    'median_red_fitted_8',
    'median_nir_fitted_5', 'median_nir_fitted_8',
    'median_swir1_fitted_6', 'median_swir1_fitted_7', 'median_swir1_fitted_8',
    'median_green_fitted_7', 'median_green_fitted_8',
    'median_swir2_fitted_5',
]

# Keep only rows with a complete feature set. The explicit .copy() makes `sample`
# an independent frame, so adding prediction columns to it later does not raise
# pandas' SettingWithCopyWarning.
sample = df.dropna(subset=cols_to_select).copy()
pred_features = sample[cols_to_select]

Load the CropGRM-small model

In [12]:
# Create an empty classifier, then populate it from the saved CropGRM-small model file.
model = CatBoostClassifier()
model.load_model('../models/CropGRM-small.cbm')

Run the prediction and save the results to a CSV file

In [13]:
# Predict one class ID per row of `pred_features`.
# ravel() guarantees a flat 1-D array: CatBoost can return multiclass labels as an
# (n, 1) column vector - TODO confirm the shape for this model.
predicted_ids = model.predict(pred_features).ravel()

# assign() returns a new frame instead of writing columns into the dropna() result,
# which avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
# 'class' is a Python keyword, so it is passed through a dict.
sample = sample.assign(**{'class': predicted_ids})
# IDs that are missing from class_names end up as NaN in class_name.
sample = sample.assign(class_name=sample['class'].map(class_names))

# Write the field identifier and the predicted crop name, without the index column.
sample[['field_id', 'class_name']].to_csv('../data/final/CropGRM-small_predictions.csv', index=False)