In [5]:
import numpy as np
import pandas as pd
import xarray as xr
import seaborn as sns

from pathlib import Path
from tqdm.autonotebook import tqdm
from sklearn.model_selection import train_test_split         # Split data into train and test set

from utils import evaluate_classifier, get_sklearn_df 

# Automatically prints execution time for the individual cells
%load_ext autotime

# Automatically reloads functions defined in external files
%load_ext autoreload
%autoreload 2

# Set xarray to use html as display_style
xr.set_options(display_style="html")

# Tell matplotlib to plot directly in the notebook
%matplotlib inline  

# The path to the project (so absoute file paths can be used throughout the notebook)
PROJ_PATH = Path.cwd().parent

# Set seed for random generators
RANDOM_SEED = 42

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
time: 29 ms


In [6]:
netcdf_path = (PROJ_PATH / 'data' / 'processed' / 'FieldPolygons2019_stats').with_suffix('.nc')
ds = xr.open_dataset(netcdf_path, engine="h5netcdf")
ds  # Remember to close the dataset before the netcdf file can be rewritten in cells above

time: 161 ms


In [7]:
ds.close()

time: 35.2 ms


In [8]:
# Convert the xarray dataset to pandas dataframe
df = ds.to_dataframe()
df = df.reset_index()  # Removes MultiIndex
df = df.drop(columns=['cvr', 'gb', 'gbanmeldt', 'journalnr', 'marknr', 'pass_mode', 'relative_orbit'])
df = df.dropna()

time: 11.6 s


In [None]:
year = 2018
for i in range(7, 24, 1):
    month = (i % 12) + 1
    if month == 1:
        year += 1
        
    print(f"--------------------------------------------------------------------------------------------------")
    print(f"Dataset from 2018-07-01 to {year}-{month:02}-01")
    df_sklearn = get_sklearn_df(polygons_year=2019, 
                                satellite_dates=slice('2018-07-01', f'{year}-{month:02}-01'), 
                                fields='all', 
                                satellite='all', 
                                polarization='all',
                                crop_type='all',
                                netcdf_path=netcdf_path)

    df_sklearn = df_sklearn[df_sklearn['afgroede'].isin(['Vårbyg', 'Vinterhvede', 'Silomajs', 'Vinterraps', 
                                                         'Vinterbyg', 'Vårhavre', 'Vinterhybridrug'])]
    crop_codes = df_sklearn['afgkode'].unique()
    mapping_dict = {}
    class_names = [] 

    for i, crop_code in enumerate(crop_codes):
        mapping_dict[crop_code] = i
        crop_type = df_sklearn[df_sklearn['afgkode'] == crop_code].head(1)['afgroede'].values[0]
        class_names.append(crop_type)

    df_sklearn_remapped = df_sklearn.copy()
    df_sklearn_remapped['afgkode'] = df_sklearn_remapped['afgkode'].map(mapping_dict)
    #print(f"Crop types: {class_names}")

    array = df_sklearn_remapped.values

    # Define the independent variables as features.
    X = np.float32(array[:,3:])  # The features 

    # Define the target (dependent) variable as labels.
    y = np.int8(array[:,1])  # The column 'afgkode'

    # Create a train/test split using 30% test size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_SEED)

    #print(f"Train samples:      {len(y_train)}")
    #print(f"Test samples:       {len(y_test)}")
    #print(f"Number of features: {len(X[0,:])}")

    from sklearn.linear_model import LogisticRegression          

    # Instantiate and evaluate classifier
    clf = LogisticRegression(solver='lbfgs', multi_class='auto', n_jobs=10, max_iter=1000)
    clf_trained = evaluate_classifier(clf, X_train, X_test, y_train, y_test, class_names, feature_scale=True, plot_confusion_matrix=False)

In [29]:
year = 2018
for i in range(7, 24, 1):
    month = (i % 12) + 1
    if month == 1:
        year += 1
        
    print(f"--------------------------------------------------------------------------------------------------")
    print(f"Dataset from 2018-07-01 to {year}-{month:02}-01")
    df_sklearn = get_sklearn_df(polygons_year=2019, 
                                satellite_dates=slice('2018-07-01', f'{year}-{month:02}-01'), 
                                fields='all', 
                                satellite='all', 
                                polarization='all',
                                crop_type='all',
                                netcdf_path=netcdf_path)

    df_sklearn = df_sklearn[df_sklearn['afgroede'].isin(['Vårbyg', 'Vinterhvede', 'Silomajs', 'Vinterraps', 
                                                         'Vinterbyg', 'Vårhavre', 'Vinterhybridrug'])]
    crop_codes = df_sklearn['afgkode'].unique()
    mapping_dict = {}
    class_names = [] 

    for i, crop_code in enumerate(crop_codes):
        mapping_dict[crop_code] = i
        crop_type = df_sklearn[df_sklearn['afgkode'] == crop_code].head(1)['afgroede'].values[0]
        class_names.append(crop_type)

    df_sklearn_remapped = df_sklearn.copy()
    df_sklearn_remapped['afgkode'] = df_sklearn_remapped['afgkode'].map(mapping_dict)
    #print(f"Crop types: {class_names}")

    array = df_sklearn_remapped.values

    # Define the independent variables as features.
    X = np.float32(array[:,3:])  # The features 

    # Define the target (dependent) variable as labels.
    y = np.int8(array[:,1])  # The column 'afgkode'

    # Create a train/test split using 30% test size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    #print(f"Train samples:      {len(y_train)}")
    #print(f"Test samples:       {len(y_test)}")
    #print(f"Number of features: {len(X[0,:])}")

    from sklearn.linear_model import LogisticRegressionCV          

    # Instantiate and evaluate classifier
    clf = LogisticRegressionCV(solver='lbfgs', multi_class='auto', cv=10, n_jobs=10, random_state=RANDOM_SEED, max_iter=1000)
    clf_trained = evaluate_classifier(clf, X_train, X_test, y_train, y_test, class_names, feature_scale=True, plot_confusion_matrix=False)

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-08-01
Training time: 11.2718 s
Prediction time: 0.0016 s

Report:

Train accuracy: 0.3525
Test accuracy: 0.3559

                  precision    recall  f1-score   support

         Vårbyg       0.19      0.08      0.11      1149
    Vinterhvede       0.38      0.20      0.26      1122
       Silomajs       0.48      0.70      0.57      1432
     Vinterraps       0.35      0.32      0.34      1095
      Vinterbyg       0.26      0.39      0.31      1306
       Vårhavre       0.28      0.27      0.28      1376
Vinterhybridrug       0.42      0.44      0.43      1415

       accuracy                           0.36      8895
      macro avg       0.34      0.34      0.33      8895
   weighted avg       0.34      0.36      0.34      8895

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to



Training time: 163.1251 s
Prediction time: 0.0043 s

Report:

Train accuracy: 0.754
Test accuracy: 0.7339

                  precision    recall  f1-score   support

         Vårbyg       0.53      0.48      0.51      1182
    Vinterhvede       0.77      0.79      0.78      1135
       Silomajs       0.74      0.76      0.75      1444
     Vinterraps       0.98      0.99      0.98      1114
      Vinterbyg       0.83      0.82      0.83      1293
       Vårhavre       0.53      0.54      0.54      1336
Vinterhybridrug       0.77      0.77      0.77      1382

       accuracy                           0.73      8886
      macro avg       0.74      0.74      0.74      8886
   weighted avg       0.73      0.73      0.73      8886

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-06-01
Training time: 165.6833 s
Prediction time: 0.0041 s

Report:

Train accuracy: 0.8611
Test accuracy: 0.8528

                 

In [30]:
year = 2018
for i in range(7, 24, 1):
    month = (i % 12) + 1
    if month == 1:
        year += 1
        
    print(f"--------------------------------------------------------------------------------------------------")
    print(f"Dataset from 2018-07-01 to {year}-{month:02}-01")
    df_sklearn = get_sklearn_df(polygons_year=2019, 
                                satellite_dates=slice('2018-07-01', f'{year}-{month:02}-01'), 
                                fields='all', 
                                satellite='all', 
                                polarization='all',
                                crop_type='all',
                                netcdf_path=netcdf_path)

    df_sklearn = df_sklearn[df_sklearn['afgroede'].isin(['Vårbyg', 'Vinterhvede', 'Silomajs', 'Vinterraps', 
                                                         'Vinterbyg', 'Vårhavre', 'Vinterhybridrug'])]
    crop_codes = df_sklearn['afgkode'].unique()
    mapping_dict = {}
    class_names = [] 

    for i, crop_code in enumerate(crop_codes):
        mapping_dict[crop_code] = i
        crop_type = df_sklearn[df_sklearn['afgkode'] == crop_code].head(1)['afgroede'].values[0]
        class_names.append(crop_type)

    df_sklearn_remapped = df_sklearn.copy()
    df_sklearn_remapped['afgkode'] = df_sklearn_remapped['afgkode'].map(mapping_dict)
    #print(f"Crop types: {class_names}")

    array = df_sklearn_remapped.values

    # Define the independent variables as features.
    X = np.float32(array[:,3:])  # The features 

    # Define the target (dependent) variable as labels.
    y = np.int8(array[:,1])  # The column 'afgkode'

    # Create a train/test split using 30% test size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    #print(f"Train samples:      {len(y_train)}")
    #print(f"Test samples:       {len(y_test)}")
    #print(f"Number of features: {len(X[0,:])}")

    from sklearn.linear_model import LogisticRegressionCV          

    # Instantiate and evaluate classifier
    clf = LogisticRegressionCV(solver='lbfgs', multi_class='auto', n_jobs=10, cv=10, random_state=RANDOM_SEED, max_iter=1000, class_weight='balanced')
    clf_trained = evaluate_classifier(clf, X_train, X_test, y_train, y_test, class_names, feature_scale=True, plot_confusion_matrix=False)

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-08-01
Training time: 5.6656 s
Prediction time: 0.001 s

Report:

Train accuracy: 0.3499
Test accuracy: 0.3441

                  precision    recall  f1-score   support

         Vårbyg       0.20      0.13      0.16      1190
    Vinterhvede       0.32      0.27      0.30      1113
       Silomajs       0.49      0.68      0.57      1406
     Vinterraps       0.28      0.41      0.34      1119
      Vinterbyg       0.27      0.28      0.27      1323
       Vårhavre       0.27      0.23      0.25      1326
Vinterhybridrug       0.45      0.37      0.40      1418

       accuracy                           0.34      8895
      macro avg       0.33      0.34      0.33      8895
   weighted avg       0.33      0.34      0.33      8895

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2

In [31]:
year = 2018
for i in range(7, 24, 1):
    month = (i % 12) + 1
    if month == 1:
        year += 1
        
    print(f"--------------------------------------------------------------------------------------------------")
    print(f"Dataset from 2018-07-01 to {year}-{month:02}-01")
    df_sklearn = get_sklearn_df(polygons_year=2019, 
                                satellite_dates=slice('2018-07-01', f'{year}-{month:02}-01'), 
                                fields='all', 
                                satellite='all', 
                                polarization='all',
                                crop_type='all',
                                netcdf_path=netcdf_path)

    df_sklearn = df_sklearn[df_sklearn['afgroede'].isin(['Vårbyg', 'Vinterhvede', 'Silomajs', 'Vinterraps', 
                                                         'Vinterbyg', 'Vårhavre', 'Vinterhybridrug'])]
    crop_codes = df_sklearn['afgkode'].unique()
    mapping_dict = {}
    class_names = [] 

    for i, crop_code in enumerate(crop_codes):
        mapping_dict[crop_code] = i
        crop_type = df_sklearn[df_sklearn['afgkode'] == crop_code].head(1)['afgroede'].values[0]
        class_names.append(crop_type)

    df_sklearn_remapped = df_sklearn.copy()
    df_sklearn_remapped['afgkode'] = df_sklearn_remapped['afgkode'].map(mapping_dict)
    #print(f"Crop types: {class_names}")

    array = df_sklearn_remapped.values

    # Define the independent variables as features.
    X = np.float32(array[:,3:])  # The features 

    # Define the target (dependent) variable as labels.
    y = np.int8(array[:,1])  # The column 'afgkode'

    # Create a train/test split using 30% test size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_SEED)

    #print(f"Train samples:      {len(y_train)}")
    #print(f"Test samples:       {len(y_test)}")
    #print(f"Number of features: {len(X[0,:])}")

    from sklearn.svm import SVC   

    # Instantiate and evaluate classifier
    clf = SVC(kernel='rbf')
    clf_trained = evaluate_classifier(clf, X_train, X_test, y_train, y_test, class_names, feature_scale=True, plot_confusion_matrix=False)

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-08-01
Training time: 25.5804 s
Prediction time: 5.5676 s

Report:

Train accuracy: 0.4012
Test accuracy: 0.3794

                  precision    recall  f1-score   support

         Vårbyg       0.28      0.13      0.18      1192
    Vinterhvede       0.57      0.21      0.31      1092
       Silomajs       0.67      0.58      0.62      1469
     Vinterraps       0.48      0.29      0.36      1125
      Vinterbyg       0.24      0.52      0.33      1293
       Vårhavre       0.28      0.42      0.33      1312
Vinterhybridrug       0.48      0.41      0.45      1412

       accuracy                           0.38      8895
      macro avg       0.43      0.37      0.37      8895
   weighted avg       0.43      0.38      0.38      8895

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to

In [35]:
year = 2018
for i in range(7, 24, 1):
    month = (i % 12) + 1
    if month == 1:
        year += 1
        
    print(f"--------------------------------------------------------------------------------------------------")
    print(f"Dataset from 2018-07-01 to {year}-{month:02}-01")
    df_sklearn = get_sklearn_df(polygons_year=2019, 
                                satellite_dates=slice('2018-07-01', f'{year}-{month:02}-01'), 
                                fields='all', 
                                satellite='all', 
                                polarization='all',
                                crop_type='all',
                                netcdf_path=netcdf_path)

    df_sklearn = df_sklearn[df_sklearn['afgroede'].isin(['Vårbyg', 'Vinterhvede', 'Silomajs', 'Vinterraps', 
                                                         'Vinterbyg', 'Vårhavre', 'Vinterhybridrug'])]
    crop_codes = df_sklearn['afgkode'].unique()
    mapping_dict = {}
    class_names = [] 

    for i, crop_code in enumerate(crop_codes):
        mapping_dict[crop_code] = i
        crop_type = df_sklearn[df_sklearn['afgkode'] == crop_code].head(1)['afgroede'].values[0]
        class_names.append(crop_type)

    df_sklearn_remapped = df_sklearn.copy()
    df_sklearn_remapped['afgkode'] = df_sklearn_remapped['afgkode'].map(mapping_dict)
    #print(f"Crop types: {class_names}")

    array = df_sklearn_remapped.values

    # Define the independent variables as features.
    X = np.float32(array[:,3:])  # The features 

    # Define the target (dependent) variable as labels.
    y = np.int8(array[:,1])  # The column 'afgkode'

    # Create a train/test split using 30% test size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_SEED)

    #print(f"Train samples:      {len(y_train)}")
    #print(f"Test samples:       {len(y_test)}")
    #print(f"Number of features: {len(X[0,:])}")

    from sklearn.model_selection import GridSearchCV
    param_grid = {'C': [1, 10, 100], 'gamma': [0.001, 0.01, 0.1], 'kernel': ['rbf']}
    grid = GridSearchCV(SVC(), param_grid, refit=True, cv=5, verbose=2, n_jobs=16)

    grid_trained, _ = evaluate_classifier(grid, X_train, X_test, y_train, y_test, class_names, feature_scale=True)

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-08-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   36.4s
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  1.8min remaining:   23.7s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  2.8min finished


Training time: 194.1121 s
Prediction time: 5.4108 s

Report:

Train accuracy: 0.4139
Test accuracy: 0.38

                  precision    recall  f1-score   support

         Vårbyg       0.27      0.14      0.18      1192
    Vinterhvede       0.53      0.22      0.31      1092
       Silomajs       0.67      0.58      0.62      1469
     Vinterraps       0.48      0.30      0.37      1125
      Vinterbyg       0.24      0.51      0.33      1293
       Vårhavre       0.28      0.41      0.33      1312
Vinterhybridrug       0.47      0.42      0.45      1412

       accuracy                           0.38      8895
      macro avg       0.42      0.37      0.37      8895
   weighted avg       0.43      0.38      0.38      8895

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-09-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   49.2s
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  2.2min remaining:   28.6s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  2.4min finished


Training time: 178.6734 s
Prediction time: 7.5917 s

Report:

Train accuracy: 0.6292
Test accuracy: 0.4641

                  precision    recall  f1-score   support

         Vårbyg       0.32      0.22      0.26      1192
    Vinterhvede       0.49      0.32      0.39      1091
       Silomajs       0.71      0.66      0.68      1472
     Vinterraps       0.53      0.75      0.62      1116
      Vinterbyg       0.33      0.42      0.37      1280
       Vårhavre       0.37      0.36      0.36      1346
Vinterhybridrug       0.48      0.49      0.49      1394

       accuracy                           0.46      8891
      macro avg       0.46      0.46      0.45      8891
   weighted avg       0.46      0.46      0.46      8891

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-10-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   43.7s
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  2.1min remaining:   26.6s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  2.4min finished


Training time: 170.6335 s
Prediction time: 8.216 s

Report:

Train accuracy: 0.6604
Test accuracy: 0.5751

                  precision    recall  f1-score   support

         Vårbyg       0.41      0.40      0.41      1191
    Vinterhvede       0.52      0.47      0.49      1092
       Silomajs       0.76      0.63      0.69      1461
     Vinterraps       0.81      0.85      0.83      1142
      Vinterbyg       0.49      0.57      0.53      1281
       Vårhavre       0.45      0.51      0.48      1337
Vinterhybridrug       0.62      0.59      0.60      1386

       accuracy                           0.58      8890
      macro avg       0.58      0.57      0.58      8890
   weighted avg       0.58      0.58      0.58      8890

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-11-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   53.1s
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  2.4min remaining:   31.5s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  3.1min finished


Training time: 207.2534 s
Prediction time: 9.6013 s

Report:

Train accuracy: 0.7687
Test accuracy: 0.6491

                  precision    recall  f1-score   support

         Vårbyg       0.45      0.46      0.46      1189
    Vinterhvede       0.62      0.63      0.63      1092
       Silomajs       0.76      0.66      0.71      1468
     Vinterraps       0.93      0.95      0.94      1128
      Vinterbyg       0.63      0.66      0.64      1250
       Vårhavre       0.50      0.54      0.52      1363
Vinterhybridrug       0.68      0.67      0.68      1398

       accuracy                           0.65      8888
      macro avg       0.65      0.65      0.65      8888
   weighted avg       0.65      0.65      0.65      8888

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-12-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   56.7s
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  2.7min remaining:   35.1s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  3.5min finished


Training time: 231.9623 s
Prediction time: 10.2674 s

Report:

Train accuracy: 0.8162
Test accuracy: 0.6755

                  precision    recall  f1-score   support

         Vårbyg       0.45      0.48      0.46      1189
    Vinterhvede       0.65      0.67      0.66      1092
       Silomajs       0.77      0.69      0.73      1468
     Vinterraps       0.97      0.96      0.96      1128
      Vinterbyg       0.69      0.72      0.70      1250
       Vårhavre       0.52      0.54      0.53      1363
Vinterhybridrug       0.72      0.69      0.71      1398

       accuracy                           0.68      8888
      macro avg       0.68      0.68      0.68      8888
   weighted avg       0.68      0.68      0.68      8888

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-01-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  1.1min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  3.5min remaining:   45.4s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  4.4min finished


Training time: 291.3788 s
Prediction time: 12.0964 s

Report:

Train accuracy: 0.8716
Test accuracy: 0.6932

                  precision    recall  f1-score   support

         Vårbyg       0.47      0.47      0.47      1189
    Vinterhvede       0.68      0.70      0.69      1092
       Silomajs       0.77      0.71      0.74      1467
     Vinterraps       0.98      0.97      0.97      1129
      Vinterbyg       0.74      0.74      0.74      1249
       Vårhavre       0.53      0.55      0.54      1364
Vinterhybridrug       0.72      0.74      0.73      1398

       accuracy                           0.69      8888
      macro avg       0.70      0.70      0.70      8888
   weighted avg       0.70      0.69      0.69      8888

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-02-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  1.3min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  4.3min remaining:   55.2s
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  5.3min finished


Training time: 349.1709 s
Prediction time: 13.9295 s

Report:

Train accuracy: 0.913
Test accuracy: 0.7044

                  precision    recall  f1-score   support

         Vårbyg       0.47      0.47      0.47      1188
    Vinterhvede       0.70      0.71      0.70      1092
       Silomajs       0.76      0.70      0.73      1469
     Vinterraps       0.98      0.97      0.97      1134
      Vinterbyg       0.79      0.78      0.78      1297
       Vårhavre       0.51      0.55      0.53      1323
Vinterhybridrug       0.75      0.76      0.75      1384

       accuracy                           0.70      8887
      macro avg       0.71      0.71      0.71      8887
   weighted avg       0.71      0.70      0.71      8887

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-03-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  1.3min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  5.5min remaining:  1.2min
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  6.6min finished


Training time: 430.6615 s
Prediction time: 15.3524 s

Report:

Train accuracy: 0.94
Test accuracy: 0.7153

                  precision    recall  f1-score   support

         Vårbyg       0.46      0.46      0.46      1188
    Vinterhvede       0.74      0.74      0.74      1092
       Silomajs       0.75      0.71      0.73      1469
     Vinterraps       0.98      0.97      0.98      1135
      Vinterbyg       0.81      0.82      0.82      1258
       Vårhavre       0.51      0.53      0.52      1336
Vinterhybridrug       0.78      0.80      0.79      1408

       accuracy                           0.72      8886
      macro avg       0.72      0.72      0.72      8886
   weighted avg       0.72      0.72      0.72      8886

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-04-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  1.4min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  5.1min remaining:  1.1min
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed:  6.8min finished


Training time: 648.4289 s
Prediction time: 71.1669 s

Report:

Train accuracy: 0.9661
Test accuracy: 0.7469

                  precision    recall  f1-score   support

         Vårbyg       0.50      0.51      0.50      1188
    Vinterhvede       0.79      0.79      0.79      1092
       Silomajs       0.78      0.75      0.76      1469
     Vinterraps       0.98      0.97      0.98      1135
      Vinterbyg       0.85      0.85      0.85      1264
       Vårhavre       0.54      0.55      0.54      1338
Vinterhybridrug       0.82      0.83      0.82      1400

       accuracy                           0.75      8886
      macro avg       0.75      0.75      0.75      8886
   weighted avg       0.75      0.75      0.75      8886

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-05-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  4.1min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed: 18.5min remaining:  4.0min
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed: 25.4min finished


Training time: 1705.9983 s
Prediction time: 54.7884 s

Report:

Train accuracy: 0.8857
Test accuracy: 0.7791

                  precision    recall  f1-score   support

         Vårbyg       0.53      0.50      0.51      1188
    Vinterhvede       0.83      0.85      0.84      1092
       Silomajs       0.80      0.81      0.81      1469
     Vinterraps       0.99      0.99      0.99      1135
      Vinterbyg       0.89      0.88      0.89      1264
       Vårhavre       0.57      0.60      0.58      1337
Vinterhybridrug       0.85      0.85      0.85      1401

       accuracy                           0.78      8886
      macro avg       0.78      0.78      0.78      8886
   weighted avg       0.78      0.78      0.78      8886

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-06-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  3.7min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed: 18.7min remaining:  4.0min
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed: 25.5min finished


Training time: 1642.4239 s
Prediction time: 48.8537 s

Report:

Train accuracy: 0.9004
Test accuracy: 0.8764

                  precision    recall  f1-score   support

         Vårbyg       0.76      0.68      0.72      1188
    Vinterhvede       0.90      0.90      0.90      1092
       Silomajs       0.91      0.96      0.93      1467
     Vinterraps       1.00      1.00      1.00      1151
      Vinterbyg       0.95      0.90      0.92      1269
       Vårhavre       0.72      0.75      0.74      1325
Vinterhybridrug       0.90      0.93      0.92      1393

       accuracy                           0.88      8885
      macro avg       0.88      0.87      0.88      8885
   weighted avg       0.88      0.88      0.88      8885

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-07-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  3.0min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed: 18.5min remaining:  4.0min
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed: 24.7min finished


Training time: 1560.2114 s
Prediction time: 25.0906 s

Report:

Train accuracy: 0.9595
Test accuracy: 0.9459

                  precision    recall  f1-score   support

         Vårbyg       0.90      0.86      0.88      1188
    Vinterhvede       0.96      0.96      0.96      1092
       Silomajs       0.96      0.97      0.97      1467
     Vinterraps       1.00      1.00      1.00      1151
      Vinterbyg       0.98      0.97      0.97      1269
       Vårhavre       0.87      0.89      0.88      1325
Vinterhybridrug       0.96      0.97      0.96      1393

       accuracy                           0.95      8885
      macro avg       0.95      0.95      0.95      8885
   weighted avg       0.95      0.95      0.95      8885

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-08-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:  2.6min
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed: 19.5min remaining:  4.2min
[Parallel(n_jobs=16)]: Done  45 out of  45 | elapsed: 24.2min finished


Training time: 1504.5245 s
Prediction time: 20.0229 s

Report:

Train accuracy: 0.9749
Test accuracy: 0.9656

                  precision    recall  f1-score   support

         Vårbyg       0.91      0.93      0.92      1188
    Vinterhvede       0.97      0.98      0.97      1092
       Silomajs       0.97      0.98      0.98      1467
     Vinterraps       1.00      1.00      1.00      1151
      Vinterbyg       0.99      0.98      0.98      1269
       Vårhavre       0.94      0.92      0.93      1325
Vinterhybridrug       0.98      0.98      0.98      1393

       accuracy                           0.97      8885
      macro avg       0.97      0.97      0.97      8885
   weighted avg       0.97      0.97      0.97      8885

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2019-09-01
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   48.4s
[Parallel(n_jobs=16)]: Done  37 out of  45 | elapsed:  1.7min remaining:   22.2s


KeyboardInterrupt: 

time: 3h 2min 29s


In [None]:
year = 2018
for i in range(7, 24, 1):
    month = (i % 12) + 1
    if month == 1:
        year += 1
        
    print(f"--------------------------------------------------------------------------------------------------")
    print(f"Dataset from 2018-07-01 to {year}-{month:02}-01")
    df_sklearn = get_sklearn_df(polygons_year=2019, 
                                satellite_dates=slice('2018-07-01', f'{year}-{month:02}-01'), 
                                fields='all', 
                                satellite='all', 
                                polarization='all',
                                crop_type='all',
                                netcdf_path=netcdf_path)

    df_sklearn = df_sklearn[df_sklearn['afgroede'].isin(['Vårbyg', 'Vinterhvede', 'Silomajs', 'Vinterraps', 
                                                         'Vinterbyg', 'Vårhavre', 'Vinterhybridrug'])]
    crop_codes = df_sklearn['afgkode'].unique()
    mapping_dict = {}
    class_names = [] 

    for i, crop_code in enumerate(crop_codes):
        mapping_dict[crop_code] = i
        crop_type = df_sklearn[df_sklearn['afgkode'] == crop_code].head(1)['afgroede'].values[0]
        class_names.append(crop_type)

    df_sklearn_remapped = df_sklearn.copy()
    df_sklearn_remapped['afgkode'] = df_sklearn_remapped['afgkode'].map(mapping_dict)
    #print(f"Crop types: {class_names}")

    array = df_sklearn_remapped.values

    # Define the independent variables as features.
    X = np.float32(array[:,3:])  # The features 

    # Define the target (dependent) variable as labels.
    y = np.int8(array[:,1])  # The column 'afgkode'

    # Create a train/test split using 30% test size.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_SEED)

    #print(f"Train samples:      {len(y_train)}")
    #print(f"Test samples:       {len(y_test)}")
    #print(f"Number of features: {len(X[0,:])}")

    from sklearn.svm import SVC   
    from sklearn.model_selection import GridSearchCV
    param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1], 'kernel': ['rbf']}
    grid = GridSearchCV(SVC(class_weight='balanced'), param_grid, refit=True, cv=5, verbose=20, n_jobs=32)

    grid_trained, _ = evaluate_classifier(grid, X_train, X_test, y_train, y_test, class_names, feature_scale=True)

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-08-01
Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=32)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=32)]: Done   1 tasks      | elapsed:  1.0min
[Parallel(n_jobs=32)]: Done   2 tasks      | elapsed:  1.0min
[Parallel(n_jobs=32)]: Done   3 tasks      | elapsed:  1.0min
[Parallel(n_jobs=32)]: Done   4 tasks      | elapsed:  1.0min
[Parallel(n_jobs=32)]: Done   5 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done   6 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done   7 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done   8 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done   9 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done  10 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done  11 tasks      | elapsed:  1.1min
[Parallel(n_jobs=32)]: Done  12 tasks      | elapsed:  1.2min
[Parallel(n_jobs=32)]: Done  13 tasks      | elapsed:  1.2min
[Parallel(n_jobs=32)]: Done  14 tasks      | elapsed:  1.2min
[Parallel(n_jobs=32)]: Done  15 tasks      | elapsed:  

Training time: 529.1609 s
Prediction time: 5.1506 s

Report:

Train accuracy: 0.4168
Test accuracy: 0.378

                  precision    recall  f1-score   support

         Vårbyg       0.25      0.21      0.23      1192
    Vinterhvede       0.41      0.25      0.31      1092
       Silomajs       0.68      0.57      0.62      1469
     Vinterraps       0.40      0.34      0.37      1125
      Vinterbyg       0.25      0.46      0.33      1293
       Vårhavre       0.29      0.36      0.32      1312
Vinterhybridrug       0.53      0.39      0.45      1412

       accuracy                           0.38      8895
      macro avg       0.40      0.37      0.37      8895
   weighted avg       0.41      0.38      0.38      8895

--------------------------------------------------------------------------------------------------
Dataset from 2018-07-01 to 2018-09-01
Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=32)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=32)]: Done   1 tasks      | elapsed:  2.0min
[Parallel(n_jobs=32)]: Done   2 tasks      | elapsed:  2.1min
[Parallel(n_jobs=32)]: Done   3 tasks      | elapsed:  2.2min
[Parallel(n_jobs=32)]: Done   4 tasks      | elapsed:  2.2min
[Parallel(n_jobs=32)]: Done   5 tasks      | elapsed:  2.3min
[Parallel(n_jobs=32)]: Done   6 tasks      | elapsed:  2.3min
[Parallel(n_jobs=32)]: Done   7 tasks      | elapsed:  2.3min
[Parallel(n_jobs=32)]: Done   8 tasks      | elapsed:  2.4min
[Parallel(n_jobs=32)]: Done   9 tasks      | elapsed:  2.4min
[Parallel(n_jobs=32)]: Done  10 tasks      | elapsed:  2.4min
[Parallel(n_jobs=32)]: Done  11 tasks      | elapsed:  2.5min
[Parallel(n_jobs=32)]: Done  12 tasks      | elapsed:  2.5min
[Parallel(n_jobs=32)]: Done  13 tasks      | elapsed:  2.5min
[Parallel(n_jobs=32)]: Done  14 tasks      | elapsed:  2.6min
[Parallel(n_jobs=32)]: Done  15 tasks      | elapsed:  