In [1]:
import sys
from argparse import ArgumentParser, Namespace
from pathlib import Path

sys.path.append('..')

from src.models import STR2MODEL

In [2]:
# Data directory, expressed relative to the notebook's working directory.
# NOTE(review): no visible cell in this notebook reads DATA_DIR - confirm it is still needed.
DATA_DIR = Path("../data")

In [44]:
def get_model(add_geowiki: bool, add_nigeria: bool, geowiki_subset: str):
    """Instantiate the "land_cover" model with notebook-specific dataset options.

    The model's default arguments are obtained by parsing an empty argument
    list; the three dataset-selection entries are then overwritten with the
    values passed in.

    Args:
        add_geowiki: Value stored under the ``add_geowiki`` model argument.
        add_nigeria: Value stored under the ``add_nigeria`` model argument.
        geowiki_subset: Value stored under the ``geowiki_subset`` model
            argument. This notebook uses 'world', 'neighbours1' and 'nigeria'.

    Returns:
        The object produced by calling ``STR2MODEL["land_cover"]`` with the
        resulting ``Namespace``.
    """
    model_cls = STR2MODEL["land_cover"]

    # Generic options registered before the model class adds its own arguments.
    base_parser = ArgumentParser()
    for flag, options in (
        ("--max_epochs", {"type": int, "default": 100}),
        ("--patience", {"type": int, "default": 10}),
        ("--gpus", {"type": int, "default": 0}),
        ("--wandb", {"default": False, "action": "store_true"}),
    ):
        base_parser.add_argument(flag, **options)

    # args=[] stops argparse from reading sys.argv, so only defaults come back.
    default_args = model_cls.add_model_specific_args(base_parser).parse_args(args=[])

    # Modifications applied on top of the default model arguments.
    overrides = {
        "add_geowiki": add_geowiki,
        "add_nigeria": add_nigeria,
        "geowiki_subset": geowiki_subset,
    }
    hparams = Namespace(**{**vars(default_args), **overrides})

    return model_cls(hparams)


def set_statistics(df):
    """Print the size and cropland / non-cropland balance of a labelled dataset.

    Despite the name, nothing is stored: the function only writes a summary
    to stdout.

    Args:
        df: Object supporting ``len()`` and exposing an ``is_crop`` column of
            0/1 labels (e.g. a pandas DataFrame). Boolean labels are counted
            the same way, since ``True == 1`` and ``False == 0``.

    Returns:
        None. Four lines are printed: total points, cropland count,
        non-cropland count and cropland ratio (3 decimals).
    """
    total = len(df)
    labels = df.is_crop

    # Count each class by comparison rather than indexing value_counts():
    # value_counts() omits classes with zero occurrences, so looking up an
    # absent label raised KeyError on single-class or empty inputs.
    cropland_count = int((labels == 1).sum())
    non_cropland_count = int((labels == 0).sum())

    # Guard the division so an empty dataset reports "nan" instead of dividing by zero.
    cropland_ratio = labels.sum() / total if total else float("nan")

    print(f"Total points: {total}")
    print(f"Cropland count: {cropland_count}")
    print(f"Non-cropland count: {non_cropland_count}")
    print(f"Cropland ratio: {cropland_ratio:.3f}")
    

## Geowiki

### Geowiki world

In [46]:
# Build the model on the 'world' Geowiki subset and show its label counts.
add_geowiki, add_nigeria, geowiki_subset = True, False, 'world'
landcovermapper = get_model(
    add_geowiki=add_geowiki,
    add_nigeria=add_nigeria,
    geowiki_subset=geowiki_subset,
)
df = landcovermapper.geowiki_dataset.labels
df.is_crop.value_counts()

Found normalizing dict geowiki_normalizing_dict.h5
Loading normalizing dict geowiki_normalizing_dict.h5
Creating Geowiki train split
Creating Geowiki val split
Number of instances in Geowiki training set: 19808
Total number of files used for training: 19808
Number of model parameters: 25473


1    13980
0    10781
Name: is_crop, dtype: int64

In [47]:
# Label statistics for every row of the Geowiki labels currently held in `df` ('world' subset).
set_statistics(df)

Total points: 24761
Cropland count: 13980
Non-cropland count: 10781
Cropland ratio: 0.565


In [49]:
# Label statistics for the Geowiki training split ('world' subset).
set_statistics(landcovermapper.geowiki_train.labels)

Total points: 19808
Cropland count: 11131
Non-cropland count: 8677
Cropland ratio: 0.562


In [50]:
# Label statistics for the Geowiki validation split ('world' subset).
set_statistics(landcovermapper.geowiki_val.labels)

Total points: 4953
Cropland count: 2849
Non-cropland count: 2104
Cropland ratio: 0.575


### Geowiki neighbours

In [59]:
# Build the model on the 'neighbours1' Geowiki subset and show its label counts.
add_geowiki, add_nigeria, geowiki_subset = True, False, 'neighbours1'
landcovermapper = get_model(
    add_geowiki=add_geowiki,
    add_nigeria=add_nigeria,
    geowiki_subset=geowiki_subset,
)
df = landcovermapper.geowiki_dataset.labels
df.is_crop.value_counts()

Found normalizing dict geowiki_normalizing_dict_Ghana_Togo_Nigeria_Cameroon_Benin.h5
Loading normalizing dict geowiki_normalizing_dict_Ghana_Togo_Nigeria_Cameroon_Benin.h5
Creating Geowiki train split
Creating Geowiki val split
Number of instances in Geowiki training set: 632
Total number of files used for training: 632
Number of model parameters: 25473


1    460
0    330
Name: is_crop, dtype: int64

In [60]:
# Label statistics for every row of the Geowiki labels currently held in `df` ('neighbours1' subset).
set_statistics(df)

Total points: 790
Cropland count: 460
Non-cropland count: 330
Cropland ratio: 0.582


In [61]:
# Label statistics for the Geowiki training split ('neighbours1' subset).
set_statistics(landcovermapper.geowiki_train.labels)

Total points: 632
Cropland count: 364
Non-cropland count: 268
Cropland ratio: 0.576


In [62]:
# Label statistics for the Geowiki validation split ('neighbours1' subset).
set_statistics(landcovermapper.geowiki_val.labels)

Total points: 158
Cropland count: 96
Non-cropland count: 62
Cropland ratio: 0.608


### Geowiki nigeria

In [63]:
# Build the model on the 'nigeria' Geowiki subset and show its label counts.
add_geowiki, add_nigeria, geowiki_subset = True, False, 'nigeria'
landcovermapper = get_model(
    add_geowiki=add_geowiki,
    add_nigeria=add_nigeria,
    geowiki_subset=geowiki_subset,
)
df = landcovermapper.geowiki_dataset.labels
df.is_crop.value_counts()

Found normalizing dict geowiki_normalizing_dict_Nigeria.h5
Loading normalizing dict geowiki_normalizing_dict_Nigeria.h5
Creating Geowiki train split
Creating Geowiki val split
Number of instances in Geowiki training set: 361
Total number of files used for training: 361
Number of model parameters: 25473


1    312
0    140
Name: is_crop, dtype: int64

In [64]:
# Label statistics for every row of the Geowiki labels currently held in `df` ('nigeria' subset).
set_statistics(df)

Total points: 452
Cropland count: 312
Non-cropland count: 140
Cropland ratio: 0.690


In [65]:
# Label statistics for the Geowiki training split ('nigeria' subset).
set_statistics(landcovermapper.geowiki_train.labels)

Total points: 361
Cropland count: 244
Non-cropland count: 117
Cropland ratio: 0.676


In [66]:
# Label statistics for the Geowiki validation split ('nigeria' subset).
set_statistics(landcovermapper.geowiki_val.labels)

Total points: 91
Cropland count: 68
Non-cropland count: 23
Cropland ratio: 0.747


## Nigeria

In [67]:
# Nigeria-only configuration (Geowiki disabled).
# `geowiki_subset` is assigned explicitly so this cell does not depend on
# whichever Geowiki cell ran last; 'nigeria' is the value the preceding
# Geowiki section left behind when this cell was executed.
add_geowiki = False
add_nigeria = True
geowiki_subset = 'nigeria'
landcovermapper = get_model(add_geowiki, add_nigeria, geowiki_subset)

Number of instances in Nigeria training set: 913
Total number of files used for training: 913
Number of model parameters: 25473


In [68]:
# The training split is requested without a normalizing dict; the validation
# and testing splits are both given the model's `normalizing_dict`.
train_dataset = landcovermapper.get_dataset(subset="training")
val_dataset, test_dataset = (
    landcovermapper.get_dataset(
        subset=subset_name,
        normalizing_dict=landcovermapper.normalizing_dict,
    )
    for subset_name in ("validation", "testing")
)

Number of instances in Nigeria training set: 913
Total number of files used for training: 913
Number of instances in Nigeria validation set: 454
Total number of files used for validation: 454
Number of instances in Nigeria testing set: 455


In [69]:
# Total number of Nigeria points across the three splits
sum(len(split) for split in (train_dataset, val_dataset, test_dataset))

1822

In [80]:
# Cropland points, summed over the training, validation and testing splits
sum(
    split.datasets['nigeria'].is_crop.sum()
    for split in (train_dataset, val_dataset, test_dataset)
)

745

In [81]:
# Cropland ratio: cropland points divided by all points over the three splits,
# rounded to 3 decimals
round(
    sum(
        split.datasets['nigeria'].is_crop.sum()
        for split in (train_dataset, val_dataset, test_dataset)
    )
    / sum(
        split.datasets['nigeria'].is_crop.shape[0]
        for split in (train_dataset, val_dataset, test_dataset)
    ),
    3,
)

0.409

In [73]:
# Label statistics for the 'nigeria' entry of the training dataset.
set_statistics(train_dataset.datasets['nigeria'])

Total points: 913
Cropland count: 381
Non-cropland count: 532
Cropland ratio: 0.417


In [74]:
# Label statistics for the 'nigeria' entry of the validation dataset.
set_statistics(val_dataset.datasets['nigeria'])

Total points: 454
Cropland count: 181
Non-cropland count: 273
Cropland ratio: 0.399


In [75]:
# Label statistics for the 'nigeria' entry of the testing dataset.
set_statistics(test_dataset.datasets['nigeria'])

Total points: 455
Cropland count: 183
Non-cropland count: 272
Cropland ratio: 0.402
