In [None]:
from argparse import ArgumentParser, Namespace
import h5py
from itertools import permutations
from pathlib import Path
from typing import cast, Optional, List, Tuple, Dict, Type, TypeVar, Sequence
from tqdm import tqdm
import sys

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset
from shapely.geometry import Point
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, accuracy_score
from cropharvest.datasets import CropHarvest, CropHarvestLabels, Task
from cropharvest.columns import NullableColumns, RequiredColumns
from cropharvest.config import FEATURES_DIR
from cropharvest.engineer import Engineer
from cropharvest.utils import load_normalizing_dict
from cropharvest.bands import BANDS, DYNAMIC_BANDS, STATIC_BANDS, REMOVED_BANDS

sys.path.append("..")

from src.models import STR2MODEL, STR2BASE, train_model


# Band names used by this notebook: eleven "B*" bands plus the NDVI index.
# NOTE(review): presumably the Sentinel-2 ("S2") bands — confirm against cropharvest.bands.
S2_BANDS = [
    "B2",
    "B3",
    "B4",
    "B5",
    "B6",
    "B7",
    "B8",
    "B8A",
    "B9",
    "B11",
    "B12",
    "NDVI",
]

## Stochasticity analysis

In [None]:
def get_model(add_geowiki: bool, add_nigeria: bool, geowiki_subset: str, seed: int, exp_name: str):
    """Instantiate the "land_cover" model with notebook-specific argument overrides.

    The model's default arguments are obtained by parsing an empty command
    line, so nothing is read from ``sys.argv``. The overrides below are then
    layered on top of those defaults.

    Args:
        add_geowiki: Stored as the ``add_geowiki`` model argument.
        add_nigeria: Stored as the ``add_nigeria`` model argument.
        geowiki_subset: Stored as the ``geowiki_subset`` model argument,
            e.g. ``'nigeria'`` or ``'neighbours1'``.
        seed: Stored as the ``seed`` model argument.
        exp_name: Stored as the ``exp_name`` model argument.

    Returns:
        A ``(model, new_model_args)`` tuple: the constructed model and the
        ``Namespace`` it was built from.
    """
    # Trainer-level options, registered in the same order as before.
    base_parser = ArgumentParser()
    trainer_options = (
        ("--max_epochs", {"type": int, "default": 100}),
        ("--patience", {"type": int, "default": 10}),
        ("--gpus", {"type": int, "default": 0}),
        ("--wandb", {"default": False, "action": "store_true"}),
        ("--seed", {"type": int, "default": 0}),
    )
    for flag, options in trainer_options:
        base_parser.add_argument(flag, **options)

    model_cls = STR2MODEL["land_cover"]
    # args=[] -> take every default; do not parse the notebook kernel's argv.
    default_args = model_cls.add_model_specific_args(base_parser).parse_args(args=[])

    # SET MODIFICATIONS TO DEFAULT MODEL ARGUMENTS:
    overrides = {
        'add_geowiki': add_geowiki,
        'add_nigeria': add_nigeria,
        'geowiki_subset': geowiki_subset,
        'weighted_loss_fn': True,
        'seed': seed,
        'inference': True,
        'exp_name': exp_name,
    }

    # Defaults first, overrides second: existing keys keep their position and
    # new keys are appended in the order listed above.
    new_model_args = Namespace(**{**vars(default_args), **overrides})
    model = model_cls(new_model_args)
    print('Model arguments: ', new_model_args)

    return model, new_model_args

In [None]:
# Draw the per-run seeds from a dedicated, fixed-seed generator so that
# "Restart Kernel -> Run All" reproduces exactly the same set of runs.
# Sampling without replacement from 1..999 (the same range as before)
# guarantees that no two runs share a seed; a duplicated seed would repeat a
# run and understate the run-to-run variance this analysis is measuring.
MASTER_SEED = 0
num_runs = 10
_seed_rng = np.random.default_rng(MASTER_SEED)
random_seeds = _seed_rng.choice(np.arange(1, 1000), size=num_runs, replace=False).tolist()
random_seeds

In [None]:
# Train one model per seed. Every run uses the same configuration; only the
# seed passed to `get_model` changes between iterations.
trained = []
for i in range(num_runs):
    run_seed = random_seeds[i]
    print(f'Run: {i}, seed: {run_seed}')
    model, new_model_args = get_model(
        add_geowiki=True,
        add_nigeria=True,
        geowiki_subset='neighbours1',
        seed=run_seed,
        exp_name='stochasticity_analysis_neighbours1',
    )
    trainer = train_model(model, new_model_args)
    trained.append(trainer)

In [None]:
# Inspect the objects returned by `train_model`, one entry per run.
trained

In [None]:
# Call `.test()` on each object collected from `train_model`, in run order.
for fitted_trainer in trained:
    fitted_trainer.test()

**Prerequisite:** before running the next cell, run `python parse_results.py stochasticity_analysis_neighbours1 lstm` so that the results CSV loaded below exists.

In [None]:
# Load the results CSV of the `stochasticity_analysis_neighbours1` LSTM
# experiment into `df` and display it.
results_csv = '../results/stochasticity_analysis_neighbours1/lstm/results_stochasticity_analysis_neighbours1_lstm.csv'
df = pd.read_csv(results_csv)
df

In [None]:
# (mean, std) of the `test_accuracy` column in `df`.
# pandas' `.std()` defaults to ddof=1, i.e. the sample standard deviation.
accuracy = df.test_accuracy
(accuracy.mean(), accuracy.std())

In [None]:
# (mean, std) of the `test_f1_score` column in `df`.
# pandas' `.std()` defaults to ddof=1, i.e. the sample standard deviation.
f1_scores = df.test_f1_score
(f1_scores.mean(), f1_scores.std())

**Prerequisite (Nigeria runs):** before running the next cell, run `python parse_results.py stochasticity_analysis lstm` so that the results CSV loaded below exists.

In [None]:
# Load the results CSV of the `stochasticity_analysis` LSTM experiment into
# `df` (replacing the frame loaded earlier under the same name) and display it.
results_csv = '../results/stochasticity_analysis/lstm/results_stochasticity_analysis_lstm.csv'
df = pd.read_csv(results_csv)
df

In [None]:
# (mean, std) of the `test_accuracy` column in the currently loaded `df`.
# pandas' `.std()` defaults to ddof=1, i.e. the sample standard deviation.
accuracy = df.test_accuracy
(accuracy.mean(), accuracy.std())


In [None]:
# (mean, std) of the `test_f1_score` column in the currently loaded `df`.
# pandas' `.std()` defaults to ddof=1, i.e. the sample standard deviation.
f1_scores = df.test_f1_score
(f1_scores.mean(), f1_scores.std())