In [1]:
import sys
from pathlib import Path

import numpy as np
import geopandas as gpd
from pytorch_lightning import Trainer

sys.path.append("..")

from src.models import LandCoverMapper


## Load labels with coordinates

In [None]:
# Read the labelled points (with their geometry) from the labels GeoJSON
labels_path = Path('../data/features/nigeria-cropharvest/labels.geojson')
gdf = gpd.read_file(labels_path)
gdf

In [None]:
# Find the identifiers that have no exported .h5 array on disk (nothing is dropped here;
# the list is used later to filter the labels).
# The identifier is parsed as the integer before the first underscore of each file stem.
arrays_folder = Path('../data/features/nigeria-cropharvest/features/arrays')
existing_arrays_ids = sorted(int(path.stem.split('_')[0]) for path in arrays_folder.glob('*.h5'))

# NOTE(review): 1827 is presumably the total number of labelled points (identifiers 0..1826)
# — confirm against the labels file.
n_expected_arrays = 1827

# Sorted so the displayed list is deterministic (a raw set difference has arbitrary order).
missing_files_identifiers = sorted(set(range(n_expected_arrays)) - set(existing_arrays_ids))
missing_files_identifiers

In [None]:
eval_set = 'testing'  # or 'validation'

# Rows belonging to the chosen evaluation split
in_eval_set = gdf['new_set'] == eval_set
# We don't consider the points whose h5 file is missing, to ensure a fair comparison with the other models
has_array = ~gdf['identifier'].isin(missing_files_identifiers)

# .copy() makes val_gdf an independent frame, so later cells can add columns to it
# without pandas' SettingWithCopyWarning.
val_gdf = gdf.loc[in_eval_set & has_array].copy()
val_gdf

In [None]:
# Predictions and labels saved as .npy files in the run's results folder
results_path = Path('../data/lightning_logs/version_949/testing')
preds, labels = (
    np.load(results_path / file_name)
    for file_name in ('all_preds.npy', 'all_labels.npy')
)

In [None]:
# The saved arrays carry no coordinates; they can only be joined to the GeoDataFrame
# if their order is the same as its rows (to be verified).
preds, labels

### TODOS:
- [x] Get test set predictions of the model (with saved predictions?)
- [x] Check with validation set
- [ ] Check that they all give the same results: the saved model predictions, predicting again with the saved model on the test set, and querying the map at the test set points
- [ ] Check neighbours1 model without weighted loss function (has a more balanced recall and precision)

In [None]:
# The saved labels must line up row-for-row with val_gdf, otherwise the predictions
# cannot be appended as a column.
saved_labels = np.ravel(labels)  # flatten e.g. (N, 1) to (N,)
expected_labels = val_gdf['is_crop'].to_numpy()

# np.array_equal returns False on a length mismatch, so the assertion fails with its
# message instead of raising from the element-wise comparison.
assert np.array_equal(saved_labels, expected_labels), 'labels saved from model checkpoint are not in the same order as the dataframe with labels'

In [None]:
# Attach the model outputs to the evaluation points.
# .assign returns a new frame instead of mutating the filtered slice in place, which avoids
# pandas' SettingWithCopyWarning and keeps this cell idempotent when re-run.
new_columns = {
    'preds': preds.squeeze(),
    # Binary prediction at a 0.5 threshold; evaluated after 'preds' has been added
    'preds_thr0.5': lambda frame: (frame['preds'] > 0.5).astype(int),
}
val_gdf = val_gdf.assign(**new_columns)
val_gdf

In [None]:
# Map of the raw model outputs ('preds' column) at each evaluation point
ax = val_gdf.plot(column='preds', legend=True, figsize=(8, 8))
ax.set_title('Model output (preds)');  # trailing ';' hides the Text repr

In [None]:
# Map of the binary predictions obtained by thresholding 'preds' at 0.5
ax = val_gdf.plot(column='preds_thr0.5', legend=True, figsize=(8, 8))
ax.set_title('Thresholded prediction (preds > 0.5)');  # trailing ';' hides the Text repr

In [None]:
# Map of the ground-truth labels ('is_crop' column), for visual comparison with the predictions
ax = val_gdf.plot(column='is_crop', legend=True, figsize=(8, 8))
ax.set_title('Ground truth (is_crop)');  # trailing ';' hides the Text repr

### Checks

Same accuracy as reported in the results csv file: `results/final/lstm/results_final_lstm.csv`

In [None]:
# Fraction of points whose thresholded prediction (> 0.5) equals the label;
# should be 0.841758241758242 so it matches
is_correct = (val_gdf['preds'] > 0.5) == val_gdf['is_crop']
is_correct.mean()

Load model from checkpoint and get predictions on test set (to compare with saved ones)

In [None]:
# Load the checkpoint stored next to the saved predictions, i.e. in the same
# lightning_logs version folder as results_path.
model_path = str(results_path.parent / 'checkpoints' / 'epoch=22.ckpt')
print(f"Using model {model_path}")

# The `inference` flag is True only when evaluating on the testing split.
# NOTE(review): its exact effect is defined in src.models.LandCoverMapper — confirm there.
inference = eval_set == 'testing'
model = LandCoverMapper.load_from_checkpoint(model_path, inference=inference)

In [None]:
# Show the hyperparameters stored on the model that was loaded from the checkpoint
model.hparams

In [None]:
# Run the loaded model's test loop with a default Trainer, to compare with the saved predictions
trainer = Trainer()
trainer.test(model)

In [None]:
# NOTE(review): exact repeat of the previous cell — presumably re-run to check that the test
# results are reproducible; confirm it is intentional.
trainer = Trainer()
trainer.test(model)