
# ViEWS 3 constituent models 
## ViEWS production system, cm level


This notebook trains a set of regression models for use in the monthly updated ViEWS fatalities-prediction ensemble.

The notebook does the following: 
1. Retrieves data through querysets and stores it in `Datasets`, a list of dictionaries
2. Specifies the metadata of a number of models and stores it in `ModelList`, a list of dictionaries
3. Trains the models in ModelList, stores the trained objects in model storage and prediction storage
4. Saves part of ModelList as csv and the rest as pickles


## Importing modules

In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
# sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression

from xgboost import XGBRegressor
from xgboost import XGBClassifier
from xgboost import XGBRFRegressor, XGBRFClassifier

from lightgbm import LGBMClassifier, LGBMRegressor

# Views 3
from viewser.operations import fetch
import views_runs
from views_partitioning import data_partitioner, legacy
from stepshift import views
from views_runs import storage
from views_runs.storage import store, retrieve, fetch_metadata

from views_forecasts.extensions import *

# Other packages
import pickle as pkl

# Packages from viewsforecasting repository

#from Ensembling import CalibratePredictions, RetrieveStoredPredictions, mean_sd_calibrated, gam_calibrated
import os
import sys
sys.path.append('../')
sys.path.append('../Tools')
sys.path.append('../Intermediates')
from FetchData import FetchData, RetrieveFromList, document_queryset, ReturnQsList, document_ensemble
from ViewsEstimators import *


## Common parameters

In [3]:
#!conda list | grep views

In [4]:
# To do:
# find out why and where missingness occurs

In [5]:
# Common parameters:
# dev_id names the model development series; run_id is the run under which
# predictions are stored (identical to dev_id here).
dev_id = 'Fatalities002'
run_id = dev_id

# Generating a new run if necessary

#try:
#    ViewsMetadata().new_run(name=run_id,description='Developing the fatalities model for FCDO',min_month=1,max_month=999)
#except KeyError:
#    if 'devel' not in run_id:
#        warnings.warn('You are overwriting a production system')

RerunQuerysets = True

# Passed to the data fetch and used as the starting point for true-future predictions.
# NOTE(review): the future partition below predicts through 516 while
# EndOfHistory is 517 - confirm that this offset is intended.
EndOfHistory = 517
steps = list(range(1, 36 + 1)) # Which steps to train and predict for
fi_steps = [1,3,6,12,36] # Which steps to present feature importances for
#steps = [1,3,6,12,36]
#fi_steps = [1,3,6,12,36]

# Specifying partitions
# Each partition maps "train" and "predict" to a (first, last) pair; in the
# calibration and test partitions the predict window starts right after the train window.
calib_partitioner_dict = {"train":(121,408),"predict":(409,456)}
test_partitioner_dict = {"train":(121,456),"predict":(457,504)}
future_partitioner_dict = {"train":(121,504),"predict":(505,516)}
calib_partitioner =  views_runs.DataPartitioner({"calib":calib_partitioner_dict})
test_partitioner =  views_runs.DataPartitioner({"test":test_partitioner_dict})
future_partitioner =  views_runs.DataPartitioner({"future":future_partitioner_dict})

# Build the Dropbox path from the user's home directory. os.getlogin() raises
# OSError when the kernel has no controlling terminal, and a hard-coded
# '/Users/' prefix only exists on macOS. Kept as a plain string.
Mydropbox = os.path.join(os.path.expanduser('~'), 'Dropbox (ViEWS)', 'ViEWS')
print('Setting Mydropbox to',Mydropbox)

Setting Mydropbox to /Users/jim/Dropbox (ViEWS)/ViEWS


# Retrieve data

In [6]:
# Display the calibration partition's train/predict ranges as a sanity check.
calib_partitioner_dict

{'train': (121, 408), 'predict': (409, 456)}

In [7]:
# Create Markdown documentation of all querysets used
# `level` selects the level of analysis ('cm'); `qslist` is reused by the
# data-fetching cell that follows.
level = 'cm'
qslist = ReturnQsList(level)
# Prints one summary line per queryset (see the cell output).
document_queryset(qslist,dev_id)

 .    fatalities002_baseline; A dataset with 6 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_topics_stub; A dataset with 64 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_aquastat_stub; A dataset with 11 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_cm_conflict_history_stub; A dataset with 24 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_cm_conflict_history_ext; A dataset with 33 columns, with data between t = 1 and 852. (213 units)
 .    fatalities002_vdem_short_stub; A dataset with 58 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_wdi_short_stub; A dataset with 28 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_joint_narrow; A dataset with 41 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_joint_broad_stub; A dataset with 77 columns, with data between t 1 and 852. (213 units)
 .    fatalities002_faostat_stub;A 

In [8]:
from FetchData import fetch_cm_data_from_model_def

# Fetch the data for the querysets in qslist. Later cells treat `Datasets` as a
# list of dicts and read the 'Name' and 'df' entries of each element.
# NOTE(review): how EndOfHistory is used by the fetch is not visible here - confirm in FetchData.
Datasets=fetch_cm_data_from_model_def(qslist, EndOfHistory)

 .    baseline002: A dataset with 6 columns, with data between t = 1 and 852; 213 units.
 .    wdi_short: A dataset with 34 columns, with data between t = 1 and 852; 213 units.
 .    conflictlong_ln: A dataset with 63 columns, with data between t = 1 and 852; 213 units.
 .    all_features: A dataset with 191 columns, with data between t = 1 and 852; 213 units.
 .    faoprices: A dataset with 17 columns, with data between t = 1 and 852; 213 units.
 .    vdem_short: A dataset with 64 columns, with data between t = 1 and 852; 213 units.
 .    conflict_ln: A dataset with 30 columns, with data between t = 1 and 852; 213 units.
 .    imfweo: A dataset with 11 columns, with data between t = 1 and 852; 213 units.
 .    aquastat: A dataset with 17 columns, with data between t = 1 and 852; 213 units.
 .    faostat: A dataset with 41 columns, with data between t = 1 and 852; 213 units.
 .    topics_002: A dataset with 70 columns, with data between t = 1 and 852; 213 units.
 .    joint_broad: A da

In [9]:
# Report which datasets contain the column 'vdem_v2clrgunev'.
# Membership is tested explicitly: the previous bare `except: pass` around the
# lookup hid every error (including interrupts), and the looked-up values were
# discarded, so the cell never showed anything.
for ds in Datasets:
    if 'vdem_v2clrgunev' in ds['df'].columns:
        print(ds['Name'], 'contains vdem_v2clrgunev')

# Generating predictions
This section uses the ViEWS3 partitioning/stepshifting syntax. Models are trained for (A) the calibration partition and (B) the test partition, so that calibration routines can be tested. Each model is trained on the outcome named in its `depvar` entry (`ln_ged_sb_dep` for the baseline model shown below).

In [10]:
# Show the ModelMetadata signature and docstring for reference.
# NOTE(review): interactive help output; candidate for removal from the final notebook.
from views_runs import ModelMetadata 
help(ModelMetadata)

Help on class ModelMetadata in module views_schema.models:

class ModelMetadata(pydantic.main.BaseModel)
 |  ModelMetadata(*, author: str, queryset_name: str, train_start: int, train_end: int, steps: Optional[List[int]] = None, training_date: datetime.datetime) -> None
 |  
 |  ModelMetadata
 |  
 |  Data used to organize model objects.
 |  
 |  parameters:
 |      author (str): Name of the user that authored the model object.
 |      queryset_name (str): Name of the queryset used to train the model
 |      train_start (int): Month identifier for training start date
 |      train_start (int): Month identifier for training end date
 |      training_date (datetime.datetime): Timestamp for training date (use datetime.datetime.now())
 |  
 |  example:
 |  
 |      # Instantiate the class with values
 |  
 |      my_metadata = ModelMetadata(
 |          author = "my_name",
 |          queryset_name = "my_queryset",
 |          train_start = 1,
 |          train_end = 300,
 |          steps 




## Checking missingness and infinity values

In [11]:
# Data-quality report: for the first N columns of every dataset, print the
# column name, its length, the number of missing values and the number of
# infinite values.
N=51  # only the first N columns of each dataset are reported
for dataset in Datasets:
    df = dataset['df']
    print(dataset['Name'])
    for col in df.columns[:N]:
        column_values = df[col]
        # Count infinities in this column only. Counting over the whole frame
        # repeated one dataset-wide total on every line and rescanned the
        # full frame once per column.
        n_infinite = np.isinf(column_values).values.sum()
        print(col,len(column_values), 'missing:', column_values.isnull().sum(), 'infinity:', n_infinite)


baseline002
ln_ged_sb_dep 158230 missing: 0 infinity: 0
ln_ged_sb 158230 missing: 0 infinity: 0
wdi_sp_pop_totl 158230 missing: 2242 infinity: 0
decay_ged_sb_5 158230 missing: 0 infinity: 0
decay_ged_os_5 158230 missing: 0 infinity: 0
splag_1_decay_ged_sb_5 158230 missing: 0 infinity: 0
wdi_short
ln_ged_sb_dep 158230 missing: 0 infinity: 0
ln_ged_sb 158230 missing: 0 infinity: 0
wdi_sp_pop_totl 158230 missing: 2242 infinity: 0
wdi_ag_lnd_frst_k2 158230 missing: 2620 infinity: 0
wdi_dt_oda_odat_pc_zs 158230 missing: 29607 infinity: 0
wdi_ms_mil_xpnd_gd_zs 158230 missing: 24108 infinity: 0
wdi_ms_mil_xpnd_zs 158230 missing: 28621 infinity: 0
wdi_nv_agr_totl_kd 158230 missing: 6597 infinity: 0
wdi_nv_agr_totl_kn 158230 missing: 5053 infinity: 0
wdi_ny_gdp_pcap_kd 158230 missing: 4071 infinity: 0
wdi_sp_dyn_le00_in 158230 missing: 3094 infinity: 0
wdi_se_enr_prim_fm_zs 158230 missing: 3448 infinity: 0
wdi_se_enr_prsc_fm_zs 158230 missing: 5152 infinity: 0
wdi_se_prm_nenr 158230 missing: 63

vdem_v2x_libdem 158230 missing: 15145 infinity: 0
vdem_v2x_libdem_48 158230 missing: 15166 infinity: 0
vdem_v2x_partip 158230 missing: 15145 infinity: 0
vdem_v2x_partipdem 158230 missing: 15145 infinity: 0
vdem_v2x_accountability 158230 missing: 15145 infinity: 0
vdem_v2x_civlib 158230 missing: 15145 infinity: 0
vdem_v2x_clphy 158230 missing: 15145 infinity: 0
vdem_v2x_cspart 158230 missing: 15145 infinity: 0
vdem_v2x_divparctrl 158230 missing: 15145 infinity: 0
vdem_v2x_edcomp_thick 158230 missing: 15145 infinity: 0
vdem_v2x_egal 158230 missing: 15145 infinity: 0
vdem_v2x_execorr 158230 missing: 15145 infinity: 0
vdem_v2x_frassoc_thick 158230 missing: 15145 infinity: 0
vdem_v2x_gencs 158230 missing: 15145 infinity: 0
vdem_v2x_gender 158230 missing: 15391 infinity: 0
vdem_v2x_genpp 158230 missing: 15391 infinity: 0
vdem_v2x_horacc 158230 missing: 15145 infinity: 0
vdem_v2x_neopat 158230 missing: 15145 infinity: 0
vdem_v2x_pubcorr 158230 missing: 15145 infinity: 0
vdem_v2x_rule 158230 m

topic_ste_theta3_stock_t2 158230 missing: 5 infinity: 0
topic_ste_theta3_stock_t13 158230 missing: 11 infinity: 0
topic_ste_theta4_stock_t1 158230 missing: 5 infinity: 0
topic_ste_theta4_stock_t2 158230 missing: 5 infinity: 0
topic_ste_theta4_stock_t13 158230 missing: 11 infinity: 0
topic_ste_theta5_stock_t1 158230 missing: 5 infinity: 0
topic_ste_theta5_stock_t2 158230 missing: 5 infinity: 0
topic_ste_theta5_stock_t13 158230 missing: 11 infinity: 0
topic_ste_theta6_stock_t1 158230 missing: 5 infinity: 0
topic_ste_theta6_stock_t2 158230 missing: 5 infinity: 0
topic_ste_theta6_stock_t13 158230 missing: 11 infinity: 0
topic_ste_theta7_stock_t1 158230 missing: 5 infinity: 0
topic_ste_theta7_stock_t2 158230 missing: 5 infinity: 0
topic_ste_theta7_stock_t13 158230 missing: 11 infinity: 0
topic_ste_theta8_stock_t1 158230 missing: 5 infinity: 0
topic_ste_theta8_stock_t2 158230 missing: 5 infinity: 0
topic_ste_theta8_stock_t13 158230 missing: 11 infinity: 0
topic_ste_theta9_stock_t1 158230 mis

pc9 74507 missing: 0 infinity: 0
pc10 74507 missing: 0 infinity: 0
pca_vdem
ln_ged_sb_dep 74507 missing: 0 infinity: 0
ln_ged_sb 74507 missing: 0 infinity: 0
decay_ged_sb_5 74507 missing: 0 infinity: 0
decay_ged_os_5 74507 missing: 0 infinity: 0
splag_1_decay_ged_sb_5 74507 missing: 0 infinity: 0
wdi_sp_pop_totl 74507 missing: 852 infinity: 0
pc1 74507 missing: 0 infinity: 0
pc2 74507 missing: 0 infinity: 0
pc3 74507 missing: 0 infinity: 0
pc4 74507 missing: 0 infinity: 0
pc5 74507 missing: 0 infinity: 0
pc6 74507 missing: 0 infinity: 0
pc7 74507 missing: 0 infinity: 0
pc8 74507 missing: 0 infinity: 0
pc9 74507 missing: 0 infinity: 0
pc10 74507 missing: 0 infinity: 0
pc11 74507 missing: 0 infinity: 0
pc12 74507 missing: 0 infinity: 0
pc13 74507 missing: 0 infinity: 0
pc14 74507 missing: 0 infinity: 0
pc15 74507 missing: 0 infinity: 0
pca_wdi
ln_ged_sb_dep 74507 missing: 0 infinity: 0
ln_ged_sb 74507 missing: 0 infinity: 0
decay_ged_sb_5 74507 missing: 0 infinity: 0
decay_ged_os_5 74507

# Specify models in ensemble

In [12]:
from ModelDefinitions import DefineEnsembleModels

# Build the list of model-specification dicts for the 'cm' level. Later cells
# read keys such as 'modelname', 'algorithm', 'depvar', 'data_train' and 'queryset'.
ModelList = DefineEnsembleModels('cm')
    
# Overview: index, model name and name of the training dataset for each model.
for imodel,model in enumerate(ModelList):
    print(imodel, model['modelname'], model['data_train'])

0 fatalities002_baseline_rf baseline002
1 fatalities002_conflicthistory_rf conflict_ln
2 fatalities002_conflicthistory_gbm conflict_ln
3 fatalities002_conflicthistory_hurdle_lgb conflict_ln
4 fatalities002_conflicthistory_long_xgb conflictlong_ln
5 fatalities002_vdem_hurdle_xgb vdem_short
6 fatalities002_wdi_rf wdi_short
7 fatalities002_topics_rf topics_002
8 fatalities002_topics_xgb topics_002
9 fatalities002_topics_hurdle_lgb topics_002
10 fatalities002_joint_broad_rf joint_broad
11 fatalities002_joint_broad_hurdle_rf joint_broad
12 fatalities002_joint_narrow_xgb joint_narrow
13 fatalities002_joint_narrow_hurdle_xgb joint_narrow
14 fatalities002_joint_narrow_hurdle_lgb joint_narrow
15 fatalities002_all_pca3_xgb all_features
16 fatalities002_aquastat_rf aquastat
17 fatalities002_faostat_rf faostat
18 fatalities002_faoprices_rf faoprices
19 fatalities002_imfweo_rf imfweo
20 fatalities002_Markov_glm joint_narrow
21 fatalities002_Markov_rf joint_narrow


In [13]:
# Display the full model specifications.
# NOTE(review): this dumps every dict including the estimator reprs; consider a compact summary.
ModelList

[{'modelname': 'fatalities002_baseline_rf',
  'algorithm': XGBRFRegressor(base_score=None, booster=None, callbacks=None,
                 colsample_bylevel=None, colsample_bytree=None,
                 early_stopping_rounds=None, enable_categorical=False,
                 eval_metric=None, feature_types=None, gamma=None, gpu_id=None,
                 grow_policy=None, importance_type=None,
                 interaction_constraints=None, max_bin=None,
                 max_cat_threshold=None, max_cat_to_onehot=None,
                 max_delta_step=None, max_depth=None, max_leaves=None,
                 min_child_weight=None, missing=nan, monotone_constraints=None,
                 n_estimators=300, n_jobs=12, num_parallel_tree=None,
                 objective='reg:squarederror', predictor=None, random_state=None,
                 reg_alpha=None, ...),
  'depvar': 'ln_ged_sb_dep',
  'data_train': 'baseline002',
  'queryset': 'fatalities002_baseline',
  'preprocessing': 'float_it',
  'level

In [14]:
# Document the ensemble; as the cell output shows, this lists index, model
# name and training dataset for every model.
# NOTE(review): the meaning of the 'sb' argument is not visible here - confirm in FetchData.
document_ensemble(ModelList,'sb')

0 fatalities002_baseline_rf baseline002
1 fatalities002_conflicthistory_rf conflict_ln
2 fatalities002_conflicthistory_gbm conflict_ln
3 fatalities002_conflicthistory_hurdle_lgb conflict_ln
4 fatalities002_conflicthistory_long_xgb conflictlong_ln
5 fatalities002_vdem_hurdle_xgb vdem_short
6 fatalities002_wdi_rf wdi_short
7 fatalities002_topics_rf topics_002
8 fatalities002_topics_xgb topics_002
9 fatalities002_topics_hurdle_lgb topics_002
10 fatalities002_joint_broad_rf joint_broad
11 fatalities002_joint_broad_hurdle_rf joint_broad
12 fatalities002_joint_narrow_xgb joint_narrow
13 fatalities002_joint_narrow_hurdle_xgb joint_narrow
14 fatalities002_joint_narrow_hurdle_lgb joint_narrow
15 fatalities002_all_pca3_xgb all_features
16 fatalities002_aquastat_rf aquastat
17 fatalities002_faostat_rf faostat
18 fatalities002_faoprices_rf faoprices
19 fatalities002_imfweo_rf imfweo
20 fatalities002_Markov_glm joint_narrow
21 fatalities002_Markov_rf joint_narrow


In [15]:
# Print the per-column count of missing values for every dataset, then replace
# all missing values with 0.
# The filled frame is written back into Datasets, so the counts printed here
# describe the data before filling; re-running the cell will report zeros.
for ds in Datasets:
    df = ds['df']
    print(ds['Name'],df.isna().sum())
    ds['df']=df.fillna(0)

baseline002 ln_ged_sb_dep                0
ln_ged_sb                    0
wdi_sp_pop_totl           2242
decay_ged_sb_5               0
decay_ged_os_5               0
splag_1_decay_ged_sb_5       0
dtype: int64
wdi_short ln_ged_sb_dep                      0
ln_ged_sb                          0
wdi_sp_pop_totl                 2242
wdi_ag_lnd_frst_k2              2620
wdi_dt_oda_odat_pc_zs          29607
wdi_ms_mil_xpnd_gd_zs          24108
wdi_ms_mil_xpnd_zs             28621
wdi_nv_agr_totl_kd              6597
wdi_nv_agr_totl_kn              5053
wdi_ny_gdp_pcap_kd              4071
wdi_sp_dyn_le00_in              3094
wdi_se_enr_prim_fm_zs           3448
wdi_se_enr_prsc_fm_zs           5152
wdi_se_prm_nenr                 6365
wdi_sh_sta_maln_zs             30885
wdi_sh_sta_stnt_zs             30885
wdi_sl_tlf_totl_fe_zs          11586
wdi_sm_pop_refg_or              4664
wdi_sm_pop_netm                 2242
wdi_sm_pop_totl_zs              2641
wdi_sp_dyn_imrt_in              2242
wd

dtype: int64
pca_all ln_ged_sb_dep               0
ln_ged_sb                   0
decay_ged_sb_5              0
decay_ged_os_5              0
splag_1_decay_ged_sb_5      0
wdi_sp_pop_totl           852
pc1                         0
pc2                         0
pc3                         0
pc4                         0
pc5                         0
pc6                         0
pc7                         0
pc8                         0
pc9                         0
pc10                        0
pc11                        0
pc12                        0
pc13                        0
pc14                        0
pc15                        0
pc16                        0
pc17                        0
pc18                        0
pc19                        0
pc20                        0
dtype: int64
pca_topics ln_ged_sb_dep               0
ln_ged_sb                   0
decay_ged_sb_5              0
decay_ged_os_5              0
splag_1_decay_ged_sb_5      0
wdi_sp_pop_totl          

In [16]:
# NOTE(review): leftover smoke-test cell; it has no effect on the analysis and is a candidate for deletion.
print('hello')

hello


In [17]:
# Trains every constituent model on the calibration and test partitions (and,
# optionally, on the future partition) and stores its predictions under run_id.
#
# Switches:
#   force_retrain - passed as `retrain` to RunResult.retrain_or_retrieve.
#   force_rewrite - if True, always predict and overwrite the stored predictions;
#                   if False, read the stored predictions and only predict and
#                   store when they are missing (KeyError).
#   includeFuture - if True, also train on future_partitioner_dict and predict
#                   from EndOfHistory.
# To do: set the data_preprocessing to the function in the model dictionary

level = 'cm'
includeFuture = False
force_rewrite = True
force_retrain = True

# Imported explicitly; the name was previously only in scope through a star import.
from datetime import datetime

from views_runs import Storage, StepshiftedModels
from views_partitioning.data_partitioner import DataPartitioner
from viewser import Queryset, Column
from views_runs import operations
from views_runs.run_result import RunResult


def _store_predictions(predictions, store_name, overwrite):
    """Tag `predictions` with run_id and write them to the prediction store.

    `overwrite=True` is only passed on when requested, so the store's own
    default applies otherwise.
    """
    predictions.forecasts.set_run(run_id)
    if overwrite:
        predictions.forecasts.to_store(name=store_name, overwrite=True)
    else:
        predictions.forecasts.to_store(name=store_name)


def _train_and_predict(model, label, heading, partition_name, partitioner_dict, predict):
    """Train (or retrieve) one model for one partition and obtain its predictions.

    Parameters:
        model: model-specification dict from ModelList; 'RunResult_<label>' is
            written to it and 'predstore_<label>' is read from it.
        label: 'calib', 'test' or 'future'; also the suffix of the storage name.
        heading: text printed together with the start timestamp.
        partition_name: key under which partitioner_dict is registered.
        partitioner_dict: dict with the "train" and "predict" ranges.
        predict: callable taking the RunResult and returning the predictions.

    Returns:
        The predictions, freshly computed or read back from the store.
    """
    print(heading, datetime.now())
    # Read the store name first so a missing key fails before training starts.
    store_name = model['predstore_' + label]
    run_result = RunResult.retrain_or_retrieve(
            retrain            = force_retrain,
            store              = storage.Storage(),
            partitioner        = DataPartitioner({partition_name: partitioner_dict}),
            stepshifted_models = StepshiftedModels(model['algorithm'], steps, model['depvar']),
            dataset            = RetrieveFromList(Datasets, model['data_train']),
            queryset_name      = model['queryset'],
            partition_name     = partition_name,
            timespan_name      = "train",
            storage_name       = model['modelname'] + '_' + label,
            author_name        = "JED",
    )
    model['RunResult_' + label] = run_result

    ct = datetime.now()
    if force_rewrite:
        print(store_name, ', run', run_id, 'force_rewrite=True, predicting')
        predictions = predict(run_result)
        _store_predictions(predictions, store_name, overwrite=True)
        return predictions

    print('Trying to retrieve predictions', ct)
    try:
        return pd.DataFrame.forecasts.read_store(run=run_id, name=store_name)
    except KeyError:
        print(store_name, ', run', run_id, 'does not exist, predicting')
        predictions = predict(run_result)
        _store_predictions(predictions, store_name, overwrite=False)
        return predictions


for i, model in enumerate(ModelList[:]):
    if model['algorithm'] != 'Rscript':
        print(i, model['modelname'])
        model['Algorithm_text'] = str(model['algorithm'])

        # 'predstore_calib' and 'predstore_test' must already be present in the
        # model dict; this cell does not set them.
        predictions_calib = _train_and_predict(
            model, 'calib', 'Calibration partition', 'calib', calib_partitioner_dict,
            lambda result: result.run.predict("calib", "predict", result.data),
        )
        predictions_test = _train_and_predict(
            model, 'test', 'Test partition', 'test', test_partitioner_dict,
            lambda result: result.run.predict("test", "predict", result.data),
        )

        # Predictions for true future
        if includeFuture:
            # Set the store name before it is used in either branch; it used to
            # be assigned only on the retrieve path.
            model['predstore_future'] = level +  '_' + model['modelname'] + '_f' + str(EndOfHistory)
            # The future partition is registered under the name "test", as before.
            # Both paths now use future_point_predict; the overwrite path used
            # to call predict() with arguments that do not fit its
            # (partition, timespan, data) signature.
            predictions_future = _train_and_predict(
                model, 'future', 'Future', 'test', future_partitioner_dict,
                lambda result: result.run.future_point_predict(EndOfHistory, result.data),
            )
        print('**************************************************************')

print('All done')

hello
0 fatalities002_baseline_rf
Calibration partition 2023-03-27 11:15:25.914062
 * == Performing a run: "fatalities002_baseline_rf_calib" == * 
Model object named "fatalities002_baseline_rf_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_baseline_rf_calib"
Training model(s)...
Storing "fatalities002_baseline_rf_calib"
NOT STORING METADATA
cm_fatalities002_baseline_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 11:17:07.119419
 * == Performing a run: "fatalities002_baseline_rf_test" == * 
Model object named "fatalities002_baseline_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_baseline_rf_test"
Training model(s)...
Storing "fatalities002_baseline_rf_test"
NOT STORING METADATA
cm_fatalities002_baseline_rf_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
1 fatalities002_conflicthistory_rf


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_rf_calib"
NOT STORING METADATA
cm_fatalities002_conflicthistory_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 11:21:21.483039
 * == Performing a run: "fatalities002_conflicthistory_rf_test" == * 
Model object named "fatalities002_conflicthistory_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_rf_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_rf_test"
NOT STORING METADATA
cm_fatalities002_conflicthistory_rf_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
2 fatalities002_conflicthistory_gbm
Calibration partition 2023-03-27 11:23:56.152766
 * == Performing a run: "fatalities002_conflicthistory_gbm_calib" == * 
Model object named "fatalities002_conflicthistory_gbm_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_gbm_calib"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_gbm_calib"
NOT STORING METADATA
cm_fatalities002_conflicthistory_gbm_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 11:35:41.352367
 * == Performing a run: "fatalities002_conflicthistory_gbm_test" == * 
Model object named "fatalities002_conflicthistory_gbm_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_gbm_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_gbm_test"
NOT STORING METADATA
cm_fatalities002_conflicthistory_gbm_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
3 fatalities002_conflicthistory_hurdle_lgb
Calibration partition 2023-03-27 11:50:15.300889
 * == Performing a run: "fatalities002_conflicthistory_hurdle_lgb_calib" == * 
Model object named "fatalities002_conflicthistory_hurdle_lgb_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_hurdle_lgb_calib"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_hurdle_lgb_calib"
NOT STORING METADATA
cm_fatalities002_conflicthistory_hurdle_lgb_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 11:51:02.247275
 * == Performing a run: "fatalities002_conflicthistory_hurdle_lgb_test" == * 
Model object named "fatalities002_conflicthistory_hurdle_lgb_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_hurdle_lgb_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_hurdle_lgb_test"
NOT STORING METADATA
cm_fatalities002_conflicthistory_hurdle_lgb_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
4 fatalities002_conflicthistory_long_xgb
Calibration partition 2023-03-27 11:51:54.250199
 * == Performing a run: "fatalities002_conflicthistory_long_xgb_calib" == * 
Model object named "fatalities002_conflicthistory_long_xgb_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_long_xgb_calib"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_long_xgb_calib"
NOT STORING METADATA
cm_fatalities002_conflicthistory_long_xgb_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 11:53:39.960926
 * == Performing a run: "fatalities002_conflicthistory_long_xgb_test" == * 
Model object named "fatalities002_conflicthistory_long_xgb_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_conflicthistory_long_xgb_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_conflicthistory_long_xgb_test"
NOT STORING METADATA
cm_fatalities002_conflicthistory_long_xgb_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
5 fatalities002_vdem_hurdle_xgb
Calibration partition 2023-03-27 11:55:46.870886
 * == Performing a run: "fatalities002_vdem_hurdle_xgb_calib" == * 
Model object named "fatalities002_vdem_hurdle_xgb_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_vdem_hurdle_xgb_calib"
Training model(s)...
Storing "fatalities002_vdem_hurdle_xgb_calib"
NOT STORING METADATA
cm_fatalities002_vdem_hurdle_xgb_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 11:57:45.967419
 * == Performing a run: "fatalities002_vdem_hurdle_xgb_test" == * 
Model object named "fatalities002_vdem_hurdle_xgb_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_vdem_hurdle_xgb_test"


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_joint_broad_rf_calib"
NOT STORING METADATA
cm_fatalities002_joint_broad_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 12:41:32.433087
 * == Performing a run: "fatalities002_joint_broad_rf_test" == * 
Model object named "fatalities002_joint_broad_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_joint_broad_rf_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_joint_broad_rf_test"
NOT STORING METADATA
cm_fatalities002_joint_broad_rf_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
11 fatalities002_joint_broad_hurdle_rf
Calibration partition 2023-03-27 12:50:25.009087
 * == Performing a run: "fatalities002_joint_broad_hurdle_rf_calib" == * 
Model object named "fatalities002_joint_broad_hurdle_rf_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_joint_broad_hurdle_rf_calib"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_joint_broad_hurdle_rf_calib"
NOT STORING METADATA
cm_fatalities002_joint_broad_hurdle_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 12:57:59.290647
 * == Performing a run: "fatalities002_joint_broad_hurdle_rf_test" == * 
Model object named "fatalities002_joint_broad_hurdle_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_joint_broad_hurdle_rf_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_joint_broad_hurdle_rf_test"
NOT STORING METADATA
cm_fatalities002_joint_broad_hurdle_rf_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
12 fatalities002_joint_narrow_xgb
Calibration partition 2023-03-27 13:05:24.788796
 * == Performing a run: "fatalities002_joint_narrow_xgb_calib" == * 
Model object named "fatalities002_joint_narrow_xgb_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_joint_narrow_xgb_calib"
Training model(s)...
Storing "fatalities002_joint_narrow_xgb_calib"
NOT STORING METADATA
cm_fatalities002_joint_narrow_xgb_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 13:10:12.786807
 * == Performing a run: "fatalities002_joint_narrow_xgb_test" == * 
Model object named "fatalities002_joint_narrow_xgb_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_joint_narrow_xgb_te

Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_all_pca3_xgb_calib"
NOT STORING METADATA
cm_fatalities002_all_pca3_xgb_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 13:33:57.864447
 * == Performing a run: "fatalities002_all_pca3_xgb_test" == * 
Model object named "fatalities002_all_pca3_xgb_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_all_pca3_xgb_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_all_pca3_xgb_test"
NOT STORING METADATA
cm_fatalities002_all_pca3_xgb_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
16 fatalities002_aquastat_rf
Calibration partition 2023-03-27 13:45:00.179811
 * == Performing a run: "fatalities002_aquastat_rf_calib" == * 
Model object named "fatalities002_aquastat_rf_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_aquastat_rf_calib"
Training model(s)...
Storing "fatalities002_aquastat_rf_calib"
NOT STORING METADATA
cm_fatalities002_aquastat_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 13:47:38.880504
 * == Performing a run: "fatalities002_aquastat_rf_test" == * 
Model object named "fatalities002_aquastat_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_aquastat_rf_test"
Training model(s)...
Storing "fatalities002_aquastat_rf_tes

Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_faoprices_rf_calib"
NOT STORING METADATA
cm_fatalities002_faoprices_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 14:02:57.364004
 * == Performing a run: "fatalities002_faoprices_rf_test" == * 
Model object named "fatalities002_faoprices_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_faoprices_rf_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_faoprices_rf_test"
NOT STORING METADATA
cm_fatalities002_faoprices_rf_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
19 fatalities002_imfweo_rf
Calibration partition 2023-03-27 14:05:32.485775
 * == Performing a run: "fatalities002_imfweo_rf_calib" == * 
Model object named "fatalities002_imfweo_rf_calib" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_imfweo_rf_calib"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_imfweo_rf_calib"
NOT STORING METADATA
cm_fatalities002_imfweo_rf_calib , run Fatalities002 force_rewrite=True, predicting
Test partition 2023-03-27 14:07:20.313483
 * == Performing a run: "fatalities002_imfweo_rf_test" == * 
Model object named "fatalities002_imfweo_rf_test" with equivalent metadata already exists.
Retrain is true, overwriting "fatalities002_imfweo_rf_test"
Training model(s)...


Reordering feature dimension. Save memory by setting the outcome feature as the first column in your dataframe.


Storing "fatalities002_imfweo_rf_test"
NOT STORING METADATA
cm_fatalities002_imfweo_rf_test , run Fatalities002 force_rewrite=True, predicting
**************************************************************
All done


In [18]:
# Exploring the predictions: last rows of predictions_test for the single
# cross-section with key 246 on index level 1.
# NOTE(review): the name predictions_test suggests test-partition rather than
# future predictions, and index level 1 is presumably country_id -- confirm.


predictions_test.xs(246,level=1).tail()

Unnamed: 0_level_0,imfweo_ngdp_rpch_tcurrent,imfweo_ngdp_rpch_tmin1,imfweo_ngdp_rpch_tplus1,imfweo_ngdp_rpch_tplus2,ln_ged_sb_dep,ln_ged_sb,gleditsch_ward,wdi_sp_pop_totl,decay_ged_sb_5,decay_ged_os_5,...,step_pred_33,step_pred_34,step_pred_35,step_pred_36,step_pred_4,step_pred_5,step_pred_6,step_pred_7,step_pred_8,step_pred_9
month_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
500,0.0,0.0,0.0,0.0,3.496508,3.496508,626.0,10606227.0,1.0,1.0,...,2.160521,2.817507,3.510852,3.116256,0.988872,1.494849,1.124294,1.557346,2.241737,2.447491
501,0.0,0.0,0.0,0.0,4.189655,4.189655,626.0,10606227.0,1.0,0.971532,...,2.186068,1.373313,3.149254,3.365624,1.580964,0.720338,1.668776,1.251758,1.775396,2.035585
502,5.325,-6.589,6.473,5.575,1.098612,1.098612,626.0,10606227.0,1.0,0.943874,...,1.987775,2.98702,3.193338,3.16565,2.653107,1.523172,0.868808,1.68736,1.657235,1.913898
503,0.0,0.0,0.0,0.0,0.0,0.0,626.0,10606227.0,0.971532,0.917004,...,1.941729,2.248417,3.009949,2.298844,2.544461,2.379781,1.646704,1.029877,1.546842,1.940765
504,0.0,0.0,0.0,0.0,4.127134,4.127134,626.0,10606227.0,0.943874,0.890899,...,2.106184,2.220343,2.128993,2.794464,2.752732,2.192604,2.302844,1.929972,1.333393,1.821715


## Notes on training time for the various algorithms:

In [None]:
#These timings are in minutes for the hh20 feature set (about 40 features). They cover all 36 steps for the calibration (c) and test (t) partitions, include generating predictions, and are approximate:

#nj=12 (number of threads)
#scikit random forest:        21:13 (c), 26:20 (t) RandomForestRegressor(n_estimators=200, n_jobs=nj)
#XGB random forest:           06:02 (c), 07:51 (t) XGBRFRegressor(n_estimators=300,n_jobs=nj)
#scikit gbm:                  13:59 (c), 15:55 (t) GradientBoostingRegressor(), 
#scikit hurdle random forest: 07:32 (c), 09:49 (t) For both clf and reg: (n_estimators=200, n_jobs=nj)
#XGB hurdle xgb:              01:26 (c), 01:32 (t) For both clf and reg: (n_estimators=200,tree_method='hist',n_jobs=nj)
#scikit histgbm:              01:17 (c), 01:20 (t) HistGradientBoostingRegressor(max_iter=200)
#XGB xgb:                     01:00 (c), 01:04 (t) XGBRegressor(n_estimators=200,tree_method='hist',n_jobs=nj)
#lightgbm gbm:                00:25 (c), --    (t) LGBMRegressor(n_estimators=100,num_threads=8)

# Various helper functions and tools....

In [None]:
!conda list | grep views-forecasts

# Retrieving external forecasts

In [19]:
# Retrieve David's Markov models
# To do: rewrite the model dictionary to the new, slimmer version.
DRList = []

# Both Markov entries share every field except the model name, so they are
# generated from the list of names. Each entry gets its own empty
# 'algorithm' list, and `model` ends up bound to the last entry appended.
for markov_modelname in ('fatalities002_Markov_glm', 'fatalities002_Markov_rf'):
    model = {
        'modelname': markov_modelname,
        'algorithm': [],
        'depvar': "ln_ged_sb_dep",
        'data_train': 'joint_narrow',
        'queryset': 'fatalities002_joint_narrow',
    }
    DRList.append(model)



In [20]:
# Folder holding the externally produced Markov-model prediction files.
# The home directory is resolved with os.path.expanduser instead of building
# '/Users/<login>' from os.getlogin(): getlogin() raises OSError when the
# kernel has no controlling terminal, and the '/Users' prefix is macOS-only.
# NOTE(review): assumes the 'Dropbox (ViEWS)' folder sits directly in the
# user's home directory, as the original '/Users/<login>/...' path did.
# The trailing slash is kept because file names are appended with `+` below.
path = os.path.expanduser('~/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/Predictions/cm/preds/')

# Markov GLM model (first DRList entry): calibration, test and future files
DRList[0]['predictions_file_calib'] = path + 'markov_jointnarrow_ss_glm_calib.parquet'
DRList[0]['predictions_file_test'] = path + 'markov_jointnarrow_ss_glm_test.parquet'
DRList[0]['predictions_file_future'] = path + 'vmm_glm_hh20_517.csv'

# Markov RF model (second DRList entry): calibration, test and future files
DRList[1]['predictions_file_calib'] = path + 'markov_jointnarrow_ss_rf_calib.parquet'
DRList[1]['predictions_file_test'] = path + 'markov_jointnarrow_ss_rf_test.parquet'
DRList[1]['predictions_file_future'] = path + 'vmm_rf_hh20_517.csv'

In [21]:
# Print the predictions folder so the resolved location can be checked by eye
print(path)

/Users/jim/Dropbox (ViEWS)/ViEWS/Projects/PredictingFatalities/Predictions/cm/preds/


In [22]:

# List the 'modelname' of every model dictionary currently held in ModelList
for model in ModelList:
    print(model['modelname'])

fatalities002_baseline_rf
fatalities002_conflicthistory_rf
fatalities002_conflicthistory_gbm
fatalities002_conflicthistory_hurdle_lgb
fatalities002_conflicthistory_long_xgb
fatalities002_vdem_hurdle_xgb
fatalities002_wdi_rf
fatalities002_topics_rf
fatalities002_topics_xgb
fatalities002_topics_hurdle_lgb
fatalities002_joint_broad_rf
fatalities002_joint_broad_hurdle_rf
fatalities002_joint_narrow_xgb
fatalities002_joint_narrow_hurdle_xgb
fatalities002_joint_narrow_hurdle_lgb
fatalities002_all_pca3_xgb
fatalities002_aquastat_rf
fatalities002_faostat_rf
fatalities002_faoprices_rf
fatalities002_imfweo_rf
fatalities002_Markov_glm
fatalities002_Markov_rf


In [23]:
# Show the keys of the second model dictionary in ModelList
ModelList[1].keys()

dict_keys(['modelname', 'algorithm', 'depvar', 'data_train', 'queryset', 'preprocessing', 'level', 'description', 'long_description', 'predstore_calib', 'predstore_test', 'Algorithm_text', 'RunResult_calib', 'RunResult_test'])

In [24]:
# Storing Markov models in central storage
# Retrieving dependent variable

print('Adding depvar - CHECK FILES BEING USED FROM STORAGE ARE SUITABLE!')
# The outcome column is taken from predictions already stored for the
# conflicthistory_rf model, once for each partition.
target_calib = pd.DataFrame.forecasts.read_store('cm_fatalities002_conflicthistory_rf_calib', run=run_id)['ln_ged_sb_dep']
target_test = pd.DataFrame.forecasts.read_store('cm_fatalities002_conflicthistory_rf_test', run=run_id)['ln_ged_sb_dep']
level = 'cm'


def _store_markov_predictions(predictions_file, target, stored_modelname):
    """Read one Markov prediction file, attach the outcome column and store it.

    Parameters
    ----------
    predictions_file : str
        Path of the parquet file holding the predictions for one partition.
    target
        Values for the 'ln_ged_sb_dep' column (presumably a pandas Series
        indexed like the predictions -- confirm against read_store).
    stored_modelname : str
        Name under which the frame is written to prediction storage.

    Returns
    -------
    pandas.DataFrame
        The predictions with the added 'ln_ged_sb_dep' column.
    """
    df = pd.read_parquet(predictions_file)
    # NOTE(review): earlier revisions renamed 'target_month_id' to 'month_id'
    # and set a (month_id, country_id) index here; the parquet files are now
    # assumed to carry that index already -- confirm when the files change.
    df['ln_ged_sb_dep'] = target
    # Assigning a Series aligns on the index, so rows without a counterpart in
    # `target` silently become NaN. Report them instead of storing them
    # unnoticed; this is a warning only, the frame is still stored.
    n_missing = int(df['ln_ged_sb_dep'].isna().sum())
    if n_missing:
        print(f'WARNING: {n_missing} of {len(df)} rows in "{stored_modelname}" '
              'have no ln_ged_sb_dep value - check index alignment')
    df.forecasts.set_run(run_id)
    df.forecasts.to_store(name=stored_modelname, overwrite=True)
    return df


# Only the calibration and test partitions are stored; the file listed under
# 'predictions_file_future' is not read by this cell.
for model in DRList:
    stored_modelname = level + '_' + model['modelname'] + '_calib'
    df_calib = _store_markov_predictions(model['predictions_file_calib'], target_calib, stored_modelname)
    stored_modelname = level + '_' + model['modelname'] + '_test'
    df_test = _store_markov_predictions(model['predictions_file_test'], target_test, stored_modelname)

Adding depvar - CHECK FILES BEING USED FROM STORAGE ARE SUITABLE!
pr_46_cm_fatalities002_conflicthistory_rf_calib.parquet
pr_46_cm_fatalities002_conflicthistory_rf_test.parquet


In [26]:
# Inspect the calibration frame most recently assigned to df_calib
# (step_pred_* columns plus the added ln_ged_sb_dep column)
df_calib

Unnamed: 0_level_0,Unnamed: 1_level_0,step_pred_1,step_pred_2,step_pred_3,step_pred_4,step_pred_5,step_pred_6,step_pred_7,step_pred_8,step_pred_9,step_pred_10,...,step_pred_28,step_pred_29,step_pred_30,step_pred_31,step_pred_32,step_pred_33,step_pred_34,step_pred_35,step_pred_36,ln_ged_sb_dep
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
409,1,0.001254,0.003409,0.006215,0.001066,0.004169,0.010578,0.005992,0.020473,0.019996,0.005005,...,0.012472,0.018377,0.012473,0.013747,0.032416,0.046625,0.050143,0.072278,0.080886,0.000000
409,2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.007520,0.007546,0.000000,...,0.000000,0.001575,0.006936,0.001899,0.002539,0.000000,0.008913,0.000651,0.000740,0.000000
409,3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003179,0.000000,0.001779,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.005663,0.000000,0.000000
409,4,0.002898,0.003205,0.006413,0.006959,0.010158,0.013262,0.015188,0.006469,0.032950,0.043885,...,0.014621,0.031005,0.014028,0.025524,0.013443,0.029052,0.021496,0.015022,0.026567,0.000000
409,7,0.000000,0.000000,0.009350,0.001341,0.000000,0.004274,0.001819,0.000000,0.004698,0.001394,...,0.001561,0.007737,0.003228,0.000000,0.014845,0.005248,0.012677,0.003699,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,242,0.166015,0.125419,0.136348,0.179223,0.137063,0.137121,0.132922,0.178581,0.141428,0.111044,...,0.171055,0.225071,0.195860,0.213776,0.219111,0.170831,0.178676,0.204080,0.233216,0.000000
456,243,0.067220,0.088962,0.047143,0.112824,0.078442,0.075687,0.055758,0.105428,0.082379,0.076006,...,0.089030,0.138568,0.121187,0.102905,0.134276,0.112413,0.164125,0.140054,0.152783,0.000000
456,244,0.283337,0.276383,0.194173,0.252582,0.211764,0.318501,0.253834,0.254390,0.182832,0.189275,...,0.190480,0.175311,0.250015,0.347963,0.235732,0.331532,0.240217,0.277715,0.237895,0.000000
456,245,2.465461,2.227980,2.118435,1.741521,1.905122,2.413388,2.303821,1.823218,2.236356,2.443836,...,2.314368,2.218897,2.265377,2.179763,1.973195,2.393342,2.417559,2.157516,1.989232,1.609438


In [None]:
# Wrap target_test in a DataFrame for display
pd.DataFrame(target_test)

In [None]:
# Look up the metadata entry named 'fatalities002_conflicthistory_rf_calib'.
# NOTE(review): ViewsMetadata is not imported by name in this notebook;
# presumably it is provided by `from views_forecasts.extensions import *` -- confirm.
ViewsMetadata().with_name('fatalities002_conflicthistory_rf_calib').fetch()

In [None]:
# Rebind target_calib to the ln_ged_sb_dep column stored with the baseline_rf
# calibration predictions. NOTE: this replaces the value read from
# conflicthistory_rf in the Markov-storing cell, so re-running that cell's loop
# out of order would use a different source for the outcome column.
target_calib = pd.DataFrame.forecasts.read_store('cm_fatalities002_baseline_rf_calib', run=run_id)['ln_ged_sb_dep']

In [None]:
# Display the current contents of target_calib
target_calib

In [None]:
# Rebind target_calib to the ln_ged_sb_dep column stored with the
# conflicthistory_rf calibration predictions (the same source the
# Markov-storing cell reads).
target_calib = pd.DataFrame.forecasts.read_store('cm_fatalities002_conflicthistory_rf_calib', run=run_id)['ln_ged_sb_dep']

In [None]:
# Display the current contents of target_calib
target_calib