In [1]:
import numpy as np
import pandas as pd
import joblib
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas/scikit-learn warnings raised by later cells.
warnings.filterwarnings(action='ignore')

In [2]:
# Country code whose artefacts are analysed; it is interpolated into both file names below.
country = 'ESP'

# NOTE(review): joblib.load unpickles the file contents, so only load artefacts from a trusted source.
model_path = f'models/{country}_model.pkl'
scaler_path = f'scalers/{country}_scaler.pkl'

model = joblib.load(model_path)
scaler = joblib.load(scaler_path)

In [3]:
# Read the combined dataset; the first two CSV columns become a two-level row index.
main_data = pd.read_csv(
    "combined_data.csv",
    index_col=[0, 1],
)

In [4]:
# Display the loaded frame as a sanity check of the two-level index and the columns.
main_data 

Unnamed: 0_level_0,Unnamed: 1_level_0,Inflation,Population,GDP,Export,Import
Country Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ABW,1980,,59909.0,,,
ABW,1981,,60563.0,,,
ABW,1982,,61276.0,,,
ABW,1983,,62228.0,,,
ABW,1984,,62901.0,,,
...,...,...,...,...,...,...
ZWE,2017,0.893962,14812482.0,5.107466e+10,107.151887,83.660837
ZWE,2018,10.618866,15034452.0,3.415607e+10,124.909506,105.579357
ZWE,2019,255.304991,15271368.0,2.571741e+10,131.425343,79.585840
ZWE,2020,557.201817,15526888.0,2.686794e+10,135.325610,82.633621


In [5]:
# Select the rows of the country configured in `country` (set in the cell that
# loads the model and scaler) instead of repeating the literal code here, so the
# data always belongs to the same country as the loaded model and scaler.
country_data = main_data.loc[country]
country_data

Unnamed: 0_level_0,Inflation,Population,GDP,Export,Import
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980,15.561902,37491165.0,232600600000.0,,
1981,14.549346,37758631.0,202663000000.0,,
1982,14.415002,37986012.0,195856800000.0,,
1983,12.174073,38171525.0,170829100000.0,,
1984,11.280277,38330364.0,171980000000.0,,
1985,8.814455,38469512.0,180664300000.0,,
1986,8.794939,38584624.0,251141600000.0,,
1987,5.248019,38684815.0,318520300000.0,,
1988,4.837271,38766939.0,375891700000.0,,
1989,6.791436,38827764.0,414460800000.0,,


In [6]:
# Keep only the rows in which every column has a value.
country_data = country_data.dropna(axis=0, how='any')

In [7]:
# Columns that are passed through the scaler; GDP (the target) is left in raw units.
# NOTE(review): the order must match the column order the scaler was fitted with - confirm
# against the training notebook.
features_columns = ['Population', 'Inflation', 'Import', 'Export']

# Work on an explicit copy: country_data is derived from a slice of main_data, and
# assigning columns on such an object can trigger pandas' SettingWithCopyWarning
# (which the global warnings filter would hide).
country_data = country_data.copy()
country_data[features_columns] = scaler.transform(country_data[features_columns])


In [8]:
# Inspect the frame after scaling: the feature columns are scaler-transformed, GDP is unchanged.
country_data

Unnamed: 0_level_0,Inflation,Population,GDP,Export,Import
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1995,2.046269,-1.620659,614170000000.0,-1.430207,-1.60864
1996,1.208922,-1.563884,642251400000.0,-1.333622,-1.535775
1997,0.016486,-1.506516,589739800000.0,-1.39982,-1.587932
1998,-0.08621,-1.449632,618731500000.0,-1.283738,-1.385717
1999,0.271285,-1.393692,634394900000.0,-1.360438,-1.395951
2000,1.114798,-1.331717,598102900000.0,-1.251314,-1.199221
2001,1.232195,-1.234966,627798700000.0,-1.233608,-1.209962
2002,0.838531,-1.035968,708938200000.0,-1.139204,-1.108559
2003,0.818428,-0.777066,907963200000.0,-0.820687,-0.686691
2004,0.818699,-0.525641,1069829000000.0,-0.543822,-0.204391


# Counterfactual explanation

In [9]:
import dice_ml
from dice_ml.utils import helpers

In [11]:
import dice_ml
from dice_ml.utils import helpers

# Features DiCE treats as continuous and is allowed to change. A single list is
# used for both roles so the two can never drift apart.
features_to_vary = ['Population', 'Inflation', 'Import', 'Export']

# Data interface: country_data holds the scaler-transformed features plus the raw GDP outcome.
d = dice_ml.Data(dataframe=country_data,
                 continuous_features=features_to_vary,
                 outcome_name='GDP')

# Model interface around the pre-trained scikit-learn regressor.
m = dice_ml.Model(model=model, backend='sklearn', model_type='regressor')

# 'random' is DiCE's default sampler; it is spelled out because the seed below applies to it.
exp = dice_ml.Dice(d, m, method='random')

# The row to explain: the 2021 observation without the outcome column.
query_instance = country_data.drop(columns="GDP").loc[2021:2021]
if query_instance.empty:
    # A label slice silently returns no rows when the year is missing; fail loudly instead.
    raise KeyError("country_data has no row for year 2021 to use as the query instance")

# Target GDP interval the counterfactuals must fall into (raw GDP units).
desired_range = [1.5e+12, 1.7e+12]

# NOTE: if a permitted_range is ever passed, it must be expressed in scaled units,
# because the feature columns of country_data have already been scaler-transformed.

# A fixed seed makes the randomly sampled counterfactuals reproducible across runs.
dice_exp = exp.generate_counterfactuals(query_instance,
                                        total_CFs=4,
                                        features_to_vary=features_to_vary,
                                        desired_range=desired_range,
                                        random_seed=42)

dice_exp.visualize_as_dataframe(show_only_changes=True)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.72it/s]

Query instance (original outcome : 1525586853888.0)





Unnamed: 0,Inflation,Population,Export,Import,GDP
0,0.859168,1.013177,1.551466,1.345899,1525587000000.0



Diverse Counterfactual set (new outcome: [1500000000000.0, 1700000000000.0])


Unnamed: 0,Inflation,Population,Export,Import,GDP
0,-,-,-,0.99892586,-
1,1.136458053,-,-,-,-
2,-,-1.286463237,-,-,-
3,-,0.85935678,-,-,-


In [18]:
# Take a copy of the first query's counterfactual frame. Without the copy, cf_df
# aliases DiCE's internal DataFrame, so any later in-place edit of cf_df would also
# change dice_exp and make re-running this cell return already-modified values.
cf_df = dice_exp.cf_examples_list[0].final_cfs_df.copy()

In [19]:
# Show the counterfactual rows taken from the DiCE result.
cf_df

Unnamed: 0,Inflation,Population,Export,Import,GDP
0,0.981015,1412360000.0,132.828796,160.079062,18556110000000.0
1,4.816369,1412360000.0,118.151153,160.079062,18947020000000.0
2,0.981015,1412360000.0,130.004066,160.079062,18682590000000.0
3,14.690764,1412360000.0,100.306191,160.079062,19060490000000.0


In [20]:
# Rebuild cf_df from the DiCE result on every run and convert the feature columns
# back to their original units. Starting from a fresh copy keeps this cell
# idempotent: re-running it can no longer apply the inverse transform twice, and
# DiCE's own frame is left untouched.
cf_df = dice_exp.cf_examples_list[0].final_cfs_df.copy()
cf_df[features_columns] = scaler.inverse_transform(cf_df[features_columns])

In [21]:
# Show the counterfactuals after the feature columns went through scaler.inverse_transform.
cf_df

Unnamed: 0,Inflation,Population,Export,Import,GDP
0,11.927732,1.433516e+17,6141.730959,8140.725508,18556110000000.0
1,37.267578,1.433516e+17,5467.816161,8140.725508,18947020000000.0
2,11.927732,1.433516e+17,6012.035238,8140.725508,18682590000000.0
3,102.506848,1.433516e+17,4648.475875,8140.725508,19060490000000.0


In [15]:
# Write the counterfactual table to disk, leaving out the row index.
cf_df.to_csv(
    'counterfactuals.csv',
    index=False,
)