In [1]:
import numpy as np
import pandas as pd
import joblib
import warnings
warnings.filterwarnings('ignore')

In [2]:
country = 'CHN'
model = joblib.load(f'models/{country}_model.pkl')
scaler = joblib.load(f'scalers/{country}_scaler.pkl')


new_data = {
    'Population' : 47415794, 
    'Inflation' : 3.0931351197642, 
    'Import' : 134.0948592709,
    'Export' : 135.6812883345
}

df = pd.DataFrame([new_data])

In [3]:
main_data = pd.read_csv("combined_data.csv", index_col=[0, 1])

In [4]:
main_data 

Unnamed: 0_level_0,Unnamed: 1_level_0,Inflation,Population,GDP,Export,Import
Country Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ABW,1980,,59909.0,,,
ABW,1981,,60563.0,,,
ABW,1982,,61276.0,,,
ABW,1983,,62228.0,,,
ABW,1984,,62901.0,,,
...,...,...,...,...,...,...
ZWE,2017,0.893962,14812482.0,5.107466e+10,107.151887,83.660837
ZWE,2018,10.618866,15034452.0,3.415607e+10,124.909506,105.579357
ZWE,2019,255.304991,15271368.0,2.571741e+10,131.425343,79.585840
ZWE,2020,557.201817,15526888.0,2.686794e+10,135.325610,82.633621


In [5]:
country_data = main_data.loc['CHN']
country_data

Unnamed: 0_level_0,Inflation,Population,GDP,Export,Import
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980,,981235000.0,191149200000.0,0.79611,1.18692
1981,,993885000.0,195866400000.0,0.967993,1.310348
1982,,1008630000.0,205089700000.0,0.981804,1.147856
1983,,1023310000.0,230686700000.0,0.977625,1.273147
1984,,1036825000.0,259946500000.0,1.149741,1.631462
1985,,1051040000.0,309488000000.0,1.203008,2.514868
1986,,1066790000.0,300758100000.0,1.361004,2.553675
1987,7.233836,1084035000.0,272973000000.0,1.734663,2.572245
1988,18.811818,1101630000.0,312353600000.0,2.090023,3.289589
1989,18.245638,1118650000.0,347768100000.0,2.310919,3.520053


In [6]:
country_data = country_data.dropna()

In [7]:
features_columns = ['Population', 'Inflation', 'Import', 'Export']  
country_data[features_columns] = scaler.transform(country_data[features_columns])  


In [8]:
country_data

Unnamed: 0_level_0,Inflation,Population,GDP,Export,Import
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1987,0.389341,-2.067136,272973000000.0,-1.002608,-1.010828
1988,2.28283,-1.882355,312353600000.0,-0.994544,-0.996136
1989,2.190236,-1.703613,347768100000.0,-0.989532,-0.991416
1990,-0.294518,-1.529965,360857900000.0,-0.979997,-0.998481
1991,-0.212028,-1.366188,383373300000.0,-0.970197,-0.985747
1992,0.245448,-1.217166,426915700000.0,-0.957192,-0.965274
1993,1.595668,-1.075706,444731300000.0,-0.950401,-0.93678
1994,3.173346,-0.935033,564321900000.0,-0.921196,-0.922573
1995,1.952378,-0.798299,734484800000.0,-0.893475,-0.902451
1996,0.565857,-0.664977,863749300000.0,-0.891211,-0.894081


In [9]:
df

Unnamed: 0,Population,Inflation,Import,Export
0,47415794,3.093135,134.094859,135.681288


In [10]:
columns = ['Population', 'Inflation', 'Import', 'Export']
df_scaled = df.copy()
df_scaled[columns] = scaler.transform(df[columns])

In [11]:
df

Unnamed: 0,Population,Inflation,Import,Export
0,47415794,3.093135,134.094859,135.681288


In [12]:
df_scaled

Unnamed: 0,Population,Inflation,Import,Export
0,-12.953584,-0.287838,1.682871,2.036775


In [13]:
predicted_gdp = model.predict(df_scaled)

In [14]:
predicted_gdp

array([1.56044901e+13])

In [15]:
df['GDP'] = predicted_gdp
df

Unnamed: 0,Population,Inflation,Import,Export,GDP
0,47415794,3.093135,134.094859,135.681288,15604490000000.0


# Counterfactual explanation

In [16]:
import dice_ml
from dice_ml.utils import helpers

In [17]:
import dice_ml
from dice_ml.utils import helpers

d = dice_ml.Data(dataframe=country_data, continuous_features=['Population', 'Inflation', 'Import', 'Export'], outcome_name='GDP')

m = dice_ml.Model(model=model, backend='sklearn', model_type='regressor') 

exp = dice_ml.Dice(d, m)

query_instance = country_data.drop(columns="GDP").iloc[0:1]


desired_range = [1.590000e+13, 2.060449e+13]

features_to_vary = ['Population', 'Inflation', 'Import', 'Export']
#permitted_range = {'Population':[0, 10000e6], 'Inflation':[0,20]}

dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=4, 
                                        features_to_vary=features_to_vary,
                                        desired_range=desired_range)

dice_exp.visualize_as_dataframe(show_only_changes=False)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.34it/s]

Query instance (original outcome : -368738861056.0)





Unnamed: 0,Inflation,Population,Export,Import,GDP
0,0.389341,-2.067136,-1.002608,-1.010828,-368738900000.0



Diverse Counterfactual set (new outcome: [15900000000000.0, 20604490000000.0])


Unnamed: 0,Inflation,Population,Export,Import,GDP
0,0.389341,-2.067136,2.253949,2.18326,16434800000000.0
1,0.389341,-1.057604,2.270114,2.088178,16273890000000.0
2,-0.400166,-2.067136,2.253949,2.18326,16477820000000.0
3,0.389341,0.083947,2.309149,2.029743,16284860000000.0


In [29]:
df_counterfactuals = dice_exp.visualize_as_dataframe(show_only_changes)

NameError: name 'show_only_changes' is not defined

In [25]:
df_counterfactuals.drop('GDP', axis = 1, inplace = True)

AttributeError: 'NoneType' object has no attribute 'drop'

In [None]:
result = scaler.inverse_transform(df_counterfactuals)