In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import random
import shap
import joblib
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from mlxtend.plotting import plot_confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
file_path = 'micro_world_139countries.csv'
df = pd.read_csv(file_path, encoding='ISO-8859-1')

In [105]:
print(list(df['economy'].unique()))

['Afghanistan', 'Albania', 'Algeria', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bangladesh', 'Belgium', 'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Bulgaria', 'Burkina Faso', 'Cambodia', 'Cameroon', 'Canada', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Costa Rica', 'Croatia', 'Cyprus', 'Czechia', "Côte d'Ivoire", 'Denmark', 'Dominican Republic', 'Ecuador', 'Egypt, Arab Rep.', 'El Salvador', 'Estonia', 'Eswatini', 'Ethiopia', 'Finland', 'France', 'Gabon', 'Gambia, The', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala', 'Guinea', 'Honduras', 'Hong Kong SAR, China', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran, Islamic Rep.', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Korea, Rep.', 'Kosovo', 'Kyrgyz Republic', 'Lao PDR', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Lithuania', 'Madagascar', 'Malawi', 'Malaysia', 'Mali', 'Malta', 'Mauritania', 'Mau

In [5]:
def load_model_objects():
    model_xgb = joblib.load('xgb_clf.joblib')
    scaler = joblib.load('scaler.joblib')
    encoder_y = joblib.load('encoder.joblib')  # Updated filename
    le_country_economy = joblib.load('country_encoder.joblib')
    le_regionwb = joblib.load('regionwb_encoder.joblib')
    return model_xgb, scaler, encoder_y, le_country_economy, le_regionwb

model_xgb, scaler, encoder, le_country_economy, le_regionwb = load_model_objects()

In [10]:
def process_data(df, _scaler, _label_encoder,region_encoder, country_encoder):
    
    #print(df)
    sample_df = df[['inc_q', 'remittances', 'educ', 'age', 'female', 'mobileowner','internetaccess', 'pay_utilities', 'receive_transfers','receive_pension', 'economy', 'regionwb','account']].sample(n=5000, random_state=42)
    #print(sample_df.columns)
    
    cols=list(sample_df.columns)
    cols.remove('account')
    #print(cols)
    sample_df = sample_df.dropna(subset=['account','inc_q', 'remittances', 'educ', 'age', 'female', 'mobileowner','internetaccess', 'pay_utilities', 'receive_transfers','receive_pension', 'economy', 'regionwb']) 
    
    
    
    sample_df['economy'] = country_encoder.fit_transform(sample_df['economy'])#Giving unique int values to economies
    sample_df['regionwb'] = region_encoder.fit_transform(sample_df['regionwb'])#Unique int values to regions

    X = sample_df.drop('account', axis=1)
    y = sample_df['account']
    
    y= _label_encoder.fit_transform(y)
    y=pd.DataFrame(y,columns=['account'])
    print(y.columns)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    X = pd.DataFrame(X)
    X.columns = cols
    #X['account'] = y['account']
    return X
 

In [11]:
test=process_data(df,scaler,encoder,le_regionwb,le_country_economy)

Index(['account'], dtype='object')


In [8]:
test

Unnamed: 0,inc_q,remittances,educ,age,female,mobileowner,internetaccess,pay_utilities,receive_transfers,receive_pension,economy,regionwb,account
0,0.568325,0.744310,0.183513,0.593205,-0.928079,-0.41603,-0.760305,-0.570759,0.412752,0.291572,-0.470087,-0.180559,0
1,0.568325,0.744310,-1.214680,1.681072,-0.928079,-0.41603,1.256419,0.993501,0.412752,0.291572,1.192070,1.184723,0
2,-1.537233,-1.566763,1.581706,0.170145,1.077494,-0.41603,-0.760305,-1.352889,0.412752,0.291572,0.968318,-1.090747,1
3,0.568325,0.744310,-1.214680,0.049271,-0.928079,-0.41603,1.256419,0.993501,0.412752,0.291572,-1.716704,0.729629,0
4,0.568325,0.744310,1.581706,-0.313351,1.077494,-0.41603,-0.760305,0.993501,0.412752,0.291572,-1.141342,-1.545841,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,-0.133528,0.744310,0.183513,-1.099033,1.077494,-0.41603,-0.760305,-1.352889,0.412752,0.291572,-1.460987,-0.180559,1
3996,1.270178,-1.566763,0.183513,-0.373788,1.077494,-0.41603,-0.760305,0.993501,0.412752,0.291572,-0.885626,1.184723,1
3997,1.270178,-0.411226,-1.214680,-0.615536,1.077494,-0.41603,-0.760305,-0.570759,0.412752,0.291572,0.456885,0.274535,0
3998,1.270178,-1.566763,1.581706,-0.373788,1.077494,-0.41603,-0.760305,-1.352889,-2.567407,0.291572,1.415821,-1.090747,1


In [12]:
prediction = model_xgb.predict(test)
print(prediction)