# Import Libraries

In [None]:
# importing necessary libraries, modules and functions
from __future__ import unicode_literals

from Functions.General.FilesFlow import make_directory , current_path_and_path_list
from Functions.General.Commons import double_group
from Functions.General.Module import WoeAnalysis

import pandas as pd
import warnings
import dill

# Show every column when a DataFrame is rendered: with the column limit set to
# None, pandas does not truncate or hide columns of wide frames.
pd.set_option('display.max_columns', None)

# Install a blanket "ignore" filter so that no warning category is reported by
# the cells that follow (this also hides deprecation warnings).
warnings.simplefilter('ignore')


# Path Control & Global Variables

In [None]:
# Resolve where this notebook lives: `curr_path` is path-like (it exposes
# `.parents`) and `path_parts` is the sequence of its components.
curr_path, path_parts = current_path_and_path_list()

# Common root shared by the raw data and the scorecard outputs, kept as a
# string with a trailing Windows separator so the pieces below can be appended.
project_root = str(curr_path.parents[2]) + "\\"

# Folder the raw data is imported from
raw_dir = project_root + "Data\\Raw Data\\"

# Scorecard-specific output root, named after the second-to-last component of
# the current path
scorecard_root = project_root + "Data\\Scorecards\\" + path_parts[-2]

# Output folders, passed through make_directory (its return value is kept as the path):
# processed data, results, and the pickled model-flow steps
proc_dir = make_directory(scorecard_root + "\\Processed Data\\")
res_dir = make_directory(scorecard_root + "\\Results\\")
model_dir = make_directory(scorecard_root + "\\Model Flow\\")

## Import Data

In [None]:
def _load_dill(path):
    """Open `path` in binary mode and return the object dill deserializes from it.

    NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return dill.load(handle)


# Step 1 (Data Preparation): load the stored pipeline, run it to obtain the
# initial frame, then clear the pipeline once its output has been captured.
pipeline = _load_dill(model_dir + 'Step 1 (Data Preparation).pkl')
df = pipeline.run()
pipeline.clear()

# Step 2 (Filling Data): same flow, but the frame produced by step 1 is passed
# in as the initial data.
pipeline = _load_dill(model_dir + 'Step 2 (Filling Data).pkl')
df = pipeline.run(initial_data=df)
pipeline.clear()

# Stored variables object; later cells read its "Variable_types" and
# "Variable_Ranges" entries.
Variables = _load_dill(model_dir + 'Variables.pkl')

In [None]:
# Column holding the target; every other column is treated as a feature
target_col = 'Actual'

# Target vector and feature matrix
y = df[target_col]
X = df.drop(columns=[target_col])

# The combined frame is not needed past this point; drop its name to free memory
del df

# Initializing the WoeAnalysis class
woe_analysis = WoeAnalysis()


In [None]:
# Run the WoE analysis once per variable listed under "Variable_types",
# dispatching on the type declared for that variable
for column, var_type in Variables["Variable_types"].items():
    if var_type == "discrete":
        # discrete variables need no bin specification
        woe_analysis.discrete(column=column, df=X, target=y)
    else:
        # any other type is handled as continuous, using the bins stored for
        # this variable under "Variable_Ranges"
        woe_analysis.continuous(column=column, bins=Variables["Variable_Ranges"][column], df=X, target=y)


In [7]:
# Display the IV_excel attribute of the WoE analysis object as the cell output
# (rendered below as a table with per-partition Woe / IV / PIV columns for each Variable)
woe_analysis.IV_excel

Unnamed: 0,Partitions,Total,Total Perc,Good,Good Rate,Bad,Bad Rate,Good Dist,Bad Dist,Woe,Good Rate Difference,Woe Difference,IV,PIV,Validation,Variable
0,ოვერდრაფტები,3,0.000374,2.0,66.666667,1.0,33.333333,0.000268,0.001780,-1.893448,,,inf,0.000029,True,Purpose
1,"სახლის, აგარაკის და სხვა მშენებლობა",6,0.000748,5.0,83.333333,1.0,16.666667,0.000670,0.001780,-0.977157,16.666667,0.916291,inf,0.000011,True,Purpose
2,სამომხმარებლო ვალდებულების დაფარვა,55133,6.870515,48445.0,87.869334,6688.0,12.130666,6.491523,11.905228,-0.606481,4.536001,0.370676,inf,0.032833,True,Purpose
3,იპოთეკური სესხი მშენებლობის პროცესში მყოფი უძრ...,20,0.002492,18.0,90.000000,2.0,10.000000,0.002412,0.003560,-0.389370,2.130666,0.217110,inf,0.000004,True,Purpose
4,სხვა პირადი ხარჯები,144319,17.984617,130407.0,90.360244,13912.0,9.639756,17.474249,24.764583,-0.348686,0.360244,0.040684,inf,0.025420,True,Purpose
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,"(-50.0, 0.0]",614691,76.601018,582650.0,94.787462,32041.0,5.212538,78.073809,57.035798,0.313976,,,0.236641,0.066054,True,RejectedAppCount
1,"(0.0, 1.0]",107387,13.382258,96345.0,89.717564,11042.0,10.282436,12.910016,19.655731,-0.420365,5.069899,0.734341,0.236641,0.028357,True,RejectedAppCount
2,"(1.0, 2.0]",37762,4.705791,32547.0,86.189820,5215.0,13.810180,4.361226,9.283159,-0.755449,3.527743,0.335083,0.236641,0.037183,True,RejectedAppCount
3,"(2.0, 3.0]",17050,2.124722,14319.0,83.982405,2731.0,16.017595,1.918714,4.861420,-0.929675,2.207416,0.174227,0.236641,0.027358,True,RejectedAppCount
