# Import Libraries

In [None]:
from __future__ import unicode_literals

# from module import WoeAnalysis
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import warnings
import pickle
import dill


# Silence every warning raised through the standard `warnings` module so the
# notebook output stays readable.
warnings.filterwarnings(action='ignore')

# Lift the cap on displayed columns: with the option set to None, pandas
# prints every column of a DataFrame instead of hiding the middle ones.
pd.options.display.max_columns = None

In [None]:
# Load the prepared dataset, which is stored on disk as a pickled DataFrame.
# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that come from a trusted source.
data_path = "Data/Data S2.pkl"
df = pd.read_pickle(data_path)

## Split the data

In [None]:
# the 'Actual' column is the target
y = df['Actual']

# every remaining column is kept as a feature
X = df.drop(columns=['Actual'])

# hold out 20% of the rows for testing; stratifying on y keeps the target
# distribution similar in both parts, and the fixed seed makes the split
# reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
from ScoringPy import WoeAnalysis

# Initialise the WoeAnalysis object that the following cells use for the
# per-variable analysis.
# NOTE(review): presumably save=False means nothing is written under `path`;
# the meaning of type=2 is not visible here — confirm against the ScoringPy docs.
woe_analysis = WoeAnalysis(
    save=False,
    path="Data/",
    type=2,
)

In [None]:
# Run the discrete (categorical) analysis for MaritalStatus on the training
# split, then plot it and print its report.
# NOTE(review): the original note says several optional parameters are
# available (path, name, format, type) — confirm which call accepts them.
marital_status_woe = woe_analysis.discrete(
    column="MaritalStatus",
    df=X_train,
    target=y_train,
)
marital_status_woe.plot().report()

In [None]:
# Consecutive bin edges for RefinanceRate; each adjacent pair becomes one
# right-closed interval: (-1, 0], (0, 0.2], ..., (0.65, inf].
bin_edges = [-1, 0, 0.2, 0.35, 0.45, 0.55, 0.65, np.inf]
bins = pd.IntervalIndex.from_breaks(bin_edges)

# Run the continuous analysis with these bins on the training split, then
# plot it and print its report.
refinance_rate_woe = woe_analysis.continuous(
    column="RefinanceRate",
    bins=bins,
    df=X_train,
    target=y_train,
)
refinance_rate_woe.plot().report()


# Export data for woe_analysis

In [None]:
# Copy the results stored on the analysis object into notebook-level names
# so later cells can use them directly.
WoE_dict, Variable_types, Variable_Ranges, IV_excel, IV_dict = (
    woe_analysis.WoE_dict,
    woe_analysis.Variable_types,
    woe_analysis.Variable_Ranges,
    woe_analysis.IV_excel,
    woe_analysis.IV_dict,
)

In [None]:
from ScoringPy import WoeBinning

# Read the WoE mapping from the analysis object (repeated here so this cell
# can run on its own).
WoE_dict = woe_analysis.WoE_dict

# Build the transformer from that mapping.
# NOTE(review): Production=False is passed through as-is; its exact effect is
# defined by ScoringPy — confirm against the library docs.
woe_transform = WoeBinning(
    WoE_dict=WoE_dict,
    Production=False,
)

# Apply the transformer to the full feature frame.
X_transformed = woe_transform.transform(X)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Logistic regression classifier:
#   max_iter=1_000          - iteration budget for the solver
#   class_weight='balanced' - reweights classes inversely to their frequency
#   C=0.1                   - inverse regularisation strength (smaller = stronger)
model = LogisticRegression(max_iter=1_000, class_weight='balanced', C=0.1)

# Pipeline: the WoE transformation step feeds the logistic regression model.
# Step names are kept unchanged because they are the keys used to look up
# steps on the fitted pipeline.
pipeline = Pipeline(
    steps=[
        ('woe', woe_transform),
        ('logistic regression', model),
    ]
)

# Train on the training split only. Fitting on the full X / y would leak the
# held-out test rows into the model, and would be inconsistent with the WoE
# analysis, which was run on X_train / y_train.
pipeline.fit(X_train, y_train)


In [None]:
from ScoringPy import CreditScoring

# Build the scoring object from the dataset, the logistic regression model
# and the WoE mapping.
# NOTE(review): production=True is passed through as-is; its exact effect is
# defined by ScoringPy — confirm against the library docs.
scoring = CreditScoring(
    data=df,
    model=model,
    WoE_dict=WoE_dict,
    production=True,
)

# Apply the scoring to the dataset. Despite its name, `temp_df` is a result
# object: its `.data` attribute replaces `df` and its `.scorecard` attribute
# is kept as `scorecard`.
temp_df = scoring.apply(df)
df, scorecard = temp_df.data, temp_df.scorecard