In [1]:
import sys
import os
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.figure_factory as ff

import warnings
# Silence FutureWarning messages for the rest of the kernel session.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Add the parent directory to the import path before the credmodex imports below.
# NOTE(review): presumably so the local checkout of credmodex is importable — confirm.
sys.path.append(os.path.abspath('..'))
from credmodex.credlab import CredLab
import credmodex

from sklearn.linear_model import LogisticRegression
from credmodex.utils import plotly_main_layout

In [2]:
# (non-event, event) counts for grades 0..7; every column below derives from this table.
grade_counts = [
    (95, 309),
    (187, 224),
    (549, 299),
    (1409, 495),
    (3743, 690),
    (4390, 424),
    (2008, 94),
    (593, 8),
]

# One row per observation: within each grade the non-events (y_true=0) come first,
# then the events (y_true=1); y_pred is the grade's observed event rate.
rows = [
    (grade, label, event / (non_event + event))
    for grade, (non_event, event) in enumerate(grade_counts)
    for label, count in ((0, non_event), (1, event))
    for _ in range(count)
]
df = pd.DataFrame(rows, columns=['Grade', 'y_true', 'y_pred'])

In [3]:
# Small nine-row frame; rebinding `df` here replaces whatever the name held before.
df = pd.DataFrame(
    {
        "I": [*range(1, 10)],
        "Y": [1, 0, 1, 1, 1, 0, 1, 0, 1],
        "mu": [0.90, 0.10, 0.80, 0.70, 0.45, 0.35, 0.70, 0.20, 0.80],
    }
)

In [4]:
# Load the raw data and parse the `data` column as timestamps.
df = pd.read_csv(r'df.csv')
df['data'] = df['data'].astype('datetime64[ns]')

# Keep rows dated on or before 2024-07-30. The .copy() makes the result an
# independent frame, so the assignment below does not write into a slice of
# the frame that was just loaded.
df = df[df['data'] <= '2024-07-30'].copy()

# -999999.0 in `score scr` is treated as a missing-value sentinel. Blank only
# that column: assigning NaN through a boolean mask on the whole frame would
# overwrite every column of the matching rows (target and date included).
df.loc[df['score scr'] == -999999.0, 'score scr'] = np.nan

In [5]:
# Columns handed to CredLab as model inputs.
features = [
    'idade',
    'score scr',
    'score pod',
    'score nv',
    'total_consultas',
    'total_spc',
    'cidade_loja_cliente',
]

# Project built on `df`: `over` is the target, `data` the time column, and the
# split is configured as test_size=0.2 with split_type='random'.
project = CredLab(
    df,
    target='over',
    features=features,
    time_column='data',
    test_size=0.2,
    split_type='random',
)

In [6]:
def treatment_func(df):
    """Return a copy of ``df`` without the rows that contain any missing value.

    The frame passed in is the one that gets treated; the function does not
    reach for notebook-level state, so it behaves the same regardless of which
    cells have been run before it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the rows with no NaN/None in any column.
    """
    # dropna() already returns a new object, so the caller's frame is untouched.
    return df.dropna()

# Register a saga-solver logistic regression (up to 5000 iterations) on the
# project, with treatment_func supplied as the data treatment.
project.add_model(
    treatment=treatment_func,
    model=LogisticRegression(solver='saga', max_iter=5000),
)

In [7]:
# Attach a rating to the model exposed as `project.LogisticRegression_1`.
# The recorded output shows the call returns a CH_Binning object.
project.LogisticRegression_1.add_rating()

<credmodex.rating.calinski_harabasz_binning.CH_Binning at 0x1ec7e3dfbc0>

In [8]:
# Plot gains per risk group for the rating attached to the model.
project.LogisticRegression_1.rating.plot_gains_per_risk_group()

In [9]:
# Deviance/odds goodness-of-fit check of the model's `score` column against the
# `over` target; the recorded output is a dict with 'power', 'accuracy' and
# 'conclusion'.
# NOTE(review): the recorded output reports negative power, and the binning
# table at the end of this notebook shows the event rate falling as `score`
# rises. `score` may be oriented opposite to the probability of `over` that
# y_pred presumably expects — confirm against the credmodex documentation.
credmodex.discriminancy.GoodnessFit.deviance_odds(
    y_true=project.LogisticRegression_1.df['over'],
    y_pred=project.LogisticRegression_1.df['score'],
    p_value=True
)

{'power': -21.63,
 'accuracy': 95.6,
 'conclusion': '⚠️ The model has negative predictive power, meaning it ranks outcomes worse than random. This suggests either a serious model flaw or a reversal in prediction logic (e.g., predicting the opposite class). ✅ The model is well-calibrated, with high naïve accuracy suggesting predicted probabilities align closely with observed outcomes.'}

In [10]:
# Run the project-level goodness-of-fit evaluation with method='psi' and plot
# the returned result.
# NOTE(review): 'psi' presumably means population stability index — confirm.
project.eval_goodness_of_fit(method='psi').plot()

In [11]:
# Fresh CH_Binning instance with default arguments, created here independently
# of the rating that add_rating() attached to the model.
bins = credmodex.rating.CH_Binning()

In [12]:
# Fit the binning on the model's `score` column against the `over` target; the
# recorded output is an array holding one bin-interval label per row.
# NOTE(review): this reads project.model rather than
# project.LogisticRegression_1 — presumably the same model; confirm.
bins.fit_transform(project.model.df['score'], project.model.df['over'])

array(['[0.53, 0.58)', '(-inf, 0.42)', '[0.42, 0.46)', ...,
       '[0.58, 0.64)', '[0.53, 0.58)', '(-inf, 0.42)'],
      shape=(104510,), dtype=object)

In [13]:
# Build the summary table for the fitted bins; per the recorded output it lists
# count, non-event/event totals, event rate, WoE, IV and JS for each bin.
bins.binning_table.build()

Unnamed: 0,Bin,Count,Count (%),Non-event,Event,Event rate,WoE,IV,JS
0,"(-inf, 0.42)",8147,0.077954,2848,5299,0.650423,-0.834512,0.053593,0.006511
1,"[0.42, 0.46)",14979,0.143326,6258,8721,0.582215,-0.545484,0.042833,0.005289
2,"[0.46, 0.50)",15418,0.147527,7480,7938,0.514853,-0.27304,0.01109,0.001382
3,"[0.50, 0.53)",11946,0.114305,6347,5599,0.468692,-0.088217,0.000893,0.000112
4,"[0.53, 0.58)",14762,0.14125,8537,6225,0.421691,0.102225,0.001467,0.000183
5,"[0.58, 0.64)",17644,0.168826,11057,6587,0.373328,0.304355,0.015274,0.001902
6,"[0.64, 0.67)",5256,0.050292,3497,1759,0.334665,0.473549,0.010805,0.001338
7,"[0.67, 0.74)",9744,0.093235,6817,2927,0.30039,0.63183,0.034894,0.004291
8,"[0.74, inf)",6614,0.063286,4974,1640,0.247959,0.895917,0.045612,0.005518
9,Special,0,0.0,0,0,0.0,0.0,0.0,0.0
