In [None]:
import sys
import os
import warnings

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

sys.path.append(os.path.abspath('..'))
from credmodex.credlab import CredLab
import credmodex

from sklearn.linear_model import LogisticRegression
from credmodex.utils import plotly_main_layout

In [None]:
def get_structure(root_dir):
    """Return an indented, tree-like text listing of ``root_dir``.

    Every directory is rendered as ``name/`` and every file as its bare name,
    indented four spaces per nesting level below ``root_dir``. Directories
    named ``__pycache__`` are skipped and never descended into.

    Parameters
    ----------
    root_dir : str
        Directory to walk. A trailing path separator is tolerated.

    Returns
    -------
    str
        One line per directory/file, joined with newlines. Empty when
        ``os.walk`` yields nothing (e.g. the directory does not exist).
    """
    lines = []
    for current, dirs, files in os.walk(root_dir):
        # Prune in place so os.walk does not descend into __pycache__;
        # sorting makes the listing order independent of the filesystem.
        dirs[:] = sorted(d for d in dirs if d != '__pycache__')

        # Depth is derived from the path relative to the root instead of a
        # textual str.replace, which miscounts when root_dir ends with a
        # separator or when its text re-occurs deeper in the path.
        relative = os.path.relpath(current, root_dir)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1

        indent = ' ' * 4 * level
        # normpath drops a trailing separator so the root's name is not empty.
        dir_name = os.path.basename(os.path.normpath(current))
        lines.append(f'{indent}{dir_name}/')

        subindent = ' ' * 4 * (level + 1)
        lines.extend(f'{subindent}{file_name}' for file_name in sorted(files))
    return '\n'.join(lines)

# Show the directory tree of the credmodex folder.
# NOTE(review): this is an absolute, machine-specific path; the cell will
# print an empty listing on any other machine — consider a relative path.
credmodex_dir = r'C:\Users\gustavo.filho\Documents\Python\Modules\Credit Risk\credmodex'
print(get_structure(credmodex_dir))

In [None]:
# Small hand-written frame: nine rows with ratings 1..9, a binary target and
# a score per row.
df = pd.DataFrame(
    dict(
        rating=[*range(1, 10)],
        target=[1, 0, 1, 1, 1, 0, 1, 0, 1],
        score=[0.90, 0.10, 0.80, 0.70, 0.45, 0.35, 0.70, 0.20, 0.80],
    )
)

In [None]:
# Per-rating row counts as (rows with target 0, rows with target 1); the list
# position is the rating value (0..7).
_bucket_counts = [
    (95, 309),
    (187, 224),
    (549, 299),
    (1409, 495),
    (3743, 690),
    (4390, 424),
    (2008, 94),
    (593, 8),
]

# Expand the counts into one row per observation. Within a rating the
# target-0 rows come first, and every row of a rating carries that rating's
# share of target-1 rows as its score.
df = pd.DataFrame({
    'rating': [
        rating
        for rating, (n_zero, n_one) in enumerate(_bucket_counts)
        for _ in range(n_zero + n_one)
    ],
    'target': [
        flag
        for n_zero, n_one in _bucket_counts
        for flag in [0] * n_zero + [1] * n_one
    ],
    'score': [
        n_one / (n_zero + n_one)
        for n_zero, n_one in _bucket_counts
        for _ in range(n_zero + n_one)
    ],
})

In [None]:
# Load the dataset and parse the 'data' column as timestamps.
df = pd.read_csv(r'df.csv')
df['data'] = df['data'].astype('datetime64[ns]')

# Keep rows dated on or before 2024-07-30. The explicit copy makes the
# assignment below write to an independent frame instead of a slice of the
# unfiltered one (avoids SettingWithCopyWarning).
df = df[df['data'] <= '2024-07-30'].copy()

# -999999.0 in 'score scr' is replaced by NaN. Only that column is blanked:
# the previous `df[mask] = np.nan` overwrote every column of the matching
# rows (date and target included) and upcast integer columns to float.
df['score scr'] = df['score scr'].mask(df['score scr'] == -999999.0)

In [None]:
# Columns of `df` handed to CredLab as model features.
features = [
    'idade',
    'score scr',
    'total_spc',
    'cidade_loja_cliente',
]

# Build the CredLab project on `df`, with 'over' as the target column and
# 'data' as the time column. The split settings are passed through as-is;
# their exact semantics are defined by credmodex.
project = CredLab(
    df,
    target='over',
    features=features,
    time_column='data',
    test_size=0.2,
    split_type='random',
)

In [None]:
def treatment_func(df):
    """Return a copy of ``project.df`` with every row containing a NaN removed.

    NOTE(review): the ``df`` argument is ignored — the function always reads
    the notebook-level ``project.df``. If the caller is expected to supply the
    frame to treat, this should operate on ``df`` instead; confirm against
    how credmodex invokes the treatment callback.
    """
    return project.df.copy().dropna()

# Register a model on the project, using treatment_func as its treatment step.
project.add_model(treatment=treatment_func)

# Attach ratings to the model that was just added.
project.model.add_rating()

In [None]:
# Show the 'score scr' values of the model frame that are zero or negative.
project.model.df.loc[lambda frame: frame['score scr'] <= 0, 'score scr']

3831    0.0
Name: score scr, dtype: float64

In [None]:
# Run the deviance/odds goodness-of-fit check with the 'over' column as the
# observed outcome and the 'score' column as the prediction; info=True is
# passed through to the call unchanged.
_scored = project.model.df
credmodex.discriminancy.GoodnessFit.deviance_odds(
    y_true=_scored['over'], y_pred=_scored['score'], info=True,
)

{'power': 4.92,
 'accuracy': 100.0,
 'conclusion': '⚠️ The model has weak predictive power, indicating limited ability to rank or discriminate between outcomes. It may need retraining or feature engineering. ✅ The model is well-calibrated, with high naïve accuracy suggesting predicted probabilities align closely with observed outcomes.'}

In [None]:
# Evaluate goodness of fit with the 'gini' method, then draw its 'cap' plot.
_gini_eval = project.eval_goodness_of_fit(method='gini')
_gini_eval.plot(method='cap')

In [None]:
# dff = project.model.df[project.model.df['data'] <= '2024-03-30']

# plotly_main_layout(go.Figure().add_trace(go.Scatter(
#     mode='markers',
#     x=dff['score'],
#     y=dff['score scr']/1000,
#     marker=dict(color=dff['over']),
# )))