In [1]:
import numpy as np
import pandas as pd
import pickle
import re

from lightgbm import LGBMClassifier


class credit_scorer:
    '''Score a client's credit risk with a pickled preprocessor and classifier.

    The preprocessor must expose ``transform(X)`` and the classifier
    ``predict_proba(X)``; both are loaded from pickle files at construction.

    Attributes:
        preprocessor: Object loaded from ``preprocess_path``.
        clf: Object loaded from ``model_path``.
        threshold: Positive-class probability at or above which a client is
            reported as having payment difficulties.
        scorer_meaning: Maps the boolean decision to its user-facing label.
    '''

    # Decision cut-off applied to the positive-class probability.
    DEFAULT_THRESHOLD = 0.47

    # Sentinel returned by `transfrom` when no row matches the client id;
    # `make_prediction` recognises it by being a `str`.
    NOT_FOUND_MESSAGE = 'This client is not in the database...'

    def __init__(self, preprocess_path:str, model_path:str,
                 threshold:float=DEFAULT_THRESHOLD):
        '''Load the preprocessor and the classifier from disk.

        Arguments:
            preprocess_path: Path of the pickled preprocessor.
            model_path: Path of the pickled classifier.
            threshold: Optional decision cut-off (defaults to 0.47).
        '''
        self.preprocessor = self.get_preprocess(preprocess_path)
        self.clf = self.get_model(model_path)
        self.threshold = threshold
        self.scorer_meaning = {
            False : 'No payement difficulties',
            True : 'Payement difficulties'}

    @staticmethod
    def _load_pickle(path:str):
        '''Return the object stored in the pickle file at ``path``.'''
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load artefacts produced by a trusted training pipeline.
        with open(path, "rb") as f:
            return pickle.load(f)

    def get_model(self, model_path:str):
        '''Open the pickle file which stores the model.

        Arguments:
            model_path: Path of the pickled model.

        Returns:
            The unpickled model object.
        '''
        return self._load_pickle(model_path)

    def get_preprocess(self, preprocess_path:str):
        '''Open the pickle file which stores the preprocessor.

        Arguments:
            preprocess_path: Path of the pickled preprocessor.

        Returns:
            The unpickled preprocessor object.
        '''
        return self._load_pickle(preprocess_path)

    def transfrom(self, data, client_id:dict):
        '''Preprocess the features of one client for prediction.

        Arguments:
            data: DataFrame with an ``SK_ID_CURR`` column; it is not modified.
            client_id: Mapping whose ``'id'`` entry is the client to look up.

        Returns:
            The output of ``self.preprocessor.transform`` for the matching
            row(s), or ``NOT_FOUND_MESSAGE`` (a ``str``) when ``client_id``
            has no ``'id'`` or no row matches it.

        Raises:
            KeyError: If ``data`` has no ``SK_ID_CURR`` column.
            Exception: Anything raised by the preprocessor is propagated
                instead of being reported as a missing client.
        '''
        client = client_id.get('id')

        # Filter first: boolean indexing already yields a new frame, so the
        # caller's data stays untouched and only the selected rows are cleaned.
        rows = data[data['SK_ID_CURR'] == client]
        if rows.empty:
            print(self.NOT_FOUND_MESSAGE)
            return self.NOT_FOUND_MESSAGE

        # TARGET is absent from unlabeled data, hence errors='ignore'.
        X = rows.drop(columns=['TARGET', 'SK_ID_CURR'], errors='ignore')
        X = X.replace([np.inf, -np.inf], np.nan)

        return self.preprocessor.transform(X)

    # Correctly spelled alias; `transfrom` is kept for existing callers.
    transform = transfrom

    def make_prediction(self, features)->tuple:
        '''Predict the credit score.

        Arguments:
            features: Output of `transfrom`: either preprocessed features
                accepted by ``self.clf.predict_proba`` or the not-found
                message string.

        Returns:
            ``(prob, score)`` where ``prob`` is the array of positive-class
            probabilities and ``score`` the label for the first row. When
            ``features`` is a string, ``(None, NOT_FOUND_MESSAGE)``.
        '''
        if isinstance(features, str):
            return None, self.NOT_FOUND_MESSAGE

        prob = self.clf.predict_proba(features)[:, 1]

        # Only the first row is classified; cast to a plain bool for the lookup.
        pred = bool(prob[0] >= self.threshold)

        return prob, self.scorer_meaning[pred]

In [2]:
# Build the scorer from the pickled preprocessing pipeline and classifier,
# both given as paths relative to the notebook's working directory.
scorer = credit_scorer(preprocess_path='pipeline', model_path='classifier')

In [22]:
from pydantic import BaseModel

# Request schema carrying a single integer field: the client id to look up.
class Customer(BaseModel):
    id: int

# NOTE(review): this cell is numbered In [22] but sits above In [17], whose
# recorded output (100004) does not match the id set here. The notebook was
# executed out of order; re-run top to bottom to refresh the outputs.
m = Customer(id=100006)

In [17]:
# Show the client id held by the request object.
m.id

100004

In [18]:
# Load the modelling dataset. `verbose` is no longer passed: False is its
# default and the keyword is deprecated in recent pandas releases, so the
# explicit argument only produced a warning.
df = pd.read_csv(
    'model_dataset.csv',
    engine='pyarrow',
    encoding='ISO-8859-1',
)

In [23]:
# Select the requested client's row and run it through the preprocessor.
features = scorer.transfrom(data=df, client_id=m.dict())

In [24]:
# Inspect the preprocessed row. NOTE(review): the recorded output contains many
# NaN entries, so the preprocessor apparently does not impute missing values
# and the classifier must tolerate them. TODO confirm.
features

array([[ 7.20800114e-01, -7.18646197e-01, -1.31255508e-01,
        -7.11391033e-01,  1.77827361e-01, -6.53350631e-01,
        -9.29746928e-01, -6.80187496e-01, -2.82159908e-01,
        -1.37683173e+00,  3.69897368e-01, -4.99189427e-01,
        -6.27087115e-02,             nan,  7.11301940e-01,
                    nan,             nan, -3.23996515e-01,
        -2.78010758e-01,  4.19224712e-01,  6.39370743e-01,
                    nan,  2.40022713e-02,  5.90333964e-02,
        -2.38361971e-01,  4.13470646e-01,  1.83629222e+00,
                    nan,             nan,             nan,
                    nan,             nan,             nan,
                    nan,             nan,             nan,
                    nan,             nan,             nan,
                    nan,             nan,             nan,
                    nan,             nan,             nan,
                    nan,             nan,             nan,
                    nan,             nan,             na

In [26]:
# Score the client: `prob` is the positive-class probability array and `info`
# the matching human-readable label.
prob, info = scorer.make_prediction(features=features)

In [28]:
# Positive-class probability of the first (only) row; it is compared with the
# 0.47 cut-off inside make_prediction to pick the label.
prob[0]

0.27772680638972275

In [29]:
# Draw a reproducible sample: a fixed random_state makes Restart & Run All
# export the same rows every time, and capping n avoids a ValueError when the
# dataset holds fewer than 10000 rows.
df_sample = df.sample(n=min(10000, len(df)), random_state=42)

In [30]:
# Persist the sample next to the notebook, without the index column.
df_sample.to_csv(path_or_buf='dataset_sample.csv', index=False)