In [1]:
# Модель

import pickle

class CustomScoringModel:

    def predict_proba(
        self,
        income_category: str,
        age_category: str,
        count_doc: int,
        inf_house: int,
        interest_rate_category: str,
        perc_money: int,
        ext1: int,
        ext2: int,
        ext3: int
    ) -> float:
        proba = 0
        # низкий дефолт 
        if ext1 > 0.7:
            proba -= 0.1
        if ext2 > 0.7:
            proba -= 0.1
        if ext3 > 0.7:
            proba -= 0.1
        # высокий дефолт
        if inf_house == 0:
            proba += 0.1
        if count_doc == 0:
            proba += 0.1
        if income_category == "poor":
            proba += 0.1
        if age_category == 'old' or age_category == 'young':
            proba += 0.1
        if perc_money > 0.25:
            proba += 0.1
        if interest_rate_category == 'disloyal':
            proba += 0.1
        if ext1 < 0.3:
            proba += 0.1
        if ext2 < 0.3:
            proba += 0.1
        if ext3 < 0.3:
            proba += 0.1
        return proba
    def save_model(
        self,
        file_path: str,
    ) -> None:
        """Функция принимает на вход путь, сохраняет по эту пути .pickle с моделью."""
        with open(file_path, 'wb') as file:
            pickle.dump(self, file)

In [2]:
# Инициализируем модель
model = CustomScoringModel()
model

<__main__.CustomScoringModel at 0x6176fd0>

In [3]:
# Сохраним модель
model_path = './model.pickle'
model.save_model('model.pickle')

In [4]:
from dataclasses import dataclass
from enum import Enum, auto

class ScoringDecision(Enum):
    """Возможные решения модели."""

    ACCEPTED = auto()
    DECLINED = auto()
    
@dataclass
class ScoringResult:
    """Класс, содержащий результаты скоринга."""

    decision: ScoringDecision
    amount: int
    threshold: float
    proba: float

In [5]:
@dataclass
class Features:
    """Фичи для принятия решения об одобрении."""

    income_category: str = 'other'
    age_category: str = 'other'
    count_doc: int = 'other'
    inf_house: int = 'other'
    interest_rate_category: str = 'other'
    perc_money: int = 'other'
    ext1: int = 'other'
    ext2: int = 'other'
    ext3: int = 'other'

In [6]:
class SimpleModel:
    """Класс для моделей c расчетом proba и threshold."""
    
    _threshold = 0

    def __init__(self, model_path: str):
        """Создает объект класса."""
        with open(model_path, 'rb') as pickled_model:
            self._model = pickle.load(pickled_model)

    def _predict_proba(self, features: Features) -> float:
        """Определяет вероятность невозврата займа.""" 
        res = self._model.predict_proba(
            features.income_category,
            features.age_category,
            features.count_doc,
            features.inf_house,
            features.interest_rate_category,
            features.perc_money,
            features.ext1,
            features.ext2,
            features.ext3,
        )
        return res

In [7]:
# Инициализируем модель
core_simple_model = SimpleModel('./model.pickle')

In [8]:
class Calculator:
    def calc_amount(
        self,
        proba: str,
        features: Features,
    ) -> int:
        """Функция принимает на вход вероятность дефолта и признаки и расчитывает одобренную сумму."""
        if proba < 0.10 or features.income_category == 'rich':
            return 500
        if features.age_category == 'other':
            return 200
        return 100

In [9]:
# унаследуемся от SimpleModel чтобы не переопределять одинаковый функционал
class AdvancedModel(SimpleModel):
    
    def __init__(self, model_path: str):
        super().__init__(model_path)
        self._calculator = Calculator()
    
    def get_scoring_result(self, features):        
        p = self._predict_proba(features)
        
        final_decision = ScoringDecision.DECLINED
        approved_amount = 0
        if p < 0.4:
            final_decision = ScoringDecision.ACCEPTED
            approved_amount = self._calculator.calc_amount(
                p,
                features,
            )

        return ScoringResult(decision=final_decision, amount=approved_amount, threshold=self._threshold, proba=p)

In [10]:
import json
from datetime import datetime

class Service:
    
    _model = AdvancedModel('./model.pickle')
    
    def _calculate_income_category(
        self,
        income: int,
    ) -> str:
        if income < 60_000:
            return "poor"
        if income > 135_000:
            return "rich"
        return "other"
    
    def _calculate_interest_rate_category(
        self,
        interest_rate: int,
    ) -> str:
        if interest_rate < 0.07:
            return "loyal"
        if interest_rate > 0.12:
            return "disloyal"
        return "other"
    
    def _calculate_age_category(
        self,
        age: int
    ) -> str:
        if age < 25:
            return "young"
        if age > 60:
            return "old"
        return "other"

In [11]:
# Инициализируем сервис
service = Service()

In [12]:
import pandas as pd

In [13]:
# Берем датасет с насчитанными признаками
df = pd.read_csv('application_train_test.csv') 
df.rename(columns={"Средний доход на взрослого": "income",
                   "Процентная ставка": "interest_rate",
                  'Кол-во документов': 'count_doc',
                  'Кол-во полных лет': 'age',
                  'Информация о доме': 'inf_house',
                  'Доля денег которые клиент отдает на займ за год': 'perc_money',
                  'скор внеешних источников1': 'ext1',
                  'скор внеешних источников2': 'ext2',
                  'скор внеешних источников3': 'ext3'}, inplace=True)


In [14]:
df

Unnamed: 0,TARGET,SK_ID_CURR,count_doc,inf_house,age,Год смены документа,Признак задержки смены документа,perc_money,Среднее кол-во детей в семье на одного взрослого,Средний доход на ребенка,income,interest_rate,ext1,ext2,ext3
0,1,100002,1,1,25,16,1,0.121978,0.0,inf,202500.0,0.115470,0.083037,0.262949,0.139376
1,0,100003,1,1,45,42,1,0.132217,0.0,inf,135000.0,0.048087,0.311267,0.622246,
2,0,100004,0,0,52,41,1,0.100000,0.0,inf,67500.0,0.000000,,0.555912,0.729567
3,0,100006,1,0,52,26,1,0.219900,0.0,inf,67500.0,0.060158,,0.650442,
4,0,100007,1,0,54,43,1,0.179963,0.0,inf,121500.0,0.000000,,0.322738,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307506,0,456251,1,1,25,2,1,0.174971,0.0,inf,157500.0,0.171385,0.145570,0.681632,
307507,0,456252,1,1,56,44,1,0.166687,0.0,inf,72000.0,0.105790,,0.115992,
307508,0,456253,1,1,41,23,1,0.195941,0.0,inf,153000.0,0.084089,0.744026,0.535722,0.218859
307509,1,456254,1,1,32,25,1,0.118158,0.0,inf,85500.0,0.103765,,0.514163,0.661024


In [15]:
# насчитаем признаки
df['income_category'] = df['income'].apply(lambda x: service._calculate_income_category(x))
df.drop(columns=['income'], inplace=True)
df['age_category'] = df['age'].apply(lambda x: service._calculate_age_category(x))
df.drop(columns=['age'], inplace=True)
df['interest_rate_category'] = df['interest_rate'].apply(lambda x: service._calculate_interest_rate_category(x))
df.drop(columns=['interest_rate'], inplace=True)


In [16]:
# инициализируем модель
model = AdvancedModel('./model.pickle')

In [17]:
# для каждого "наблюдения" получим решение
df['result'] = df.apply(
    lambda x: model.get_scoring_result(
        Features(
            income_category=x['income_category'],
            age_category=x['interest_rate_category'],
            inf_house=x['inf_house'],
            count_doc=x['count_doc'],
            interest_rate_category=x['interest_rate_category'],
            perc_money=x['perc_money'],
            ext1=x['ext1'],
            ext2=x['ext2'],
            ext3=x['ext3'],
        )
    ),
    axis=1
)

In [18]:
df['decision'] = df['result'].apply(lambda x: x.decision.name)
df['amount'] = df['result'].apply(lambda x: x.amount)
df['proba'] = df['result'].apply(lambda x: x.proba)

In [19]:
df

Unnamed: 0,TARGET,SK_ID_CURR,count_doc,inf_house,Год смены документа,Признак задержки смены документа,perc_money,Среднее кол-во детей в семье на одного взрослого,Средний доход на ребенка,ext1,ext2,ext3,income_category,age_category,interest_rate_category,result,decision,amount,proba
0,1,100002,1,1,16,1,0.121978,0.0,inf,0.083037,0.262949,0.139376,rich,other,other,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,500,0.3
1,0,100003,1,1,42,1,0.132217,0.0,inf,0.311267,0.622246,,other,other,loyal,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,500,0.0
2,0,100004,0,0,41,1,0.100000,0.0,inf,,0.555912,0.729567,other,other,loyal,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,100,0.1
3,0,100006,1,0,26,1,0.219900,0.0,inf,,0.650442,,other,other,loyal,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,100,0.1
4,0,100007,1,0,43,1,0.179963,0.0,inf,,0.322738,,other,other,loyal,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,100,0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307506,0,456251,1,1,2,1,0.174971,0.0,inf,0.145570,0.681632,,rich,other,disloyal,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,500,0.2
307507,0,456252,1,1,44,1,0.166687,0.0,inf,,0.115992,,other,other,other,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,200,0.1
307508,0,456253,1,1,23,1,0.195941,0.0,inf,0.744026,0.535722,0.218859,rich,other,other,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,500,0.0
307509,1,456254,1,1,25,1,0.118158,0.0,inf,,0.514163,0.661024,other,other,other,ScoringResult(decision=<ScoringDecision.ACCEPT...,ACCEPTED,500,0.0


In [20]:
df['proba'].describe(percentiles = [.08, .92])

count    307511.000000
mean          0.130002
std           0.143162
min          -0.300000
8%           -0.100000
50%           0.100000
92%           0.300000
max           0.700000
Name: proba, dtype: float64

In [21]:
df['decision'].value_counts()

# отказали примерно такому же количеству клиентов как и исходном датасете

ACCEPTED    287951
DECLINED     19560
Name: decision, dtype: int64

In [22]:
df['TARGET'].corr(df['proba'])

# проба хорошо коррелирует с таргетом, на уровне индексов из внешних источников

0.16038620619167657