# Подготовка данных для обучения

In [None]:
import pandas as pd

# Path of the raw credit-history CSV, relative to the working directory.
data_path = "data/credit_history_all.csv"

# Read the whole file into a DataFrame.
df = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Remove, in place, the columns that are not passed on to the models below.
for unused_column in (
    "created_timestamp",
    "event_timestamp",
    "loan_id",
    "zipcode",
    "dob_ssn",
):
    del df[unused_column]

In [None]:
df.head()

In [None]:
# df.to_csv('data/test_data.csv')

Для проверки в пайплайне с энкодером

In [None]:
# Features: every column except the label.
X = df.drop(columns=["loan_status"]).copy()
# Label, kept as a one-column DataFrame (list selector) rather than a Series.
y = df.loc[:, ["loan_status"]].copy()

In [None]:
X.columns

In [None]:
X[:5].values

In [None]:
# Columns that are run through the OrdinalEncoder before training and
# before prediction; all remaining columns are used as they are.
categorical_features = [
    "person_home_ownership",
    "loan_intent",
    "city",
    "state",
    "location_type",
]

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Fit the ordinal encoder on the categorical columns of the full dataset.
encoder = OrdinalEncoder()

encoder.fit(df.loc[:, categorical_features])

In [None]:
import joblib

# Persist the fitted encoder; the model classes below load this file.
encoder_filename = "encoder.pkl"
_ = joblib.dump(value=encoder, filename=encoder_filename)

In [None]:
target = "loan_status"

# Encode into a copy so that df keeps its un-encoded categorical values.
transform_training_df = df.copy()
encoded_categoricals = encoder.transform(df[categorical_features])
transform_training_df[categorical_features] = encoded_categoricals

# Feature columns are put in alphabetical order; the prediction code below
# applies the same sort so the column order always matches.
feature_columns = sorted(transform_training_df.columns.drop(target))
train_X = transform_training_df[feature_columns]
train_Y = transform_training_df[target]

In [None]:
train_X.head()

In [None]:
train_Y.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation. The seed is fixed so that the
# split, the pickled models and the printed accuracies are reproducible
# from one notebook run to the next.
x_train, x_test, y_train, y_test = train_test_split(
    train_X, train_Y, test_size=0.10, random_state=42
)

# DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

max_depth = 22

# Depth-limited decision tree fitted on the training split.
classifier = DecisionTreeClassifier(max_depth=max_depth)
classifier.fit(X=x_train, y=y_train)

In [None]:
# Persist the fitted decision tree; DTCModel below loads this file.
DTC_filename = "DTC.pkl"
_ = joblib.dump(value=classifier, filename=DTC_filename)

In [None]:
# Apply the same encoding and alphabetical column order used for training.
encoded_X = encoder.transform(X[categorical_features])
X[categorical_features] = encoded_X
X = X[sorted(X.columns)]

In [None]:
# Predict on the full dataset. X contains the rows the tree was fitted on,
# so a score computed from these predictions is not a held-out estimate.
predictions = classifier.predict(X)

In [None]:
from sklearn.metrics import accuracy_score, make_scorer

# Fraction of rows in the full dataset whose prediction equals the label.
accuracy = accuracy_score(y, predictions)
print(accuracy)

In [None]:
# Held-out evaluation: score the decision tree on the test split only.
predictions = classifier.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(accuracy)

# GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with library defaults, fitted on the training split.
clf = GradientBoostingClassifier()
_ = clf.fit(x_train, y_train)

In [None]:
# Persist the fitted gradient-boosting model; GBCModel below loads this file.
GBC_filename = "GBC.pkl"
_ = joblib.dump(value=clf, filename=GBC_filename)

In [None]:
# Predict on the full dataset. X contains the rows the model was fitted on,
# so a score computed from these predictions is not a held-out estimate.
predictions = clf.predict(X)

In [None]:
from sklearn.metrics import accuracy_score, make_scorer

# Fraction of rows in the full dataset whose prediction equals the label.
accuracy = accuracy_score(y, predictions)
print(accuracy)

In [None]:
# Held-out evaluation: score the gradient-boosting model on the test split only.
predictions = clf.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(accuracy)

In [None]:
predictions

In [None]:
# Reload the decision tree that was written to DTC.pkl earlier in the notebook.
model = joblib.load("DTC.pkl")

In [None]:
# Predict with the reloaded model on the encoded, column-sorted feature frame.
predictions = model.predict(X)

In [None]:
predictions

In [None]:
# Reference: https://github.com/SeldonIO/seldon-core/blob/master/components/routers/case_study/credit_card_default.ipynb

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OrdinalEncoder

class DTCModel:
    """Serving wrapper around the pickled decision tree and its encoder.

    Exposes ``predict``, ``send_feedback`` and ``metrics``.
    NOTE(review): this looks like the Seldon Core Python component interface
    used by the case study linked in this notebook -- confirm against the
    actual deployment.
    """

    def __init__(self):
        """Load ``encoder.pkl`` and ``DTC.pkl`` and reset the feedback counters."""
        # Imported locally so the class does not depend on an earlier
        # notebook cell having imported joblib.
        import joblib

        self.categorical_features = [
            "person_home_ownership",
            "loan_intent",
            "city",
            "state",
            "location_type",
        ]

        self.encoder = joblib.load("encoder.pkl")
        print("Encoder loaded")

        self.model = joblib.load("DTC.pkl")
        print("Model loaded")

        self._init_feedback_state()

    def _init_feedback_state(self):
        """Zero the confusion-matrix counts and the success statistics."""
        self.cm = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}
        self.tries = 0
        self.success = 0
        self.value = 0

    def predict(self, X, features_names):
        """Encode the categorical columns of ``X`` and return model predictions.

        Args:
            X: Row-oriented data accepted by ``pandas.DataFrame``.
            features_names: Column names for ``X``; must include every entry
                of ``self.categorical_features``.

        Returns:
            Whatever ``self.model.predict`` returns for the prepared frame.
        """
        # Was ``pd.Dataframe``, which does not exist and raised AttributeError.
        df = pd.DataFrame(X, columns=features_names)

        df[self.categorical_features] = self.encoder.transform(
            df[self.categorical_features]
        )
        # The model was fitted on alphabetically sorted columns.
        df = df.reindex(sorted(df.columns), axis=1)

        return self.model.predict(df)

    def send_feedback(self, features, feature_names, reward, truth, routing=None):
        """Update the confusion matrix and success rate from one feedback event.

        ``reward == 1`` counts the event as a correct prediction and
        ``reward == 0`` as an incorrect one; ``truth`` selects the positive
        (1) or negative (0) cell. Other values leave the matrix untouched but
        still count as a try.
        """
        print("DTC model send-feedback entered")
        print(f"Truth: {truth}, Reward: {reward}")

        if reward == 1:
            if truth == 1:
                self.cm["tp"] += 1
            elif truth == 0:
                self.cm["tn"] += 1
        elif reward == 0:
            if truth == 1:
                self.cm["fn"] += 1
            elif truth == 0:
                self.cm["fp"] += 1

        self.tries += 1
        if reward:
            self.success += 1
        self.value = self.success / self.tries

        print(self.cm)
        # print() does not apply %-style arguments, so format explicitly.
        print(
            f"Tries: {self.tries}, successes: {self.success}, values: {self.value}"
        )

    def _gauge(self, key, value):
        """Build one GAUGE metric entry tagged with this branch's name."""
        return {
            "type": "GAUGE",
            "key": key,
            "value": value,
            "tags": {"branch_name": "DTC"},
        }

    def metrics(self):
        """Return the current counters as a list of GAUGE metric dicts."""
        return [
            self._gauge("true_pos_total", self.cm["tp"]),
            self._gauge("true_neg_total", self.cm["tn"]),
            self._gauge("false_pos_total", self.cm["fp"]),
            self._gauge("false_neg_total", self.cm["fn"]),
            self._gauge("branch_value", self.value),
            self._gauge("n_success_total", self.success),
            self._gauge("n_tries_total", self.tries),
        ]

In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import OrdinalEncoder

class GBCModel:
    """Serving wrapper around the pickled gradient-boosting model and encoder.

    Exposes ``predict``, ``send_feedback`` and ``metrics``.
    NOTE(review): this looks like the Seldon Core Python component interface
    used by the case study linked in this notebook -- confirm against the
    actual deployment.
    """

    def __init__(self):
        """Load ``encoder.pkl`` and ``GBC.pkl`` and reset the feedback counters."""
        # Imported locally so the class does not depend on an earlier
        # notebook cell having imported joblib.
        import joblib

        self.categorical_features = [
            "person_home_ownership",
            "loan_intent",
            "city",
            "state",
            "location_type",
        ]

        self.encoder = joblib.load("encoder.pkl")
        print("Encoder loaded")

        self.model = joblib.load("GBC.pkl")
        print("Model loaded")

        # These counters were never initialised here, so send_feedback() and
        # metrics() raised AttributeError on first use.
        self._init_feedback_state()

    def _init_feedback_state(self):
        """Zero the confusion-matrix counts and the success statistics."""
        self.cm = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}
        self.tries = 0
        self.success = 0
        self.value = 0

    def predict(self, X, features_names):
        """Encode the categorical columns of ``X`` and return model predictions.

        Args:
            X: Row-oriented data accepted by ``pandas.DataFrame``.
            features_names: Column names for ``X``; must include every entry
                of ``self.categorical_features``.

        Returns:
            Whatever ``self.model.predict`` returns for the prepared frame.
        """
        # Was ``pd.Dataframe``, which does not exist and raised AttributeError.
        df = pd.DataFrame(X, columns=features_names)

        df[self.categorical_features] = self.encoder.transform(
            df[self.categorical_features]
        )
        # The model was fitted on alphabetically sorted columns.
        df = df.reindex(sorted(df.columns), axis=1)

        return self.model.predict(df)

    def send_feedback(self, features, feature_names, reward, truth, routing=None):
        """Update the confusion matrix and success rate from one feedback event.

        ``reward == 1`` counts the event as a correct prediction and
        ``reward == 0`` as an incorrect one; ``truth`` selects the positive
        (1) or negative (0) cell. Other values leave the matrix untouched but
        still count as a try.
        """
        print("GBC model send-feedback entered")
        print(f"Truth: {truth}, Reward: {reward}")

        if reward == 1:
            if truth == 1:
                self.cm["tp"] += 1
            elif truth == 0:
                self.cm["tn"] += 1
        elif reward == 0:
            if truth == 1:
                self.cm["fn"] += 1
            elif truth == 0:
                self.cm["fp"] += 1

        self.tries += 1
        if reward:
            self.success += 1
        self.value = self.success / self.tries

        print(self.cm)
        # print() does not apply %-style arguments, so format explicitly.
        print(
            f"Tries: {self.tries}, successes: {self.success}, values: {self.value}"
        )

    def _gauge(self, key, value):
        """Build one GAUGE metric entry tagged with this branch's name."""
        return {
            "type": "GAUGE",
            "key": key,
            "value": value,
            "tags": {"branch_name": "GBC"},
        }

    def metrics(self):
        """Return the current counters as a list of GAUGE metric dicts."""
        return [
            self._gauge("true_pos_total", self.cm["tp"]),
            self._gauge("true_neg_total", self.cm["tn"]),
            self._gauge("false_pos_total", self.cm["fp"]),
            self._gauge("false_neg_total", self.cm["fn"]),
            self._gauge("branch_value", self.value),
            self._gauge("n_success_total", self.success),
            self._gauge("n_tries_total", self.tries),
        ]