# IMPORTS

In [17]:
import pandas as pd
import numpy as np 
import statsmodels.api as sm 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, balanced_accuracy_score
from sklearn.model_selection import train_test_split


# FUNCTIONS 

In [18]:
def max_by_mean(data: pd.DataFrame, prefix: str) -> pd.Series:
    """Return the row-wise max-to-mean ratio over the columns starting with ``prefix``.

    Every selected value is shifted by +1 before the ratio is taken, so a row
    consisting only of zeros yields 1.0 instead of 0/0.

    Args:
        data: Frame holding the columns to scan.
        prefix: Column-name prefix that selects the columns to aggregate.

    Returns:
        One ratio per row of ``data``.
    """
    matching_columns = data.columns[data.columns.str.startswith(prefix)]
    shifted = data[matching_columns] + 1

    row_max = shifted.max(axis=1)
    row_mean = shifted.mean(axis=1)
    return row_max / row_mean

def variance(data: pd.DataFrame, prefix: str) -> pd.Series:
    """Return the row-wise variance over the columns starting with ``prefix``.

    ``DataFrame.var`` is called with its defaults, i.e. the sample variance.

    Args:
        data: Frame holding the columns to scan.
        prefix: Column-name prefix that selects the columns to aggregate.

    Returns:
        One variance value per row of ``data``.
    """
    is_match = data.columns.str.startswith(prefix)
    return data[data.columns[is_match]].var(axis=1)


def get_data_attention(data: pd.DataFrame):
    """Build the per-row summary features used by the attention tree.

    For every column group (matched by name prefix) two features are added:
    ``var_<prefix>`` computed by ``variance`` and ``max_by_mean_<prefix>``
    computed by ``max_by_mean``.

    Args:
        data: Frame whose column names start with the prefixes listed below.

    Returns:
        A DataFrame with two feature columns per prefix, in prefix order.
    """
    series_prefixes = (
        'DPD_credit_card',
        'DPD_mortgage',
        'DPD_term_loan',
        'Default_flag',
        'Os_credit_card',
        'Os_mortgage',
        'Overdue_credit_card',
        'Overdue_mortgage',
        'Overdue_term_loan',
    )
    # (feature label, summarising function) pairs applied to each prefix, in this order.
    summaries = (('var', variance), ('max_by_mean', max_by_mean))

    features = pd.DataFrame()
    for prefix in series_prefixes:
        for label, summarize in summaries:
            features[f'{label}_{prefix}'] = summarize(data, prefix)

    return features

def attention_tree_model(data: pd.DataFrame, y: pd.Series) -> "DecisionTreeClassifier":
    """Fit a decision tree on the attention features derived from ``data``.

    Args:
        data: Raw frame; it is converted with ``get_data_attention`` before fitting.
        y: Target labels, one per row of ``data``.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    attention_features = get_data_attention(data)

    # Fixed seed so repeated runs build the same tree.
    dt_classifier = DecisionTreeClassifier(random_state=42)
    dt_classifier.fit(attention_features, y)

    return dt_classifier



## Load data 

In [19]:
df = pd.read_csv('dane/in_time.csv')

# Separate the label column from the features.
X = df.drop('Target', axis=1)
y = df['Target']

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
# Split the remaining 80% again: 25% of it (20% of all rows) becomes the validation set,
# leaving 60% of all rows for tuning.
X_train_tuning, X_val, y_train_tuning, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)

# Train all models 

### Attention tree

In [20]:
# Fit on the full training split (X_train), which still contains the validation rows.
att_tree = attention_tree_model(X_train, y_train)

In [31]:
# Score every row of X (training and test rows alike) with the fitted tree.
att_tree_data = att_tree.predict_proba(get_data_attention(X))
# Keep the first probability column, i.e. the one for att_tree.classes_[0].
# NOTE(review): if Target is coded 0/1 this is P(Target == 0) — confirm column 0 (not 1) is intended.
att_tree_data_final = att_tree_data[:,0]


# ADD all features 

# Probability of the first class (att_tree.classes_[0]) for every row of X.
att_tree_data_all = att_tree.predict_proba(get_data_attention(X))[:,0]
# Same score for the held-out test rows only.
att_tree_data_test = att_tree.predict_proba(get_data_attention(X_test))[:,0]