In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

from lightgbm import LGBMClassifier

import joblib

# Fixed seed value for the notebook; none of the cells shown here reference it.
RAND = 42

# Evaluation

In [2]:
def pipeline_preprocessing(df: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    """Collapse a multi-row-per-id table into one wide feature row per id.

    Steps:
      1. Drop the ``pre_loans_total_overdue`` column and index the frame by ``id``.
      2. Take ``GroupBy.last()`` per id (the last non-null value of each column)
         as the base feature set.
      3. For ``i`` in ``2..n``, left-join the i-th row from the end of each id's
         history, with columns suffixed ``_{i}``. The first remaining column is
         skipped for these lagged rows (``iloc[:, 1:]``).
      4. Replace missing values (e.g. ids with fewer than ``i`` rows) with 0.

    Args:
        df: Input frame; must contain the columns ``id`` and
            ``pre_loans_total_overdue``. The caller's frame is not modified.
        n: How many trailing rows per id to unfold into columns. Values below 2
            yield only the base ``last()`` features.

    Returns:
        A DataFrame indexed by ``id`` with one row per id.

    Raises:
        KeyError: If ``id`` or ``pre_loans_total_overdue`` is missing from ``df``.
    """
    history = df.drop("pre_loans_total_overdue", axis=1).set_index('id')

    features = history.groupby(level=0).last()

    # The grouping used for the lagged rows does not depend on the loop
    # variable, so build it once instead of once per lag.
    lag_groups = history.iloc[:, 1:].groupby(level=0)

    for i in range(2, n + 1):
        # Lagged columns collide with the base column names, so only the
        # right-hand side receives the `_{i}` suffix.
        features = features.join(lag_groups.nth(-i), rsuffix=f'_{i}', how='left')

    return features.fillna(0)

In [3]:
# Shared path prefix of the raw test chunks; each chunk file is this prefix
# followed by `_<k>.pq`.
test_path = '../data/raw/test_data/test_data'

# Read both parquet chunks, in order, with a single expression.
test_data_0, test_data_1 = (
    pd.read_parquet(test_path + suffix) for suffix in ('_0.pq', '_1.pq')
)

In [4]:
# Location of the serialized model bundle.
models_path = "../data/models.joblib"

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
models = joblib.load(models_path)

# The loaded object is subscripted by the key 'lightgbm'; presumably it is a
# dict of fitted models keyed by name — confirm against the code that saved it.
model = models['lightgbm']

In [5]:
# Turn each raw chunk into a one-row-per-id feature frame, using the
# preprocessing function's default `n`.
eval_data_0, eval_data_1 = (
    pipeline_preprocessing(chunk) for chunk in (test_data_0, test_data_1)
)

In [6]:
# Score each chunk separately with the loaded model.
y_score_0, y_score_1 = (
    model.predict_proba(part) for part in (eval_data_0, eval_data_1)
)

# Stack the per-chunk outputs row-wise, chunk 0 first, then chunk 1.
y_score = np.concatenate([y_score_0, y_score_1], axis=0)

In [8]:
target_path = "../data/raw/test_target.csv"

# Load the target table and key it by `id` in one chained expression.
test_target = pd.read_csv(target_path).set_index('id')

In [9]:
# Attach scores to target rows by `id` rather than by position. Positional
# assignment is only correct if the CSV row order happens to equal the
# sorted-id order of chunk 0 followed by chunk 1; a different order would
# silently give every id someone else's score.
scored_ids = eval_data_0.index.append(eval_data_1.index)

# Column 1 of predict_proba is the second class in the model's class order
# (presumably the positive class — confirm against model.classes_).
scores_by_id = pd.Series(y_score[:, 1], index=scored_ids)

if not scores_by_id.index.is_unique:
    raise ValueError("Some ids were scored more than once across the test chunks")

unscored_ids = test_target.index.difference(scores_by_id.index)
if len(unscored_ids) > 0:
    raise ValueError(
        f"{len(unscored_ids)} ids in test_target have no prediction "
        f"(first few: {list(unscored_ids[:5])})"
    )

test_target['score'] = scores_by_id.reindex(test_target.index).to_numpy()

In [11]:
# Destination of the submission file, written one directory above the notebook.
sub_path = '../sub.csv'

# The `id` index is written as the first CSV column.
test_target.to_csv(path_or_buf=sub_path)