In [1]:
# Download the competition archive with the Kaggle CLI. The `kaggle` executable must be
# installed and on PATH: the recorded run of this cell failed with "command not found: kaggle".
!kaggle competitions download -c aim-2024-local-contest-home-credit

zsh:1: command not found: kaggle


## Imports

In [2]:
import os
from datetime import date

import hvplot.pandas
import hvplot.polars
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import panel as pn
import polars as pl
import scipy as sci
import seaborn as sns
import xarray as xr

## Annotation tables

In [3]:
# Feature definition: lookup table with one row per variable (columns: Variable, Description).
# Paths are relative to the directory the notebook was started in.
feat_def = pl.read_csv("aim-2024-local-contest-home-credit/feature_definitions.csv", use_pyarrow=True)
# Train Base Table: one row per case (columns: case_id, date_decision, MONTH, WEEK_NUM, target).
tr_base_table = pl.read_parquet("aim-2024-local-contest-home-credit/parquet_files/train_base_table.parquet", use_pyarrow=True)

In [4]:
# Preview the feature-definition lookup table (polars truncates the rendered rows).
feat_def

Variable,Description
str,str
"""actualdpd_943P…","""Days Past Due …"
"""actualdpdtoler…","""DPD of client …"
"""addres_distric…","""District of th…"
"""addres_role_87…","""Role of person…"
"""addres_zip_823…","""Zip code of th…"
…,…
"""totinstallast1…","""Total amount o…"
"""twobodfilling_…","""Type of applic…"
"""type_25L""","""Contact type o…"
"""typesuite_864L…","""Persons accomp…"


In [5]:
# Preview the base table; note that date_decision is still a string column at this point.
tr_base_table

case_id,date_decision,MONTH,WEEK_NUM,target
i64,str,i64,i64,i64
0,"""2019-01-03""",201901,0,0
1,"""2019-01-03""",201901,0,0
2,"""2019-01-04""",201901,0,0
3,"""2019-01-03""",201901,0,0
4,"""2019-01-04""",201901,0,1
…,…,…,…,…
2670550,"""2020-02-24""",202002,59,0
2670551,"""2020-02-24""",202002,59,0
2670552,"""2020-02-24""",202002,59,0
2670553,"""2020-02-24""",202002,59,0


In [6]:
# Parse the string column date_decision into a proper Date column so it can be
# grouped, sorted and plotted chronologically.
tr_base_table = tr_base_table.with_columns(pl.col("date_decision").cast(pl.Date))

In [7]:
# Per-day sum of `target`, ordered chronologically.
# (`import hvplot.pandas` now lives in the imports cell at the top of the notebook.)
time_series = (
    tr_base_table
    .group_by("date_decision")
    .agg(pl.col("target").sum().alias("credit_decision"))
    .sort("date_decision")
)
# Build the date-indexed pandas frame in one expression instead of mutating it in place;
# `df` is reused by the violin-plot cell that follows.
df = time_series.to_pandas().set_index("date_decision")
df.hvplot()

In [8]:
# Distribution of the daily `credit_decision` sums, one violin per group of `index.month`
# (presumably the calendar month of the date_decision index - confirm against hvplot docs).
df.hvplot.violin(by='index.month')

## Parquet info

In [9]:
# The cells below read parquet files by bare file name, so the kernel's working
# directory is switched to the training folder.
TRAIN_DIR = "aim-2024-local-contest-home-credit/parquet_files/train"
# A relative os.chdir is not idempotent: running this cell a second time would look for
# TRAIN_DIR inside itself and raise FileNotFoundError. Only change directory when the
# kernel is not already inside the training folder.
if not os.path.normpath(os.getcwd()).endswith(os.path.normpath(TRAIN_DIR)):
    os.chdir(TRAIN_DIR)
os.listdir(".")

['train_person_2.parquet',
 'train_applprev_2.parquet',
 'train_credit_bureau_a_1_0.parquet',
 'train_static_0_0.parquet',
 'train_credit_bureau_a_2_4.parquet',
 'train_other_1.parquet',
 'train_credit_bureau_a_1_1.parquet',
 'train_static_0_1.parquet',
 'train_credit_bureau_a_2_5.parquet',
 'train_tax_registry_c_1.parquet',
 'train_credit_bureau_b_2.parquet',
 'train_base.parquet',
 'train_credit_bureau_a_2_10.parquet',
 'train_credit_bureau_a_2_7.parquet',
 'train_credit_bureau_a_1_3.parquet',
 'train_person_1.parquet',
 'train_credit_bureau_b_1.parquet',
 'train_tax_registry_b_1.parquet',
 'train_credit_bureau_a_2_6.parquet',
 'train_credit_bureau_a_1_2.parquet',
 'train_credit_bureau_a_2_3.parquet',
 'train_debitcard_1.parquet',
 'train_static_cb_0.parquet',
 'train_applprev_1_0.parquet',
 'train_applprev_1_1.parquet',
 'train_credit_bureau_a_2_2.parquet',
 'train_tax_registry_a_1.parquet',
 'train_credit_bureau_a_2_9.parquet',
 'train_deposit_1.parquet',
 'train_credit_bureau_a_2_

### Depth == 0

In [10]:
# Base table from the train/ folder (the working directory was changed to it above).
# Same columns as tr_base_table; date_decision is read as a string here as well.
tr_base = pl.read_parquet("train_base.parquet", use_pyarrow=True)
tr_base

case_id,date_decision,MONTH,WEEK_NUM,target
i64,str,i64,i64,i64
0,"""2019-01-03""",201901,0,0
1,"""2019-01-03""",201901,0,0
2,"""2019-01-04""",201901,0,0
3,"""2019-01-03""",201901,0,0
4,"""2019-01-04""",201901,0,1
…,…,…,…,…
2703450,"""2020-10-05""",202010,91,0
2703451,"""2020-10-05""",202010,91,0
2703452,"""2020-10-05""",202010,91,0
2703453,"""2020-10-05""",202010,91,0


In [17]:
# Number of occurrences of each distinct case_id; a count above 1 would indicate a
# duplicated case in the base table.
tr_base['case_id'].unique_counts()

case_id
u32
1
1
1
1
1
…
1
1
1
1


In [60]:
def set_table_dtypes(df: pl.DataFrame) -> pl.DataFrame:
    """Cast columns to a dtype chosen from the column name.

    Rules, checked in this order for every column:

    * ``case_id``, ``WEEK_NUM``, ``num_group1``, ``num_group2`` -> ``Int64``
    * ``date_decision`` -> ``Date``
    * name ending in ``P`` or ``A`` -> ``Float64``
    * name ending in ``M`` -> ``String``
    * name ending in ``D`` -> ``Date``

    Columns matching none of the rules keep the dtype they were read with.

    Parameters
    ----------
    df : pl.DataFrame
        Table whose columns should be normalised.

    Returns
    -------
    pl.DataFrame
        Frame with the same columns in the same order, cast as described above.
    """
    integer_keys = ("case_id", "WEEK_NUM", "num_group1", "num_group2")
    # Last character of the column name -> target dtype.
    suffix_dtypes = {
        "P": pl.Float64,
        "A": pl.Float64,
        "M": pl.String,
        "D": pl.Date,
    }

    for name in df.columns:
        if name in integer_keys:
            target = pl.Int64
        elif name == "date_decision":
            target = pl.Date
        else:
            target = suffix_dtypes.get(name[-1])

        if target is not None:
            df = df.with_columns(pl.col(name).cast(target))
    return df

# train_static is stored as two parquet shards: normalise the dtypes of each shard,
# then stack them. "vertical_relaxed" lets polars reconcile dtype differences between shards.
train_static = pl.concat(
    [
        set_table_dtypes(pl.read_parquet(shard, use_pyarrow=True))
        for shard in ("train_static_0_0.parquet", "train_static_0_1.parquet")
    ],
    how="vertical_relaxed",
)
# train_static_cb_0 is a single file.
cb0 = set_table_dtypes(pl.read_parquet("train_static_cb_0.parquet", use_pyarrow=True))

In [61]:
# Preview the stacked static table before any columns are dropped.
train_static

case_id,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,applications30d_658L,applicationscnt_1086L,applicationscnt_464L,applicationscnt_629L,applicationscnt_867L,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,avgdbdtollast24m_4525197P,avgdpdtolclosure24_3658938P,avginstallast24m_3658937A,avglnamtstart24m_4525187A,avgmaxdpdlast9m_3716943P,avgoutstandbalancel6m_4187114A,avgpmtlast12m_4525200A,bankacctype_710L,cardtype_51L,clientscnt12m_3712952L,clientscnt3m_3712950L,clientscnt6m_3712949L,clientscnt_100L,clientscnt_1022L,clientscnt_1071L,clientscnt_1130L,clientscnt_136L,clientscnt_157L,clientscnt_257L,clientscnt_304L,clientscnt_360L,clientscnt_493L,clientscnt_533L,clientscnt_887L,…,numinstpaidearlyest_4493214L,numinstpaidlastcontr_4325080L,numinstpaidlate1d_3546852L,numinstregularpaid_973L,numinstregularpaidest_4493210L,numinsttopaygr_769L,numinsttopaygrest_4493213L,numinstunpaidmax_3546851L,numinstunpaidmaxest_4493212L,numnotactivated_1143L,numpmtchanneldd_318L,numrejects9m_859L,opencred_647L,paytype1st_925L,paytype_783L,payvacationpostpone_4187118D,pctinstlsallpaidearl3d_427L,pctinstlsallpaidlat10d_839L,pctinstlsallpaidlate1d_3546856L,pctinstlsallpaidlate4d_3546849L,pctinstlsallpaidlate6d_3546844L,pmtnum_254L,posfpd10lastmonth_333P,posfpd30lastmonth_3976960P,posfstqpd30lastmonth_3976962P,previouscontdistrict_112M,price_1097A,sellerplacecnt_915L,sellerplacescnt_216L,sumoutstandtotal_3546847A,sumoutstandtotalest_4493215A,totaldebt_9A,totalsettled_863A,totinstallast1m_4525188A,twobodfilling_608L,typesuite_864L,validfrom_1069D
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,bool,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str
0,,,1917.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,,"""OTHER""","""OTHER""",,,,,,,24.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""",,
1,,,3134.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,,"""OTHER""","""OTHER""",,,,,,,18.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""",,
2,,,4937.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,false,"""OTHER""","""OTHER""",,,,,,,36.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""","""AL""",
3,,,4643.6,0.0,0.0,1.0,0.0,2.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,1.0,false,"""OTHER""","""OTHER""",,,,,,,12.0,0.0,0.0,,"""a55475b1""",,1.0,1.0,,,0.0,0.0,,"""BO""","""AL""",
4,,,3390.2,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,,,,,,,,,,0.0,0.0,0.0,false,"""OTHER""","""OTHER""",,,,,,,24.0,0.0,0.0,,"""a55475b1""",,0.0,0.0,,,0.0,0.0,,"""BO""","""AL""",
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2703450,0.0,176561.36,3675.4001,0.0,0.0,0.0,0.0,0.0,0.0,10.0,-23.0,-43.0,-23.0,0.0,7356.8003,,0.0,16392.496,6750.2,"""CA""",,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,99.0,12.0,4.0,113.0,113.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,false,"""OTHER""","""OTHER""",,0.9115,0.02655,0.0354,0.0354,0.0354,12.0,0.0,0.0,0.0,"""P123_39_170""",0.0,0.0,8.0,0.0,0.0,0.0,428159.66,14346.319,"""FO""",,
2703451,0.0,301276.47,7088.6,6191.6,0.0,0.0,5.0,0.0,0.0,5.0,-18.0,-12.0,-18.0,0.0,12553.2,,0.0,105129.31,15780.4,"""CA""",,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,70.0,0.0,1.0,75.0,75.0,11.0,11.0,11.0,11.0,0.0,0.0,0.0,false,"""OTHER""","""OTHER""",,0.94595,0.0,0.01351,0.0,0.0,24.0,0.0,0.0,0.0,"""P162_18_172""",,0.0,3.0,68098.4,68098.4,68098.4,701247.3,40499.805,"""FO""",,
2703452,0.0,14232.4,7788.8003,0.0,0.0,0.0,0.0,0.0,0.0,3.0,-12.0,,-16.0,1.0,2662.4001,,,,1500.6,"""CA""",,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,6.0,3.0,3.0,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,false,"""OTHER""","""OTHER""",,0.66667,0.0,0.33333,0.11111,0.0,11.0,0.0,0.0,0.0,"""P133_44_167""",0.0,0.0,1.0,0.0,0.0,0.0,24002.0,,"""BO""",,
2703453,0.0,197371.58,1195.4,2827.2,0.0,0.0,36.0,0.0,0.0,9.0,-33.0,-64.0,-34.0,0.0,8212.601,,0.0,47943.062,9921.2,"""CA""",,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,6.0,0.0,0.0,0.0,0.0,…,61.0,13.0,23.0,115.0,119.0,17.0,17.0,17.0,17.0,0.0,0.0,0.0,false,"""OTHER""","""OTHER""",,0.69643,0.04348,0.20536,0.10811,0.0991,6.0,0.0,0.0,0.0,"""P123_6_84""",0.0,0.0,4.0,46806.6,46806.6,46806.6,440145.3,5654.4,"""BO""",,


In [62]:
def drop_col_null(df, threshold: float = 0.05):
    """Drop every column whose fraction of null values is at least ``threshold``.

    Parameters
    ----------
    df : polars.DataFrame
        Table to filter. It is not modified; a new frame is returned.
    threshold : float, default 0.05
        Columns with ``null_count / row_count >= threshold`` are removed.
        The default reproduces the previously hard-coded 5 % cut-off.

    Returns
    -------
    polars.DataFrame
        ``df`` without the columns that exceeded the null threshold. A frame
        with zero rows is returned with all of its columns, because a null
        fraction is undefined there.
    """
    sparse_cols = []
    for col in df.columns:
        n_rows = df[col].len()
        if n_rows == 0:
            # No rows: the ratio would be 0 / 0 and raise ZeroDivisionError.
            # Nothing is known about the column, so keep it.
            continue
        if df[col].null_count() / n_rows >= threshold:
            sparse_cols.append(col)
    return df.drop(sparse_cols)

In [63]:
# Remove the sparsely populated columns (null share of 5 % or more) and preview the result.
# The name train_static is rebound, so the unfiltered frame is no longer available afterwards.
train_static= drop_col_null(train_static)
train_static

case_id,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,applications30d_658L,applicationscnt_1086L,applicationscnt_464L,applicationscnt_629L,applicationscnt_867L,clientscnt12m_3712952L,clientscnt3m_3712950L,clientscnt6m_3712949L,clientscnt_100L,clientscnt_1022L,clientscnt_1071L,clientscnt_1130L,clientscnt_157L,clientscnt_257L,clientscnt_304L,clientscnt_360L,clientscnt_493L,clientscnt_533L,clientscnt_887L,clientscnt_946L,credamount_770A,credtype_322L,currdebt_22A,currdebtcredtyperange_828A,deferredmnthsnum_166L,disbursedcredamount_1113A,disbursementtype_67L,downpmt_116A,homephncnt_628L,inittransactioncode_186L,isbidproduct_1095L,lastapprcommoditycat_1041M,lastapprcommoditytypec_5251766M,lastcancelreason_561M,lastrejectcommoditycat_161M,lastrejectcommodtypec_5251769M,lastrejectreason_759M,lastrejectreasonclient_4145040M,mobilephncnt_593L,numactivecreds_622L,numactivecredschannel_414L,numactiverelcontr_750L,numcontrs3months_479L,numinstls_657L,numnotactivated_1143L,numpmtchanneldd_318L,numrejects9m_859L,paytype1st_925L,paytype_783L,pmtnum_254L,previouscontdistrict_112M,sellerplacecnt_915L,sellerplacescnt_216L,totaldebt_9A,totalsettled_863A,twobodfilling_608L
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,str,f64,f64,str,bool,str,str,str,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,str,f64,f64,f64,f64,str
0,1917.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30000.0,"""CAL""",0.0,0.0,0.0,30000.0,"""GBA""",0.0,0.0,"""CASH""",false,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",24.0,"""a55475b1""",0.0,0.0,0.0,0.0,"""BO"""
1,3134.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19999.8,"""CAL""",0.0,0.0,0.0,19999.8,"""GBA""",0.0,0.0,"""CASH""",false,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",18.0,"""a55475b1""",0.0,0.0,0.0,0.0,"""BO"""
2,4937.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78000.0,"""CAL""",0.0,0.0,0.0,78000.0,"""GBA""",0.0,1.0,"""CASH""",false,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",36.0,"""a55475b1""",0.0,0.0,0.0,0.0,"""BO"""
3,4643.6,0.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40000.0,"""CAL""",0.0,0.0,0.0,40000.0,"""GBA""",0.0,0.0,"""CASH""",false,"""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""",1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,"""OTHER""","""OTHER""",12.0,"""a55475b1""",1.0,1.0,0.0,0.0,"""BO"""
4,3390.2,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44000.0,"""CAL""",0.0,0.0,0.0,44000.0,"""GBA""",0.0,1.0,"""CASH""",false,"""a55475b1""","""a55475b1""","""P24_27_36""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",24.0,"""a55475b1""",0.0,0.0,0.0,0.0,"""BO"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2703450,3675.4001,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30000.0,"""CAL""",0.0,0.0,0.0,30000.0,"""GBA""",0.0,0.0,"""CASH""",true,"""P12_6_178""","""P142_50_170""","""a55475b1""","""a55475b1""","""a55475b1""","""P94_109_143""","""P94_109_143""",3.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",12.0,"""P123_39_170""",0.0,8.0,0.0,428159.66,"""FO"""
2703451,7088.6,6191.6,0.0,0.0,5.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100000.0,"""CAL""",68098.4,68098.4,0.0,40739.54,"""GBA""",0.0,2.0,"""CASH""",true,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",2.0,2.0,0.0,1.0,0.0,24.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",24.0,"""P162_18_172""",0.0,3.0,68098.4,701247.3,"""FO"""
2703452,7788.8003,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60000.0,"""CAL""",0.0,0.0,0.0,60000.0,"""GBA""",0.0,0.0,"""CASH""",true,"""P159_130_59""","""P75_90_70""","""P180_60_137""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",11.0,"""P133_44_167""",0.0,1.0,0.0,24002.0,"""BO"""
2703453,1195.4,2827.2,0.0,0.0,36.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,6000.0,"""CAL""",46806.6,46806.6,0.0,6000.0,"""GBA""",0.0,1.0,"""CASH""",true,"""a55475b1""","""a55475b1""","""a55475b1""","""P159_130_59""","""P174_113_42""","""a55475b1""","""a55475b1""",2.0,2.0,1.0,1.0,0.0,30.0,0.0,0.0,0.0,"""OTHER""","""OTHER""",6.0,"""P123_6_84""",0.0,4.0,46806.6,440145.3,"""BO"""


In [64]:
# Same null-share filter for the static_cb table.
cb0 = drop_col_null(cb0)

In [65]:
# Preview what is left of the static_cb table after the null-share filter.
cb0

case_id,description_5085714M,education_1103M,education_88M,maritalst_385M,maritalst_893M
i64,str,str,str,str,str
357,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
381,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
388,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
405,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
409,"""a55475b1""","""717ddd49""","""a55475b1""","""a7fcb6e5""","""a55475b1"""
…,…,…,…,…,…
2703450,"""2fc785b2""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
2703451,"""2fc785b2""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
2703452,"""2fc785b2""","""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1"""
2703453,"""2fc785b2""","""6b2ae0fa""","""a55475b1""","""3439d993""","""a55475b1"""


### Depth == 1

In [67]:
# Tables split across several parquet shards: normalise the dtypes of every shard with
# set_table_dtypes, then stack the shards ("vertical_relaxed" reconciles dtype differences).
train_applprev = pl.concat(
    [
        set_table_dtypes(pl.read_parquet(shard, use_pyarrow=True))
        for shard in ("train_applprev_1_0.parquet", "train_applprev_1_1.parquet")
    ],
    how="vertical_relaxed",
)
train_credit_bureau_a = pl.concat(
    [
        set_table_dtypes(pl.read_parquet(shard, use_pyarrow=True))
        for shard in (
            "train_credit_bureau_a_1_0.parquet",
            "train_credit_bureau_a_1_1.parquet",
            "train_credit_bureau_a_1_2.parquet",
            "train_credit_bureau_a_1_3.parquet",
        )
    ],
    how="vertical_relaxed",
)

# Single-file tables: read, then normalise dtypes.
train_credit_bureau_b = set_table_dtypes(pl.read_parquet("train_credit_bureau_b_1.parquet", use_pyarrow=True))
train_debitcard = set_table_dtypes(pl.read_parquet("train_debitcard_1.parquet", use_pyarrow=True))
train_deposit = set_table_dtypes(pl.read_parquet("train_deposit_1.parquet", use_pyarrow=True))
train_person = set_table_dtypes(pl.read_parquet("train_person_1.parquet", use_pyarrow=True))
train_tax_registry_a = set_table_dtypes(pl.read_parquet("train_tax_registry_a_1.parquet", use_pyarrow=True))
train_tax_registry_b = set_table_dtypes(pl.read_parquet("train_tax_registry_b_1.parquet", use_pyarrow=True))
train_tax_registry_c = set_table_dtypes(pl.read_parquet("train_tax_registry_c_1.parquet", use_pyarrow=True))
train_other = set_table_dtypes(pl.read_parquet("train_other_1.parquet", use_pyarrow=True))


In [75]:
# Apply the null-share filter to the applprev table and list the columns that survive.
train_applprev = drop_col_null(train_applprev)
train_applprev.columns

['case_id',
 'actualdpd_943P',
 'annuity_853A',
 'cancelreason_3545846M',
 'creationdate_885D',
 'credacc_credlmt_575A',
 'credamount_590A',
 'credtype_587L',
 'district_544M',
 'downpmt_134A',
 'education_1138M',
 'inittransactioncode_279L',
 'isbidproduct_390L',
 'mainoccupationinc_437A',
 'num_group1',
 'postype_4733339M',
 'profession_152M',
 'rejectreason_755M',
 'rejectreasonclient_4145042M',
 'status_219L']

In [74]:
# Apply the null-share filter to the credit_bureau_a table and list the columns that survive.
train_credit_bureau_a = drop_col_null(train_credit_bureau_a)
train_credit_bureau_a.columns

['case_id',
 'classificationofcontr_13M',
 'classificationofcontr_400M',
 'contractst_545M',
 'contractst_964M',
 'description_351M',
 'financialinstitution_382M',
 'financialinstitution_591M',
 'num_group1',
 'purposeofcred_426M',
 'purposeofcred_874M',
 'subjectrole_182M',
 'subjectrole_93M']