In [1]:
import pandas as pd
import polars as pl
import numpy as np

# Base folder of the competition data (relative path); the CSV paths read later
# in the notebook are built by appending to this string, so keep the trailing slash.
ROOT_DIR = './../home-credit-credit-risk-model-stability/'

In [2]:
# Utils to format the tables
def set_table_dtypes(df):
    """Cast every column whose name ends in "P" or "A" to ``pl.Float64``.

    The last letter of a column name is used as a type tag. Only the "P" and
    "A" tags are handled here; add further suffix rules in this function if
    other dtypes are desired.

    Parameters
    ----------
    df : polars.DataFrame
        Table to normalise. Only ``df.columns`` and ``df.with_columns`` are used.

    Returns
    -------
    polars.DataFrame
        ``df`` itself when no column name matches, otherwise the frame
        returned by ``with_columns`` with the matching columns cast to Float64.
    """
    # str.endswith is safe for an empty column name, whereas col[-1] would
    # raise IndexError on "".
    float_cols = [col for col in df.columns if col.endswith(("P", "A"))]
    if not float_cols:
        return df

    # A single with_columns call casts all matching columns at once instead of
    # producing one intermediate frame per column; cast() keeps each column's
    # name, so no alias is required.
    return df.with_columns(pl.col(float_cols).cast(pl.Float64))

def inspect_columns(df, seed=None):
    """Build a per-column summary table for a quick look at a dataset.

    Parameters
    ----------
    df : pandas.DataFrame, polars.DataFrame or anything ``pd.DataFrame`` accepts
        Table to inspect. Objects exposing ``to_pandas()`` (e.g. polars frames)
        are converted through that method.
    seed : int, optional
        When given, the row shown in ``random_row`` is drawn from
        ``np.random.default_rng(seed)`` so the report is reproducible. When
        omitted, ``np.random.randint`` (global NumPy state) is used.

    Returns
    -------
    pandas.DataFrame
        One row per input column with these fields:
        ``unique`` (number of distinct non-null values equals the row count),
        ``cardinality`` (number of distinct non-null values),
        ``with_null`` (any missing value present),
        ``null_pct`` (share of missing values in percent, 2 decimals),
        ``1st_row`` / ``random_row`` / ``last_row`` (sample values),
        ``dtype`` (pandas dtype of the column).
        For a frame without rows the three sample fields are NaN.
    """
    # pd.DataFrame(<polars frame>) goes through the generic array-like path and
    # does not keep the column names / per-column dtypes, so prefer the
    # object's own conversion when it offers one.
    if hasattr(df, "to_pandas"):
        df = df.to_pandas()
    else:
        df = pd.DataFrame(df)

    n_rows = len(df)
    cardinality = df.nunique()  # computed once, used for two report fields
    null_mask = df.isna()

    if n_rows:
        if seed is None:
            random_pos = np.random.randint(low=0, high=n_rows)
        else:
            random_pos = int(np.random.default_rng(seed).integers(n_rows))
        first_row = df.iloc[0]
        random_row = df.iloc[random_pos]
        last_row = df.iloc[-1]
    else:
        # No rows to sample from: iloc[0] / randint(0, 0) would raise.
        first_row = random_row = last_row = pd.Series(index=df.columns, dtype=object)

    return pd.DataFrame({
        'unique': cardinality == n_rows,
        'cardinality': cardinality,
        'with_null': null_mask.any(),
        # mean of the boolean mask == null count / row count (NaN when empty)
        'null_pct': null_mask.mean().mul(100).round(2),
        '1st_row': first_row,
        'random_row': random_row,
        'last_row': last_row,
        'dtype': df.dtypes,
    })

In [3]:
# Read each CSV shard, normalise its dtypes, then stack the shards row-wise.
# how="vertical_relaxed" lets polars coerce columns to a common supertype when
# the shards were inferred with different dtypes.
train_appl_prev_depth1 = pl.concat(
    [
        pl.read_csv(ROOT_DIR + shard_path).pipe(set_table_dtypes)
        for shard_path in (
            "csv_files/train/train_applprev_1_0.csv",
            "csv_files/train/train_applprev_1_1.csv",
        )
    ],
    how="vertical_relaxed",
)
# Bare last expression so the notebook renders a preview of the frame.
train_appl_prev_depth1

case_id,actualdpd_943P,annuity_853A,approvaldate_319D,byoccupationinc_3656910L,cancelreason_3545846M,childnum_21L,creationdate_885D,credacc_actualbalance_314A,credacc_credlmt_575A,credacc_maxhisbal_375A,credacc_minhisbal_90A,credacc_status_367L,credacc_transactions_402L,credamount_590A,credtype_587L,currdebt_94A,dateactivated_425D,district_544M,downpmt_134A,dtlastpmt_581D,dtlastpmtallstes_3545839D,education_1138M,employedfrom_700D,familystate_726L,firstnonzeroinstldate_307D,inittransactioncode_279L,isbidproduct_390L,isdebitcard_527L,mainoccupationinc_437A,maxdpdtolerance_577P,num_group1,outstandingdebt_522A,pmtnum_8L,postype_4733339M,profession_152M,rejectreason_755M,rejectreasonclient_4145042M,revolvingaccount_394A,status_219L,tenor_203L
i64,f64,f64,str,f64,str,f64,str,f64,f64,f64,f64,str,f64,f64,str,f64,str,str,f64,str,str,str,str,str,str,str,bool,bool,f64,f64,i64,f64,f64,str,str,str,str,f64,str,f64
2,0.0,640.2,,,"""a55475b1""",0.0,"""2013-04-03""",,0.0,,,,,10000.0,"""CAL""",,,"""P136_108_173""",0.0,,,"""P97_36_170""","""2010-02-15""","""SINGLE""","""2013-05-04""","""CASH""",false,,8200.0,,0,,24.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""D""",24.0
2,0.0,1682.4,,,"""a55475b1""",0.0,"""2013-04-03""",,0.0,,,,,16000.0,"""CAL""",,,"""P136_108_173""",0.0,,,"""P97_36_170""","""2010-02-15""","""SINGLE""","""2013-05-04""","""CASH""",false,,8200.0,,1,,12.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""D""",12.0
3,0.0,6140.0,,,"""P94_109_143""",,"""2019-01-07""",,0.0,,,,,59999.8,"""CAL""",,,"""P131_33_167""",0.0,,,"""P97_36_170""","""2018-05-15""","""MARRIED""","""2019-02-07""","""CASH""",false,,11000.0,,0,,12.0,"""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""",,"""D""",12.0
4,0.0,2556.6,,,"""P24_27_36""",,"""2019-01-08""",,0.0,,,,,40000.0,"""CAL""",,,"""P194_82_174""",0.0,,,"""a55475b1""",,,"""2019-02-08""","""CASH""",false,,16000.0,,0,,24.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""T""",24.0
5,0.0,,,,"""P85_114_140""",,"""2019-01-16""",,,,,,,,,,,"""P54_133_26""",,,,"""a55475b1""",,,,,false,,62000.0,,0,,,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""T""",
6,0.0,1773.8,,,"""P94_109_143""",,"""2018-09-12""",,0.0,,,,,15980.0,"""CAL""",,,"""P82_154_182""",0.0,,,"""a55475b1""",,,"""2018-10-12""","""CASH""",false,,37000.0,,0,,11.0,"""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""",,"""D""",11.0
6,0.0,4189.6,,,"""P94_109_143""",0.0,"""2017-12-28""",,0.0,,,,,32000.0,"""CAL""",,,"""P82_154_182""",0.0,,,"""P97_36_170""","""2013-09-15""","""SINGLE""","""2018-01-28""","""CASH""",false,,35000.0,,1,,11.0,"""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""",,"""D""",11.0
6,0.0,1110.4,,1.0,"""a55475b1""",0.0,"""2014-11-18""",,0.0,,,,,17380.0,"""COL""",0.0,,"""a55475b1""",0.0,,,"""P97_36_170""","""2012-09-15""","""SINGLE""","""2014-12-18""","""POS""",false,,14000.0,,2,0.0,24.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""D""",24.0
10,0.0,10916.601,"""2019-01-11""",,"""P73_130_169""",,"""2019-01-11""",,0.0,,,,,100000.0,"""CAL""",,,"""P38_65_49""",0.0,,,"""P17_36_170""",,"""MARRIED""","""2019-02-11""","""CASH""",false,,57000.0,,0,,12.0,"""a55475b1""","""a55475b1""","""a55475b1""","""a55475b1""",,"""T""",12.0
13,0.0,5069.6,,,"""P94_109_143""",,"""2018-08-20""",,0.0,,,,,40000.0,"""CAL""",,,"""P120_80_181""",0.0,,,"""a55475b1""",,,"""2018-09-20""","""CASH""",false,,64000.0,,0,,12.0,"""a55475b1""","""a55475b1""","""P94_109_143""","""a55475b1""",,"""D""",12.0


In [1]:
# NOTE: this call needs the cell that defines `inspect_columns` (and the cell
# that builds `train_appl_prev_depth1`) to have been executed in the current
# kernel; after a kernel restart, re-run the notebook from the top first,
# otherwise a NameError is raised here.
inspect_columns(train_appl_prev_depth1)

NameError: name 'inspect_columns' is not defined