# Data Loading

### Pipeline

In [None]:
class Pipeline:
    """Stateless polars DataFrame transforms, meant to be chained via ``df.pipe``."""

    @staticmethod
    def set_table_dtypes(df):
        """Cast columns to a dtype chosen from the column name.

        ``case_id``, ``WEEK_NUM``, ``num_group1`` and ``num_group2`` become
        Int32 and ``date_decision`` becomes Date. Any other column is cast
        according to the last character of its name: ``P``/``A`` -> Float64,
        ``M`` -> String, ``D`` -> Date. Columns matching none of these rules
        are left untouched.

        Returns the DataFrame with the casts applied.
        """
        int_names = ("case_id", "WEEK_NUM", "num_group1", "num_group2")
        dtype_by_suffix = {
            "P": pl.Float64,
            "A": pl.Float64,
            "M": pl.String,
            "D": pl.Date,
        }

        casts = []
        for name in df.columns:
            # Exact-name rules take priority over the suffix rules.
            if name in int_names:
                target = pl.Int32
            elif name == "date_decision":
                target = pl.Date
            else:
                target = dtype_by_suffix.get(name[-1])

            if target is not None:
                casts.append(pl.col(name).cast(target))

        if casts:
            df = df.with_columns(casts)
        return df

    @staticmethod
    def handle_dates(df):
        """Turn every ``*D`` column into a Float32 day offset from ``date_decision``.

        Each column whose name ends in ``D`` is replaced by
        ``column - date_decision`` expressed in whole days. The
        ``date_decision`` and ``MONTH`` columns are dropped afterwards.

        Returns the transformed DataFrame.
        """
        day_offsets = [
            (pl.col(name) - pl.col("date_decision"))
            .dt.total_days()
            .cast(pl.Float32)
            for name in df.columns
            if name[-1] == "D"
        ]
        if day_offsets:
            df = df.with_columns(day_offsets)

        return df.drop("date_decision", "MONTH")

    @staticmethod
    def filter_cols(df):
        """Drop sparse columns and string columns with unhelpful cardinality.

        ``target``, ``case_id`` and ``WEEK_NUM`` are never dropped. Among the
        remaining columns, the method first removes those whose fraction of
        nulls exceeds 0.95, then removes String columns whose ``n_unique()``
        is exactly 1 or greater than 200.

        Returns the filtered DataFrame.
        """
        protected = ("target", "case_id", "WEEK_NUM")

        # Pass 1: columns that are almost entirely null.
        mostly_null = [
            name
            for name in df.columns
            if name not in protected and df[name].is_null().mean() > 0.95
        ]
        if mostly_null:
            df = df.drop(mostly_null)

        # Pass 2: string columns that are constant or have too many levels.
        bad_cardinality = []
        for name in df.columns:
            if name in protected or df[name].dtype != pl.String:
                continue
            distinct = df[name].n_unique()
            if distinct == 1 or distinct > 200:
                bad_cardinality.append(name)
        if bad_cardinality:
            df = df.drop(bad_cardinality)

        return df

### Automatic Aggregation

In [None]:
class Aggregator:
    """Builds the aggregation expressions used when grouping by ``case_id``.

    Every expression produced here is a column maximum, aliased as
    ``max_<column>``.
    """

    @staticmethod
    def _max_of(names):
        """Return one ``pl.max(name).alias("max_<name>")`` expression per name."""
        return [pl.max(name).alias(f"max_{name}") for name in names]

    @staticmethod
    def _ending_with(df, suffixes):
        """Return the columns of ``df`` whose last character is in ``suffixes``."""
        return [name for name in df.columns if name[-1] in suffixes]

    @staticmethod
    def num_expr(df):
        """Max expressions for columns whose name ends in ``P`` or ``A``."""
        return Aggregator._max_of(Aggregator._ending_with(df, ("P", "A")))

    @staticmethod
    def date_expr(df):
        """Max expressions for columns whose name ends in ``D``."""
        return Aggregator._max_of(Aggregator._ending_with(df, ("D",)))

    @staticmethod
    def str_expr(df):
        """Max expressions for columns whose name ends in ``M``."""
        return Aggregator._max_of(Aggregator._ending_with(df, ("M",)))

    @staticmethod
    def other_expr(df):
        """Max expressions for columns whose name ends in ``T`` or ``L``."""
        return Aggregator._max_of(Aggregator._ending_with(df, ("T", "L")))

    @staticmethod
    def count_expr(df):
        """Max expressions for columns whose name contains ``num_group``."""
        group_cols = [name for name in df.columns if "num_group" in name]
        return Aggregator._max_of(group_cols)

    @staticmethod
    def get_exprs(df):
        """Return all expressions for ``df``.

        The order is: numeric, date, string, other, then ``num_group`` columns.
        """
        builders = (
            Aggregator.num_expr,
            Aggregator.date_expr,
            Aggregator.str_expr,
            Aggregator.other_expr,
            Aggregator.count_expr,
        )

        exprs = []
        for build in builders:
            exprs.extend(build(df))
        return exprs

### File I/O

In [None]:
def read_file(path, depth=None):
    """Read one parquet file, normalise its dtypes and optionally aggregate it.

    Args:
        path: Location of the parquet file.
        depth: When 1 or 2, the table is grouped by ``case_id`` and reduced
            with ``Aggregator.get_exprs``; any other value returns the table
            without aggregation.

    Returns:
        The resulting polars DataFrame.
    """
    table = Pipeline.set_table_dtypes(pl.read_parquet(path))

    if depth not in (1, 2):
        return table

    return table.group_by("case_id").agg(Aggregator.get_exprs(table))

def read_files(regex_path, depth=None):
    """Read every parquet file matching a glob pattern into one DataFrame.

    Each matching file is loaded with ``read_file`` (dtype normalisation and,
    for depth 1 or 2, aggregation by ``case_id``). The per-file frames are
    concatenated with ``how="vertical_relaxed"`` and then de-duplicated on
    ``case_id``.

    Args:
        regex_path: Glob pattern (str or path-like) selecting the chunk files.
        depth: Forwarded to ``read_file``.

    Returns:
        A polars DataFrame holding one row per ``case_id``.

    Raises:
        FileNotFoundError: If no file matches ``regex_path``.
    """
    # glob returns matches in arbitrary order; sort so runs are reproducible.
    paths = sorted(glob(str(regex_path)))
    if not paths:
        # Fail with the offending pattern instead of an opaque concat error.
        raise FileNotFoundError(f"No parquet files match pattern: {regex_path}")

    chunks = [read_file(path, depth) for path in paths]

    df = pl.concat(chunks, how="vertical_relaxed")
    # NOTE(review): unique() keeps a single row per case_id; for depth 1/2 this
    # presumes a case_id never spans several chunk files -- confirm against the data.
    df = df.unique(subset=["case_id"])

    return df

### Feature Engineering

In [None]:
def feature_eng(df_base, depth_0, depth_1, depth_2):
    """Assemble the feature table from the base table and the side tables.

    Adds ``month_decision`` and ``weekday_decision`` (derived from
    ``date_decision``) to the base table, left-joins every table from
    ``depth_0``, ``depth_1`` and ``depth_2`` (in that order) on ``case_id``,
    and finally applies ``Pipeline.handle_dates``.

    Args:
        df_base: Base polars DataFrame containing ``case_id`` and
            ``date_decision``.
        depth_0: List of DataFrames to join.
        depth_1: List of DataFrames to join.
        depth_2: List of DataFrames to join.

    Returns:
        The joined polars DataFrame after date handling.
    """
    decision_date = pl.col("date_decision")
    merged = df_base.with_columns(
        month_decision=decision_date.dt.month(),
        weekday_decision=decision_date.dt.weekday(),
    )

    side_tables = depth_0 + depth_1 + depth_2
    for position, table in enumerate(side_tables):
        # The join suffix is the table's position in the combined list.
        merged = merged.join(
            table, how="left", on="case_id", suffix=f"_{position}"
        )

    return Pipeline.handle_dates(merged)

In [None]:
def to_pandas(df_data, cat_cols=None):
    """Convert to pandas and cast the categorical columns to ``category``.

    Args:
        df_data: Object exposing ``to_pandas()`` that yields a pandas DataFrame.
        cat_cols: Columns to cast. When ``None``, every ``object``-dtype
            column of the converted frame is used.

    Returns:
        A ``(frame, cat_cols)`` tuple: the pandas DataFrame and the list of
        columns that were cast.
    """
    frame = df_data.to_pandas()

    # Infer the categorical columns only when the caller did not supply them.
    categorical = cat_cols
    if categorical is None:
        categorical = list(frame.select_dtypes("object").columns)

    frame[categorical] = frame[categorical].astype("category")

    return frame, categorical

### Configuration

In [None]:
# Data root (relative path) and the train/test parquet directories beneath it.
ROOT = Path("../data")
TRAIN_DIR = ROOT.joinpath("parquet_files", "train")
TEST_DIR = ROOT.joinpath("parquet_files", "test")

### Train Files Read & Feature Engineering

In [None]:
# Load all training tables. The keys match the parameters of feature_eng so
# the dict can be unpacked straight into it.
data_store = {
    "df_base": read_file(TRAIN_DIR / "train_base.parquet"),
    # Read without a depth argument, so no aggregation is applied.
    "depth_0": [
        read_file(TRAIN_DIR / "train_static_cb_0.parquet"),
        read_files(TRAIN_DIR / "train_static_0_*.parquet"),
    ],
    # depth=1: each table is grouped by case_id and aggregated on read.
    "depth_1": [
        read_files(TRAIN_DIR / "train_applprev_1_*.parquet", 1),
        read_file(TRAIN_DIR / "train_tax_registry_a_1.parquet", 1),
        read_file(TRAIN_DIR / "train_tax_registry_b_1.parquet", 1),
        read_file(TRAIN_DIR / "train_tax_registry_c_1.parquet", 1),
        read_files(TRAIN_DIR / "train_credit_bureau_a_1_*.parquet", 1),
        read_file(TRAIN_DIR / "train_credit_bureau_b_1.parquet", 1),
        read_file(TRAIN_DIR / "train_other_1.parquet", 1),
        read_file(TRAIN_DIR / "train_person_1.parquet", 1),
        read_file(TRAIN_DIR / "train_deposit_1.parquet", 1),
        read_file(TRAIN_DIR / "train_debitcard_1.parquet", 1),
    ],
    # depth=2: aggregated by case_id on read, same as depth 1.
    "depth_2": [
        read_file(TRAIN_DIR / "train_credit_bureau_b_2.parquet", 2),
        read_files(TRAIN_DIR / "train_credit_bureau_a_2_*.parquet", 2),
    ]
}

In [None]:
# Join every loaded table onto the base table and derive the date features.
df_train = feature_eng(**data_store)

print("train data shape:\t", df_train.shape)


train data shape:	 (1526659, 472)


In [None]:
# Display the assembled training frame as the cell output.
df_train

case_id,WEEK_NUM,target,month_decision,weekday_decision,assignmentdate_238D,assignmentdate_4527235D,assignmentdate_4955616D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,dateofbirth_342D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,for3years_128L,for3years_504L,for3years_584L,formonth_118L,formonth_206L,formonth_535L,forquarter_1017L,forquarter_462L,forquarter_634L,fortoday_1092L,forweek_1077L,forweek_528L,forweek_601L,foryear_618L,foryear_818L,foryear_850L,…,max_role_993L,max_safeguarantyflag_411L,max_sex_738L,max_type_25L,max_num_group1_9,max_amount_416A,max_contractenddate_991D,max_openingdate_313D,max_num_group1_10,max_last180dayaveragebalance_704A,max_last180dayturnover_1134A,max_last30dayturnover_651A,max_openingdate_857D,max_num_group1_11,max_pmts_dpdvalue_108P,max_pmts_pmtsoverdue_635A,max_pmts_date_1107D,max_num_group1_12,max_num_group2,max_pmts_dpd_1073P,max_pmts_dpd_303P,max_pmts_overdue_1140A,max_pmts_overdue_1152A,max_collater_typofvalofguarant_298M,max_collater_typofvalofguarant_407M,max_collaterals_typeofguarante_359M,max_collaterals_typeofguarante_669M,max_subjectroles_name_541M,max_subjectroles_name_838M,max_collater_valueofguarantee_1124L,max_collater_valueofguarantee_876L,max_pmts_month_158T,max_pmts_month_706T,max_pmts_year_1139T,max_pmts_year_507T,max_num_group1_13,max_num_group2_13
i32,i32,i64,i8,i8,f32,f32,f32,f32,f64,f32,f32,f64,f64,f64,f64,f64,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,str,bool,str,str,i32,f64,f32,f32,i32,f64,f64,f64,f32,i32,f64,f64,f32,i32,i32,f64,f64,f64,f64,str,str,str,str,str,str,f64,f64,f64,f64,f64,f64,i32,i32
0,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,true,"""F""","""PRIMARY_MOBILE""",3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,true,"""M""","""PRIMARY_MOBILE""",4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,1,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,true,"""F""","""PRIMARY_MOBILE""",4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,true,"""F""","""PRIMARY_MOBILE""",2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,1,1,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,true,"""F""","""PRIMARY_MOBILE""",3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2703450,91,0,10,1,,,-998.0,,52863.59,-22193.0,,0.0,0.0,0.0,0.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",0.0,,,,,,,,,,,,,,,,,…,,true,"""F""","""PRIMARY_MOBILE""",0,,,,,,,,,,,,,,,0.0,44.0,0.0,4316.44,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2021.0,10,35
2703451,91,0,10,1,,,-5591.0,,324608.52,-25541.0,,0.0,0.0,0.0,0.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",1.0,,,,,,,,,,,,,,,,,…,,true,"""F""","""PRIMARY_MOBILE""",1,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2021.0,3,23
2703452,91,0,10,1,,,,,102738.76,-15771.0,,2.0,2.0,0.0,3.0,2.0,"""2fc785b2""","""a55475b1""","""a55475b1""",0.0,,,,,,,,,,,,,,,,,…,,false,"""M""","""PRIMARY_MOBILE""",0,,,,,,,,,,,,,,,16.0,0.0,4884.2983,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2020.0,2,35
2703453,91,0,10,1,,,-4616.0,,212683.29,-25814.0,,2.0,2.0,1.0,4.0,1.0,"""2fc785b2""","""6b2ae0fa""","""a55475b1""",1.0,,,,,,,,,,,,,,,,,…,,false,"""F""","""PRIMARY_MOBILE""",1,44916.645,-861.0,-1956.0,1,,,,-1956.0,1,,,,,,0.0,23.0,0.0,2693.2,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2020.0,12,35


### Test Files Read & Feature Engineering

In [None]:
# Load all test tables, mirroring the training layout. This rebinds
# data_store; the keys match the parameters of feature_eng.
data_store = {
    "df_base": read_file(TEST_DIR / "test_base.parquet"),
    # Read without a depth argument, so no aggregation is applied.
    "depth_0": [
        read_file(TEST_DIR / "test_static_cb_0.parquet"),
        read_files(TEST_DIR / "test_static_0_*.parquet"),
    ],
    # depth=1: each table is grouped by case_id and aggregated on read.
    "depth_1": [
        read_files(TEST_DIR / "test_applprev_1_*.parquet", 1),
        read_file(TEST_DIR / "test_tax_registry_a_1.parquet", 1),
        read_file(TEST_DIR / "test_tax_registry_b_1.parquet", 1),
        read_file(TEST_DIR / "test_tax_registry_c_1.parquet", 1),
        read_files(TEST_DIR / "test_credit_bureau_a_1_*.parquet", 1),
        read_file(TEST_DIR / "test_credit_bureau_b_1.parquet", 1),
        read_file(TEST_DIR / "test_other_1.parquet", 1),
        read_file(TEST_DIR / "test_person_1.parquet", 1),
        read_file(TEST_DIR / "test_deposit_1.parquet", 1),
        read_file(TEST_DIR / "test_debitcard_1.parquet", 1),
    ],
    # depth=2: aggregated by case_id on read, same as depth 1.
    "depth_2": [
        read_file(TEST_DIR / "test_credit_bureau_b_2.parquet", 2),
        read_files(TEST_DIR / "test_credit_bureau_a_2_*.parquet", 2),
    ]
}

In [None]:
# Build the test feature table with the same steps used for training.
df_test = feature_eng(**data_store)

print("test data shape:\t", df_test.shape)

test data shape:	 (10, 471)


In [None]:
# Display the assembled test frame as the cell output.
df_test

case_id,WEEK_NUM,month_decision,weekday_decision,assignmentdate_238D,assignmentdate_4527235D,assignmentdate_4955616D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,dateofbirth_342D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,for3years_128L,for3years_504L,for3years_584L,formonth_118L,formonth_206L,formonth_535L,forquarter_1017L,forquarter_462L,forquarter_634L,fortoday_1092L,forweek_1077L,forweek_528L,forweek_601L,foryear_618L,foryear_818L,foryear_850L,fourthquarter_440L,…,max_role_993L,max_safeguarantyflag_411L,max_sex_738L,max_type_25L,max_num_group1_9,max_amount_416A,max_contractenddate_991D,max_openingdate_313D,max_num_group1_10,max_last180dayaveragebalance_704A,max_last180dayturnover_1134A,max_last30dayturnover_651A,max_openingdate_857D,max_num_group1_11,max_pmts_dpdvalue_108P,max_pmts_pmtsoverdue_635A,max_pmts_date_1107D,max_num_group1_12,max_num_group2,max_pmts_dpd_1073P,max_pmts_dpd_303P,max_pmts_overdue_1140A,max_pmts_overdue_1152A,max_collater_typofvalofguarant_298M,max_collater_typofvalofguarant_407M,max_collaterals_typeofguarante_359M,max_collaterals_typeofguarante_669M,max_subjectroles_name_541M,max_subjectroles_name_838M,max_collater_valueofguarantee_1124L,max_collater_valueofguarantee_876L,max_pmts_month_158T,max_pmts_month_706T,max_pmts_year_1139T,max_pmts_year_507T,max_num_group1_13,max_num_group2_13
i32,i32,i8,i8,f32,f32,f32,f32,f64,f32,f32,f64,f64,f64,f64,f64,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,str,bool,str,str,i32,f64,f32,f32,i32,f64,f64,f64,f32,i32,f64,f64,f32,i32,i32,f64,f64,f64,f64,str,str,str,str,str,str,f64,f64,f64,f64,f64,f64,i32,i32
57543,100,5,5,,,,,151364.0,-14804.0,,2.0,4.0,1.0,8.0,2.0,"""2fc785b2""","""6b2ae0fa""","""a55475b1""",4.0,,,,,,,,,,,,,,,,,9.0,…,,False,"""F""","""PRIMARY_MOBILE""",1.0,,,,,,,,,,,,,,,0.0,,0.0,,"""a55475b1""","""9a0c095e""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,2.0,2.0,2021.0,2018.0,9.0,0.0
57549,100,1,1,,,-1352.0,,1563100.0,-22723.0,,6.0,9.0,3.0,12.0,4.0,"""2fc785b2""","""39a0853f""","""a55475b1""",9.0,,,,,,,,,,,,,,,,,5.0,…,,True,"""F""","""PRIMARY_MOBILE""",1.0,,,,,,,,,,,,,,,,,,,"""a55475b1""","""9a0c095e""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,2.0,2.0,2022.0,2019.0,9.0,0.0
57551,100,11,5,,,,,2926195.3,-14090.0,,1.0,3.0,1.0,4.0,1.0,"""2fc785b2""","""6b2ae0fa""","""a55475b1""",3.0,,,,,,,,,,,,,,,,,2.0,…,,False,"""F""","""PRIMARY_MOBILE""",0.0,,,,,,,,,,,,,,,,,,,"""a55475b1""","""9a0c095e""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,75200.0,2.0,2.0,2020.0,2017.0,12.0,0.0
57552,100,11,5,,,-740.0,,747031.73,-23768.0,,2.0,2.0,0.0,5.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",3.0,,,,,,,,,,,,,,,,,2.0,…,,True,"""M""","""PRIMARY_MOBILE""",0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
57569,100,12,1,,,-3720.0,,,-26408.0,,4.0,4.0,1.0,4.0,4.0,"""2fc785b2""","""717ddd49""","""a55475b1""",0.0,,,,,,,,,,,,,,,,,0.0,…,"""FULL""",False,"""F""","""PRIMARY_MOBILE""",1.0,,,,,,,,,,,,,,,,2410.0,,33346.402,"""a55475b1""","""a55475b1""","""c7a5ad39""","""a55475b1""","""ab3c25cf""","""a55475b1""",,0.0,,10.0,,2019.0,1.0,8.0
57630,100,3,2,,,,,499975.0,-19767.0,,1.0,2.0,1.0,5.0,1.0,"""2fc785b2""","""6b2ae0fa""","""a55475b1""",1.0,,,,,,,,,,,,,,,,,3.0,…,,True,"""F""","""PRIMARY_MOBILE""",1.0,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,10.0,10.0,2021.0,2015.0,1.0,8.0
57631,100,6,6,,,,,480334.49,-12999.0,,0.0,0.0,0.0,1.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",3.0,,,,,,,,,,,,,,,,,7.0,…,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,10.0,10.0,2019.0,2018.0,1.0,8.0
57632,100,2,6,,,-2263.0,,17677.0,-23107.0,,1.0,2.0,0.0,4.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",1.0,,,,,,,,,,,,,,,,,1.0,…,,,,,,,,,,,,,,,,,,,,0.0,,0.0,,"""a55475b1""","""a55475b1""","""a55475b1""","""c7a5ad39""","""a55475b1""","""ab3c25cf""",0.0,,11.0,,2019.0,,0.0,9.0
57633,100,1,2,,,,,6373000.0,-10496.0,,3.0,3.0,2.0,8.0,3.0,"""2fc785b2""","""a55475b1""","""a55475b1""",4.0,,,,,,,,,,,,,,,,,8.0,…,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",7230000.0,0.0,3.0,3.0,2021.0,2021.0,8.0,1.0
57634,100,1,3,,,,,15263.65,-16281.0,,2.0,2.0,1.0,3.0,1.0,"""2fc785b2""","""a55475b1""","""a55475b1""",1.0,,,,,,,,,,,,,,,,,1.0,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Feature Elimination

In [None]:
# Column filtering is decided on the training data only.
df_train = df_train.pipe(Pipeline.filter_cols)
# Restrict test to the columns that survived in train, excluding "target".
df_test = df_test.select([col for col in df_train.columns if col != "target"])

print("train data shape:\t", df_train.shape)
print("test data shape:\t", df_test.shape)

train data shape:	 (1526659, 361)
test data shape:	 (10, 360)


In [None]:
# Display the filtered training frame as the cell output.
df_train

case_id,WEEK_NUM,target,month_decision,weekday_decision,assignmentdate_238D,assignmentdate_4527235D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,fourthquarter_440L,maritalst_385M,maritalst_893M,numberofqueries_373L,pmtaverage_3A,pmtaverage_4527227A,pmtcount_4527229L,pmtcount_693L,pmtscount_423L,pmtssum_45A,requesttype_4525192L,responsedate_1012D,responsedate_4527233D,responsedate_4917613D,secondquarter_766L,thirdquarter_1082L,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,…,max_familystate_447L,max_housetype_905L,max_incometype_1044T,max_personindex_1023L,max_persontype_1072L,max_persontype_792L,max_relationshiptoclient_415T,max_relationshiptoclient_642T,max_remitter_829L,max_role_1084L,max_safeguarantyflag_411L,max_sex_738L,max_type_25L,max_num_group1_9,max_amount_416A,max_openingdate_313D,max_num_group1_10,max_openingdate_857D,max_num_group1_11,max_pmts_dpd_1073P,max_pmts_dpd_303P,max_pmts_overdue_1140A,max_pmts_overdue_1152A,max_collater_typofvalofguarant_298M,max_collater_typofvalofguarant_407M,max_collaterals_typeofguarante_359M,max_collaterals_typeofguarante_669M,max_subjectroles_name_541M,max_subjectroles_name_838M,max_collater_valueofguarantee_1124L,max_collater_valueofguarantee_876L,max_pmts_month_158T,max_pmts_month_706T,max_pmts_year_1139T,max_pmts_year_507T,max_num_group1_13,max_num_group2_13
i32,i32,i64,i8,i8,f32,f32,f32,f64,f32,f64,f64,f64,f64,f64,str,str,str,f64,f64,str,str,f64,f64,f64,f64,f64,f64,f64,str,f32,f32,f32,f64,f64,f64,f64,…,str,str,str,f64,f64,f64,str,str,bool,str,bool,str,str,i32,f64,f32,i32,f32,i32,f64,f64,f64,f64,str,str,str,str,str,str,f64,f64,f64,f64,f64,f64,i32,i32
0,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,"""MARRIED""",,"""SALARIED_GOVT""",2.0,5.0,5.0,"""SPOUSE""","""SPOUSE""",false,"""PE""",true,"""F""","""PRIMARY_MOBILE""",3,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,"""DIVORCED""",,"""SALARIED_GOVT""",2.0,5.0,5.0,"""SIBLING""","""SIBLING""",false,"""PE""",true,"""M""","""PRIMARY_MOBILE""",4,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,1,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,"""MARRIED""",,"""EMPLOYED""",2.0,5.0,5.0,"""SPOUSE""","""SPOUSE""",false,"""PE""",true,"""F""","""PRIMARY_MOBILE""",4,,,,,,,,,,,,,,,,,,,,,,,
3,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,"""MARRIED""",,"""EMPLOYED""",1.0,4.0,4.0,"""SPOUSE""","""SPOUSE""",false,"""PE""",true,"""F""","""PRIMARY_MOBILE""",2,,,,,,,,,,,,,,,,,,,,,,,
4,0,1,1,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,"""MARRIED""",,"""EMPLOYED""",2.0,5.0,5.0,"""SIBLING""","""SIBLING""",false,"""PE""",true,"""F""","""PRIMARY_MOBILE""",3,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2703450,91,0,10,1,,,,52863.59,-22193.0,0.0,0.0,0.0,0.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",0.0,1.0,"""a55475b1""","""a55475b1""",0.0,,,,,,,,,,14.0,1.0,1.0,0.0,176561.36,…,,"""OWNED""","""RETIRED_PENSIONER""",0.0,1.0,1.0,,,,"""CL""",true,"""F""","""PRIMARY_MOBILE""",0,,,,,,0.0,44.0,0.0,4316.44,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2021.0,10,35
2703451,91,0,10,1,,,,324608.52,-25541.0,0.0,0.0,0.0,0.0,0.0,"""2fc785b2""","""a55475b1""","""a55475b1""",1.0,0.0,"""a55475b1""","""a55475b1""",0.0,,,,,,,,,,14.0,1.0,2.0,0.0,301276.47,…,,,"""RETIRED_PENSIONER""",0.0,1.0,1.0,,,,"""CL""",true,"""F""","""PRIMARY_MOBILE""",1,,,,,,0.0,0.0,0.0,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2021.0,3,23
2703452,91,0,10,1,,,,102738.76,-15771.0,2.0,2.0,0.0,3.0,2.0,"""2fc785b2""","""a55475b1""","""a55475b1""",0.0,1.0,"""a55475b1""","""a55475b1""",3.0,,,,,,,,,,14.0,0.0,4.0,0.0,14232.4,…,,,"""PRIVATE_SECTOR_EMPLOYEE""",0.0,1.0,1.0,,,,"""CL""",false,"""M""","""PRIMARY_MOBILE""",0,,,,,,16.0,0.0,4884.2983,0.0,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2020.0,2,35
2703453,91,0,10,1,,,,212683.29,-25814.0,2.0,2.0,1.0,4.0,1.0,"""2fc785b2""","""6b2ae0fa""","""a55475b1""",1.0,3.0,"""3439d993""","""a55475b1""",4.0,,,,,,,,,,12.0,2.0,1.0,0.0,197371.58,…,,,"""RETIRED_PENSIONER""",0.0,1.0,1.0,,,,"""CL""",false,"""F""","""PRIMARY_MOBILE""",1,44916.645,-1956.0,1,-1956.0,1,0.0,23.0,0.0,2693.2,"""a55475b1""","""a55475b1""","""c7a5ad39""","""c7a5ad39""","""ab3c25cf""","""ab3c25cf""",0.0,0.0,12.0,12.0,2021.0,2020.0,12,35


### Pandas Conversion