In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo

In [2]:
# Destination files for the raw export and for the cleaned export
raw_output = "../data/raw/de_credit_data_raw.csv"
cleaned_output = "../data/clean/de_credit_data_cleaned.csv"

# Step 1: Download dataset 144 through ucimlrepo
# (presumably the German credit data, judging by the output file names - confirm)
dataset = fetch_ucirepo(id=144)

In [None]:
# Step 2: Combine features and target into one DataFrame
df = pd.concat([dataset.data.features, dataset.data.targets], axis=1)

# Step 3: Extract the metadata table (its 'name' and 'description' columns are used below)
meta = dataset.variables

# Step 4: Map coded column names to full descriptions.
# Variables without a description are left out of the map so their columns keep
# the coded name instead of being renamed to None/NaN.
described = meta.dropna(subset=["description"])
column_description_map = dict(zip(described["name"], described["description"]))
df_renamed_col = df.rename(columns=column_description_map)

# Step 5: Save the raw snapshot. to_csv does not create missing directories,
# so make sure the target folder exists first (e.g. on a fresh checkout).
Path(raw_output).parent.mkdir(parents=True, exist_ok=True)
df_renamed_col.to_csv(raw_output, sep=";", index=False)
df_renamed_col.info()

In [None]:
# df.columns
# df_renamed_col.columns

In [None]:
# Preview the first five rows to sanity-check the renamed columns
df_renamed_col.head(n=5)

In [None]:
# Number of missing values in each column
df_renamed_col.isna().sum(axis=0)

In [None]:
# Short aliases for the long, normalised (snake_case) column names
col_dict = {
    "status_of_existing_checking_account": "status_checking_account",
    "duration": "duration_in_month",
    "present_employment_since": "present_employment",
    "installment_rate_in_percentage_of_disposable_income": "installment_rate",
    "personal_status_and_sex": "gender_status",
    "present_residence_since": "present_residence",
    "other_installment_plans": "other_installment",
    "number_of_existing_credits_at_this_bank": "existing_credits",
    "job": "job_skill",
    "number_of_people_being_liable_to_provide_maintenance_for": "number_of_dependents",
    "1_=_good,_2_=_bad": "score",
}

# Work on a copy so the frame with the descriptive names stays untouched
df1 = df_renamed_col.copy()

# Normalise the headers: trim, tighten ' / ' to '/', spaces -> '_', lower-case
normalised_names = (
    df1.columns
    .str.strip()
    .str.replace(' / ', '/')
    .str.replace(' ', '_')
    .str.lower()
)
df1.columns = normalised_names

# Apply the short aliases; names not listed in col_dict are kept as they are
df1 = df1.rename(columns=col_dict)
df1.columns

In [None]:
# Decode the categorical "A.." codes of every qualitative attribute into
# human-readable labels. One entry per column: column name -> {code: label}.
code_labels = {
    # Attribute 1: (qualitative)
    # NOTE(review): the "A12" label "< 200" overlaps "< 0" as written - confirm
    # whether the intended range is 0 <= x < 200.
    "status_checking_account": {
        "A11": "< 0",
        "A12": "< 200",
        "A13": ">= 200/salary assignments for at least 1 year",
        "A14": "no checking account",
    },
    # Attribute 3: (qualitative)
    "credit_history": {
        "A30": "no credits taken/all credits paid back duly",
        "A31": "all credits at this bank paid back duly",
        "A32": "existing credits paid back duly till now",
        "A33": "delay in paying off in the past",
        "A34": "critical account/ other credits existing (not at this bank)",
    },
    # Attribute 4: (qualitative)
    "purpose": {
        "A40": "car (new)",
        "A41": "car (used)",
        "A42": "furniture/equipment",
        "A43": "radio/television",
        "A44": "domestic appliances",
        "A45": "repairs",
        "A46": "education",
        "A47": "(vacation - does not exist?)",
        "A48": "retraining",
        "A49": "business",
        "A410": "others",
    },
    # Attribute 6: (qualitative)
    "savings_account/bonds": {
        "A61": "< 100",
        "A62": "100-499",  # stray trailing space removed from the label
        "A63": "500-999",
        "A64": ">= 1000",
        "A65": "unknown/ no savings account",
    },
    # Attribute 7: (qualitative)
    "present_employment": {
        "A71": "unemployed",
        "A72": "< 1 year",
        "A73": "1 - less than 4 years",
        "A74": "4 - less than 7 years",
        "A75": ">= 7 years",
    },
    # Attribute 9: (qualitative)
    "gender_status": {
        "A91": "male   : divorced/separated",
        "A92": "female : divorced/separated/married",
        "A93": "male   : single",
        "A94": "male   : married/widowed",
        "A95": "female : single",  # stray trailing spaces removed from the label
    },
    # Attribute 10: (qualitative)
    "other_debtors/guarantors": {
        "A101": "none",
        "A102": "co-applicant",
        "A103": "guarantor",
    },
    # Attribute 12: (qualitative)
    "property": {
        "A121": "real estate",
        "A122": "if not A121 : building society savings agreement/life insurance",
        "A123": "if not A121/A122 : car or other, not in attribute 6",
        "A124": "unknown/no property",  # was misspelled "propert"
    },
    # Attribute 14: (qualitative)
    "other_installment": {
        "A141": "bank",
        "A142": "stores",
        "A143": "none",
    },
    # Attribute 15: (qualitative)
    "housing": {
        "A151": "rent",
        "A152": "own",
        "A153": "for free",
    },
    # Attribute 17: (qualitative)
    "job_skill": {
        "A171": "unemployed/unskilled - non-resident",
        "A172": "unskilled - resident",
        "A173": "skilled employee /official",
        "A174": "management/self-employed/highly qualified employee/officer",
    },
    # Attribute 19: (qualitative)
    "telephone": {
        "A191": "none",
        "A192": "yes",
    },
    # Attribute 20: (qualitative)
    "foreign_worker": {
        "A201": "yes",
        "A202": "no",
    },
}

for column, labels in code_labels.items():
    decoded = df1[column].map(labels)
    # Series.map turns every value missing from the dict into NaN. Falling back
    # to the original value keeps unexpected codes visible and makes this cell
    # safe to re-run: already-decoded labels are left alone instead of being
    # wiped to NaN (and then written over the cleaned CSV).
    df1[column] = decoded.fillna(df1[column])

# to_csv does not create missing directories, so ensure the folder exists.
Path(cleaned_output).parent.mkdir(parents=True, exist_ok=True)
df1.to_csv(cleaned_output, sep=";", index=False)

# Distinct values per column (call the method; a bare `df1.nunique` only
# displays the bound-method repr).
df1.nunique()

In [None]:
# Print the column dtypes and non-null counts of the decoded frame
df1.info()

In [None]:
# Work on a copy so validation experiments cannot alter df1
df2 = df1.copy()

# Index.drop() requires the labels to remove; calling it with no argument
# raises TypeError. The skip list is empty for now, so every column is validated.
# TODO: list the columns that should be excluded from validation.
cols_to_skip = []
col_to_validate = df2.columns.drop(cols_to_skip)