# Data Download, Cleaning, and Mapping (German Credit)

This notebook demonstrates how to:
- Download the Statlog German Credit dataset (UCI id=144)
- Preview the raw data
- Map symbolic category codes (Axx) to human‑readable values
- Preview the cleaned DataFrame


## 1) Install dependencies

In [None]:
# Install the two third-party packages this notebook imports (ucimlrepo, pandas).
# %pip installs into the environment of the running kernel; -q keeps pip's output quiet.
%pip install -q ucimlrepo pandas

## 2) Download dataset and preview

In [None]:
from ucimlrepo import fetch_ucirepo
import pandas as pd

# Fetch the dataset (UCI id=144: Statlog (German Credit Data)).
statlog_german_credit_data = fetch_ucirepo(id=144)

# Features and targets as exposed by the fetched dataset object
# (expected to be pandas DataFrames; the target is re-wrapped below if it is not).
X = statlog_german_credit_data.data.features
y = statlog_german_credit_data.data.targets

# Combine features and target(s) into one frame for convenience.
if y is not None:
    # Work on a copy so that renaming target columns below does not modify
    # the targets frame held by the fetched dataset object.
    y_df = y.copy() if isinstance(y, pd.DataFrame) else pd.DataFrame(y)
    if y_df.shape[1] == 1:
        # Single target column: fall back to the name 'target' when the
        # existing label is not a non-empty string.
        only_label = y_df.columns[0]
        if not (isinstance(only_label, str) and only_label):
            y_df.columns = ['target']
    else:
        # Several target columns: keep usable string labels, number the rest.
        y_df.columns = [
            c if isinstance(c, str) and c else f'target_{i}'
            for i, c in enumerate(y_df.columns)
        ]
    # Reset both indexes so rows are aligned by position, not by index label.
    df = pd.concat(
        [X.reset_index(drop=True), y_df.reset_index(drop=True)],
        axis=1,
    )
else:
    df = X.copy()

print('Metadata:\n', statlog_german_credit_data.metadata)
# The leading '\n' escapes put a blank line between the printed sections.
print('\nVariables (first 10):\n', statlog_german_credit_data.variables.head(10))
print('\nShape:', df.shape)
df.head()


## 3) Mapping specification (Axx → human‑readable)

**Attribute 1: Status of existing checking account**
- `A11`: ... < 0 DM
- `A12`: 0 <= ... < 200 DM
- `A13`: ... >= 200 DM / salary assignments for at least 1 year
- `A14`: no checking account

**Attribute 2 (numerical): Duration in month**

**Attribute 3: Credit history**
- `A30`: no credits taken/ all credits paid back duly
- `A31`: all credits at this bank paid back duly
- `A32`: existing credits paid back duly till now
- `A33`: delay in paying off in the past
- `A34`: critical account/ other credits existing (not at this bank)

**Attribute 4: Purpose**
- `A40`: car (new)
- `A41`: car (used)
- `A42`: furniture/equipment
- `A43`: radio/television
- `A44`: domestic appliances
- `A45`: repairs
- `A46`: education
- `A48`: retraining
- `A49`: business
- `A410`: others

**Attribute 5 (numerical): Credit amount**

**Attribute 6: Savings account/bonds**
- `A61`: ... < 100 DM
- `A62`: 100 <= ... < 500 DM
- `A63`: 500 <= ... < 1000 DM
- `A64`: ... >= 1000 DM
- `A65`: unknown/ no savings account

**Attribute 7: Present employment since**
- `A71`: unemployed
- `A72`: ... < 1 year
- `A73`: 1 <= ... < 4 years
- `A74`: 4 <= ... < 7 years
- `A75`: ... >= 7 years

**Attribute 8 (numerical): Installment rate (% of disposable income)**

**Attribute 9: Personal status and sex**
- `A91`: male: divorced/separated
- `A92`: female: divorced/separated/married
- `A93`: male: single
- `A94`: male: married/widowed
- `A95`: female: single

**Attribute 10: Other debtors / guarantors**
- `A101`: none
- `A102`: co-applicant
- `A103`: guarantor

**Attribute 11 (numerical): Present residence since**

**Attribute 12: Property**
- `A121`: real estate
- `A122`: building society savings agreement/ life insurance
- `A123`: car or other (not in attribute 6)
- `A124`: unknown / no property

**Attribute 13 (numerical): Age in years**

**Attribute 14: Other installment plans**
- `A141`: bank
- `A142`: stores
- `A143`: none

**Attribute 15: Housing**
- `A151`: rent
- `A152`: own
- `A153`: for free

**Attribute 16 (numerical): Number of existing credits at this bank**

**Attribute 17: Job**
- `A171`: unemployed/ unskilled - non-resident
- `A172`: unskilled - resident
- `A173`: skilled employee / official
- `A174`: management/self-employed/highly qualified employee/officer

**Attribute 18 (numerical): Number of people being liable to provide maintenance for**

**Attribute 19: Telephone**
- `A191`: none
- `A192`: yes, registered under the customer's name

**Attribute 20: foreign worker**
- `A201`: yes
- `A202`: no

## 4) Apply mapping and standardize column names

In [None]:
import pandas as pd

# Define mapping dictionaries (Axx -> readable).
# One dictionary per categorical attribute; the number in each name is the
# attribute number used in the mapping specification.
map_1_checking = {
    'A11': 'negative_balance',
    'A12': '0_to_200_dm',
    'A13': '200_or_more_dm',
    'A14': 'no_checking_account',
}
map_3_history = {
    'A30': 'no_credits',
    'A31': 'all_paid',
    'A32': 'existing_paid',
    'A33': 'delay',
    'A34': 'critical',
}
map_4_purpose = {
    'A40': 'car_new',
    'A41': 'car_used',
    'A42': 'furniture',
    'A43': 'radio_tv',
    'A44': 'appliances',
    'A45': 'repairs',
    'A46': 'education',
    'A48': 'retraining',
    'A49': 'business',
    'A410': 'others',
}
map_6_savings = {
    'A61': 'lt_100_dm',
    'A62': '100_to_500_dm',
    'A63': '500_to_1000_dm',
    'A64': 'ge_1000_dm',
    'A65': 'unknown_no_savings',
}
map_7_employment = {
    'A71': 'unemployed',
    'A72': 'lt_1_year',
    'A73': '1_to_4_years',
    'A74': '4_to_7_years',
    'A75': 'ge_7_years',
}
map_9_personal_status_sex = {
    'A91': 'male_divorced_separated',
    'A92': 'female_divorced_separated_married',
    'A93': 'male_single',
    'A94': 'male_married_widowed',
    'A95': 'female_single',
}
map_10_other_debtors = {
    'A101': 'none',
    'A102': 'co_applicant',
    'A103': 'guarantor',
}
map_12_property = {
    'A121': 'real_estate',
    'A122': 'savings_agreement_or_life_insurance',
    'A123': 'car_or_other',
    'A124': 'unknown_no_property',
}
map_14_other_plans = {
    'A141': 'bank',
    'A142': 'stores',
    'A143': 'none',
}
map_15_housing = {
    'A151': 'rent',
    'A152': 'own',
    'A153': 'for_free',
}
map_17_job = {
    'A171': 'unemployed_or_unskilled_non_resident',
    'A172': 'unskilled_resident',
    'A173': 'skilled_employee_official',
    'A174': 'management_self_employed_highly_qualified_officer',
}
map_19_telephone = {
    'A191': 'none',
    'A192': 'yes_registered',
}
map_20_foreign_worker = {
    'A201': 'yes',
    'A202': 'no',
}

# Column variants: handle either canonical names or AttributeN.
# Key = standardized column name; value = accepted spellings of that column,
# tried in order (the standardized name itself comes first).
col_variants = {
    'checking_status': ['checking_status', 'Status of existing checking account', 'Attribute1'],
    'duration': ['duration', 'Duration in month', 'Attribute2'],
    'credit_history': ['credit_history', 'Credit history', 'Attribute3'],
    'purpose': ['purpose', 'Purpose', 'Attribute4'],
    'credit_amount': ['credit_amount', 'Credit amount', 'Attribute5'],
    'savings_status': ['savings_status', 'Savings account/bonds', 'Attribute6'],
    'employment': ['employment', 'Present employment since', 'Attribute7'],
    'installment_commitment': ['installment_commitment', 'Installment rate in percentage of disposable income', 'Attribute8'],
    'personal_status_sex': ['personal_status_sex', 'Personal status and sex', 'Attribute9'],
    'other_debtors': ['other_debtors', 'Other debtors / guarantors', 'Attribute10'],
    'residence_since': ['residence_since', 'Present residence since', 'Attribute11'],
    'property_magnitude': ['property_magnitude', 'Property', 'Attribute12'],
    'age': ['age', 'Age in years', 'Attribute13'],
    'other_payment_plans': ['other_payment_plans', 'Other installment plans', 'Attribute14'],
    'housing': ['housing', 'Housing', 'Attribute15'],
    'existing_credits': ['existing_credits', 'Number of existing credits at this bank', 'Attribute16'],
    'job': ['job', 'Job', 'Attribute17'],
    'num_dependents': ['num_dependents', 'Number of people being liable to provide maintenance for', 'Attribute18'],
    'own_telephone': ['own_telephone', 'Telephone', 'Attribute19'],
    'foreign_worker': ['foreign_worker', 'foreign worker', 'Attribute20'],
}

# Standardized column name -> code dictionary. Columns not listed here
# are renamed only; their values are left untouched.
mapping_per_col = {
    'checking_status': map_1_checking,
    'credit_history': map_3_history,
    'purpose': map_4_purpose,
    'savings_status': map_6_savings,
    'employment': map_7_employment,
    'personal_status_sex': map_9_personal_status_sex,
    'other_debtors': map_10_other_debtors,
    'property_magnitude': map_12_property,
    'other_payment_plans': map_14_other_plans,
    'housing': map_15_housing,
    'job': map_17_job,
    'own_telephone': map_19_telephone,
    'foreign_worker': map_20_foreign_worker,
}

# Decode the category codes column by column and collect the renames.
rename_map = {}
for target_name, variants in col_variants.items():
    # First accepted spelling that is actually present in the frame.
    current = next((c for c in variants if c in df.columns), None)
    if current is None:
        continue
    if target_name in mapping_per_col:
        raw_codes = df[current]
        code_text = raw_codes.astype(str)
        # Codes without an entry in the dictionary keep their original text.
        decoded = code_text.map(mapping_per_col[target_name]).fillna(code_text)
        # astype(str) turns missing cells into text such as 'nan'; blank those
        # positions out again so missing values stay missing.
        df[current] = decoded.mask(raw_codes.isna())
    if current != target_name:
        rename_map[current] = target_name

# Apply all renames in one pass. Every source label belongs to exactly one
# variants list, so this matches renaming one column at a time.
df = df.rename(columns=rename_map)

print('Columns renamed:', rename_map)
print('\nDtypes after mapping (first 10):\n', df.dtypes.head(10))
df.head()

## 5) Post-mapping preview

In [None]:
# Report the frame's (rows, columns) shape, then display its first five rows.
n_rows, n_cols = df.shape
print('Shape:', (n_rows, n_cols))
df.head(5)


## 6) (Optional) Save cleaned dataset

In [None]:
from pathlib import Path
# Write the frame to data/german_credit_clean.csv without the index column.
# The relative 'data' directory is created first if it does not exist yet.
out_path = Path('data')
out_path.mkdir(exist_ok=True)
clean_path = out_path.joinpath('german_credit_clean.csv')
df.to_csv(clean_path, index=False)
# Print the absolute location so the file is easy to find.
print(f'Saved cleaned dataset to: {clean_path.resolve()}')
