In [4]:
import sys
from pathlib import Path

# Make the project root importable (e.g. for the `src` package used further down).
# Path().resolve() is the current working directory; its parent is taken as the
# project root, which assumes the notebook runs from a folder directly below it.
project_root = Path().resolve().parent
# Only register the path once: re-running this cell must not keep appending
# duplicate entries to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [6]:
import pandas as pd

# ======================
# 1️⃣ Create dummy dataset
# ======================
# Training rows: every (Gender, Smoking_Status) combination appears exactly once,
# each with a known Comorbidity label.
data_train = dict(
    Gender=['Male', 'Male', 'Female', 'Female'],
    Smoking_Status=['Yes', 'No', 'Yes', 'No'],
    Comorbidity=['Hypertension', 'Hypertension', 'Diabetes', 'Hypertension'],
)
df_train = pd.DataFrame(data_train)

# Test rows: same feature values as the training rows, but the Comorbidity
# column holds only the 'unknown' placeholder.
df_test = pd.DataFrame({**data_train, 'Comorbidity': ['unknown'] * len(df_train)})

# ======================
# 2️⃣ Initialize KModesImputer
# ======================
from src.data_preprocessor import KModesImputer

# Constructor arguments gathered in one mapping so they can be inspected or
# adjusted in a single place before the imputer is built.
imputer_params = {
    # Feature columns handed to the imputer for clustering.
    'columns_for_clustering': ['Gender', 'Smoking_Status'],
    # Column whose values are to be imputed.
    'column_to_impute': 'Comorbidity',
    # The toy training data has 4 distinct (Gender, Smoking_Status) combinations.
    'n_clusters': 4,
    # Same string that fills the Comorbidity column of the test frame;
    # presumably the missing-value marker — confirm against KModesImputer.
    'placeholder': 'unknown',
    'random_state': 42,
}
imputer = KModesImputer(**imputer_params)

# ======================
# 3️⃣ Fit and transform
# ======================
# Fit on the training frame only; the same fitted imputer is then applied to
# both frames. The order matters: fit must run before either transform call.
imputer.fit(df_train)
# df_train contains no 'unknown' entries in Comorbidity; the recorded output
# shows this frame coming back with its original labels.
imputed_train = imputer.transform(df_train)
# Every Comorbidity entry in df_test is the 'unknown' placeholder; the recorded
# output shows them replaced with labels seen in the training data.
# NOTE(review): whether transform copies or mutates its input is not visible
# here — confirm in KModesImputer.
imputed_test = imputer.transform(df_test)

# ======================
# 4️⃣ Show results
# ======================
# One print per frame: sep="\n" places the frame on the line right after its
# heading, producing the same text as printing heading and frame separately.
print("\nImputed Train Data:", imputed_train, sep="\n")
print("\nImputed Test Data:", imputed_test, sep="\n")


Imputed Train Data:
   Gender Smoking_Status   Comorbidity
0    Male            Yes  Hypertension
1    Male             No  Hypertension
2  Female            Yes      Diabetes
3  Female             No  Hypertension

Imputed Test Data:
   Gender Smoking_Status   Comorbidity
0    Male            Yes  Hypertension
1    Male             No  Hypertension
2  Female            Yes      Diabetes
3  Female             No  Hypertension
