In [4]:
import sys
sys.path.append("../src")

from datahabit.data_cleaner import DataCleaner, DataCleanerError

# Single shared cleaner instance; the cells below reuse it.
cleaner = DataCleaner()

# Probe inputs: one well-formed timestamp, one free-text string,
# an empty string, and None.
timestamps = ["2025-11-04 10:00:00", "INVALID FORMAT", "", None]

# Show each input next to the validator's verdict.
for ts in timestamps:
    is_valid = cleaner.validate_timestamp(ts)
    print(ts, "=>", is_valid)


2025-11-04 10:00:00 => True
INVALID FORMAT => False
 => False
None => False


In [5]:
# Sample input mixing real timestamps with an empty string and None.
# fix_missing is called without an explicit replacement, so its default
# placeholder is used.
raw_entries = [
    "2025-11-04 10:00:00",
    "",
    None,
    "2025-12-01 08:30:00",
]
fixed = cleaner.fix_missing(raw_entries)

print("Fixed entries:", fixed)


Fixed entries: ['2025-11-04 10:00:00', 'MISSING', 'MISSING', '2025-12-01 08:30:00']


In [6]:
# The trailing empty string is deliberate: it is there to make convert_all fail.
# NOTE(review): the original note says this raises NullEntryError, presumably a
# DataCleanerError subclass -- confirm against datahabit.data_cleaner.
entries_with_blank = [
    "2025-11-04 10:00:00",
    "2025-12-01 08:30:00",
    "",
]

try:
    converted = cleaner.convert_all(entries_with_blank)
    print(converted)
except DataCleanerError as err:
    # Report the failure instead of letting it abort the notebook run.
    print("Error caught:", err)


Error caught: [DataCleanerError] Encountered a null or empty timestamp entry.


In [7]:
# End-to-end demo: fill in missing entries, validate, then convert only the
# entries that passed validation.
entries = [
    "2025-11-04 10:00:00",
    "",
    "2025-11-05 09:00:00",
    None,
    "2025-13-99 88:88:88"   # invalid date
]

# Step 1: replace missing entries with a placeholder.
cleaned = cleaner.fix_missing(entries, replacement="MISSING")
print("After fixing:", cleaned)

# Step 2: validate every entry exactly once.
validated = [cleaner.validate_timestamp(ts) for ts in cleaned]
print("Validity:", validated)

# Step 3: convert only the valid timestamps. The flags from step 2 are reused
# so each entry is not validated a second time.
valid_entries = [ts for ts, is_valid in zip(cleaned, validated) if is_valid]
converted = cleaner.convert_all(valid_entries)
print("Converted:", converted)


After fixing: ['2025-11-04 10:00:00', 'MISSING', '2025-11-05 09:00:00', 'MISSING', '2025-13-99 88:88:88']
Validity: [True, False, True, False, False]
Converted: [datetime.datetime(2025, 11, 4, 10, 0), datetime.datetime(2025, 11, 5, 9, 0)]
