In [None]:
import numpy as np

# Turn the literal string "nan" into a real NaN so the filter below
# treats those entries as missing.
df_clean["form"] = df_clean["form"].replace(to_replace="nan", value=np.nan)

# Keep only the rows that have a form, then renumber the index from 0.
df_clean = df_clean[df_clean["form"].notna()].reset_index(drop=True)

# Misspelled or abbreviated form labels, keyed by the raw label and mapped to
# the canonical spelling. Built from (canonical, variants) pairs so that all
# variants of one form sit on a single line.
form_fix = {
    variant: canonical
    for canonical, variants in (
        ("capsule", ("caps",)),
        ("cream", ("cre",)),
        ("drops", ("dro",)),
        ("powder", ("power",)),
        ("tablet", ("tabs.", "tablets")),
    )
    for variant in variants
}
# Rewrite every "form" value that appears as a key in form_fix; values that
# are not keys are left unchanged.
df_clean["form"] = df_clean["form"].replace(to_replace=form_fix)

# Consolidation maps: each dict sends a raw dosage-form label to the broader
# category it is folded into. They are merged into `final_maps` below.

# Route-specific drops collapse into a single "drops" category.
drop_map = {
    "eye drops": "drops",
    "ear drops": "drops",
    "oral drops": "drops",
    "nasal drops": "drops",
    "mouth drops": "drops",
}

# Container/device labels that are folded into "injection".
inject_map = {
    "ampoule": "injection",
    "vial": "injection",
    "syringe": "injection",
    "pen": "injection",
}

# Products applied locally rather than swallowed or injected.
topical_map = {
    "hair oil": "topical",
    "conditioner": "topical",
    "soap": "topical",
    "shampoo": "topical",
    "facial wash": "topical",
    "paint": "topical",
    "serum": "topical",
    "oil": "topical",
    "foam": "topical",
    "gel": "topical",
    "lotion": "topical",
    "cream": "topical",
    "ointment": "topical",
    "eye ointment": "topical",
    # A vaginal douche is applied locally, not taken by mouth, so it belongs
    # with the other locally applied products (compare "eye ointment")
    # rather than under "oral_liquid".
    "vaginal douche": "topical",
}

# Solid forms folded into "oral_solid".
solid_map = {
    "tablet": "oral_solid",
    "capsule": "oral_solid",
    "lozenges": "oral_solid",
    "film": "oral_solid",
    "piece": "oral_solid",
    "effervescent": "oral_solid",
}

# Liquid forms folded into "oral_liquid".
liquid_map = {
    "syrup": "oral_liquid",
    "suspension": "oral_liquid",
    "solution": "oral_liquid",
    "mouth wash": "oral_liquid",
    "bottle": "oral_liquid",
}

# Combine all consolidation maps into one lookup. A label listed in two maps
# would otherwise be silently decided by whichever map is merged last, so
# fail loudly instead.
final_maps = {}
for _category_map in (drop_map, inject_map, topical_map, solid_map, liquid_map):
    _overlap = final_maps.keys() & _category_map.keys()
    if _overlap:
        raise ValueError(
            f"Form listed in more than one consolidation map: {sorted(_overlap)}"
        )
    final_maps.update(_category_map)

# Fold every "form" value that is a key of final_maps into its category;
# values that are not keys are left unchanged.
df_clean["form"] = df_clean["form"].replace(to_replace=final_maps)

# Expose the consolidated values under a separate "form_clean" column.
df_clean["form_clean"] = df_clean["form"]

# Sanity-check the result: per-category counts, number of missing values,
# and number of distinct categories.
print(
    df_clean["form_clean"].value_counts(),
    f"\nNull values: {df_clean['form_clean'].isna().sum()}",
    f"Unique values: {df_clean['form_clean'].nunique()}",
    sep="\n",
)