In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer

In [2]:
# Read Fe2O3.csv (path is relative to the current working directory) into `df`
df = pd.read_csv(filepath_or_buffer="Fe2O3.csv")

In [3]:
# Normalise the headers: every run of characters other than ASCII letters,
# digits or underscore is collapsed into a single "_"
df.columns = df.columns.str.replace(
    pat='[^A-Za-z0-9_]+',
    repl='_',
    regex=True,
)

In [4]:
# Strip leading/trailing whitespace from the values of every object-dtype column
def _strip_text(value):
    """Return `value` stripped if it is a string, otherwise return it unchanged.

    A string that is empty after stripping is returned as NaN so that cells
    holding only whitespace are treated as missing rather than as a real
    (empty) category.
    """
    if not isinstance(value, str):
        # Series.str.strip() would turn non-string entries of an object column
        # into NaN; passing them through keeps the original data intact.
        return value
    text = value.strip()
    return text if text else float('nan')


for col in df.select_dtypes(include='object').columns:
    df[col] = df[col].map(_strip_text)

In [5]:
# Column groups used by the imputation steps (names are the cleaned headers)
num_cols = [
    'extract_volume_mL_',
    'conc_M_',
    'precursor_volume_mL_',
    'ph',
    'time_hr_',
]
cat_cols = [
    'plant_extract',
    'precursor',
    'methods',
    'additives',
]

In [6]:
# Replace missing values in the numeric columns with that column's mean.
# The fitted imputer remains available as `imputer_num`.
imputer_num = SimpleImputer(strategy='mean')
imputer_num.fit(df[num_cols])
df[num_cols] = imputer_num.transform(df[num_cols])

In [7]:
# Replace missing values in the categorical columns with that column's most
# frequent value. The fitted imputer remains available as `imputer_cat`.
imputer_cat = SimpleImputer(strategy='most_frequent')
imputer_cat.fit(df[cat_cols])
df[cat_cols] = imputer_cat.transform(df[cat_cols])

In [8]:
# Display the first five rows of the cleaned frame
df.head(n=5)

Unnamed: 0,plant_extract,extract_volume_mL_,precursor,conc_M_,precursor_volume_mL_,ph,time_hr_,methods,additives,particle_size_nm_
0,Syzygium aromaticum,15.0,FeCl3,0.001,5.0,6.0,1.0,Co-Precipitation,0.10 M NaOH,50
1,Moringa oleifera leaves,20.0,Fe3Cl4,0.6,80.0,8.8,3.416667,waterbath,60c sittirng,16
2,Madhuca indica leaves,90.0,FeSO4,0.025,10.0,8.8,24.0,Co-Precipitation,60c sittirng,56
3,Bauhinia tomentosa leaves,50.0,FeCl3,0.01,50.0,8.8,3.416667,Stirring,wash with water and acetone,70
4,Jackfruit peel,30.0,FeCl2,0.1,20.0,6.0,3.416667,Co-Precipitation,60c sittirng,32
