In [1]:

import pandas as pd
from io import StringIO
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Sample CSV data; the 'age' field is left blank for Bob and Eva
csv_data = """id,name,age,height,weight
1,Alice,25,165,68
2,Bob,,175,75
3,Charlie,30,180,85
4,David,22,170,72
5,Eva,,160,55
"""

# Load data
df = pd.read_csv(StringIO(csv_data))

# Prepare features and target for imputation
# Use 'height' and 'weight' to predict 'age'
features = df[['height', 'weight']]
target = df['age']

# Compute the missing-value mask once and reuse it everywhere, so the rows
# used for training, prediction and write-back are guaranteed to line up.
missing_age = target.isnull()

# Split data into train (non-missing age) and predict (missing age)
train_X = features[~missing_age]
train_y = target[~missing_age]
predict_X = features[missing_age]

# Fail early with a clear message instead of a generic estimator error
# when there is nothing to learn from.
if train_X.empty:
    raise ValueError("Cannot impute 'age': no rows with a known age to train on")

# Train RandomForestRegressor on available data
# (fixed random_state keeps the imputed values reproducible across runs)
model = RandomForestRegressor(random_state=42)
model.fit(train_X, train_y)

if missing_age.any():
    # Predict missing ages and fill them into the original dataframe
    predicted_ages = model.predict(predict_X)
    df.loc[missing_age, 'age'] = predicted_ages
else:
    # Nothing to impute: skip predict(), which rejects zero-row input.
    # Keep the name defined (as an empty array) for any later cell.
    predicted_ages = train_y.to_numpy()[:0]

print(df)

   id     name    age  height  weight
0   1    Alice  25.00     165      68
1   2      Bob  23.92     175      75
2   3  Charlie  30.00     180      85
3   4    David  22.00     170      72
4   5      Eva  24.64     160      55
