In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Toy patient table; two of the seven 'BMI' entries are missing (NaN).
data = {
    'Age': [25, 45, 30, 35, 50, 40, 60],
    'Gender': [1, 0, 1, 0, 1, 0, 1],
    'BloodPressure': [120, 130, 115, 140, 135, 150, 125],
    'Cholesterol': [200, 210, 190, 220, 215, 225, 205],
    'Glucose': [85, 90, 80, 95, 88, 92, 89],
    'BMI': [22.5, np.nan, 24.7, 26.8, np.nan, 30.1, 28.0]
}

df = pd.DataFrame(data)

# Boolean mask of the rows whose 'BMI' has to be imputed. It is computed once,
# before df is modified, and reused for the split and for the write-back.
bmi_missing = df['BMI'].isna()

# Every column except 'BMI' serves as a predictor.
predictor_cols = [col for col in df.columns if col != 'BMI']

# Rows with a known 'BMI' train the model; rows without one receive predictions.
train_data = df[~bmi_missing]
test_data = df[bmi_missing]

# Predictors and target for fitting
X_train = train_data[predictor_cols]
y_train = train_data['BMI']

# Predictors of the rows that still need a 'BMI'
X_test = test_data[predictor_cols]

# Fit an ordinary least-squares model on the complete rows.
# NOTE(review): only 5 complete rows are available for 5 predictors plus an
# intercept, so the imputed values are illustrative rather than reliable.
model = LinearRegression().fit(X_train, y_train)

# Estimate 'BMI' for the incomplete rows
predicted_bmi = model.predict(X_test)

# Write the estimates back into df, in the same row order as X_test
df.loc[bmi_missing, 'BMI'] = predicted_bmi

# Show the untouched input (rebuilt from `data`, since df was modified above)
# followed by the imputed frame.
print("Original DataFrame with Missing Values:")
print(pd.DataFrame(data))

print("\nDataFrame after Regression Imputation:")
print(df)


Original DataFrame with Missing Values:
   Age  Gender  BloodPressure  Cholesterol  Glucose   BMI
0   25       1            120          200       85  22.5
1   45       0            130          210       90   NaN
2   30       1            115          190       80  24.7
3   35       0            140          220       95  26.8
4   50       1            135          215       88   NaN
5   40       0            150          225       92  30.1
6   60       1            125          205       89  28.0

DataFrame after Regression Imputation:
   Age  Gender  BloodPressure  Cholesterol  Glucose        BMI
0   25       1            120          200       85  22.500000
1   45       0            130          210       90  26.678819
2   30       1            115          190       80  24.700000
3   35       0            140          220       95  26.800000
4   50       1            135          215       88  27.011193
5   40       0            150          225       92  30.100000
6   60       1 