<h3>Multivariate Imputation by Chained Equations (MICE) for Missing Values</h3>

Using scikit-learn's `IterativeImputer`.

In [2]:
import pandas as pd
import numpy as np
from  sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [3]:
# Toy dataset: three numeric features, each with exactly one missing entry
# (np.nan), plus a binary `purchased` target that has no gaps.
toy_columns = dict(
    age=[25, 27, 29, 31, 33, np.nan],
    experience=[np.nan, 3, 5, 7, 9, 11],
    salary=[50, np.nan, 110, 140, 170, 200],
    purchased=[0, 1, 1, 0, 1, 0],
)
df = pd.DataFrame(toy_columns)

# head() shows the first five of the six rows, so the row with the missing
# `age` (index 5) is not part of the preview.
df.head()

Unnamed: 0,age,experience,salary,purchased
0,25.0,,50.0,0
1,27.0,3.0,,1
2,29.0,5.0,110.0,1
3,31.0,7.0,140.0,0
4,33.0,9.0,170.0,1


In [6]:
# Separate the feature matrix (every column except the target) from the
# `purchased` target column; `df` itself is left unmodified.
X = df.drop('purchased', axis=1)
y = df.loc[:, 'purchased']

In [7]:
# Preview the feature frame: the first five rows of age / experience / salary,
# with the `purchased` target no longer present.
X.head()

Unnamed: 0,age,experience,salary
0,25.0,,50.0
1,27.0,3.0,
2,29.0,5.0,110.0
3,31.0,7.0,140.0
4,33.0,9.0,170.0


In [8]:
# Pairwise correlation between the feature columns. In the output below every
# pair comes out at 1.0, i.e. the observed values of the three toy features are
# perfectly linearly related.
X.corr()

Unnamed: 0,age,experience,salary
age,1.0,1.0,1.0
experience,1.0,1.0,1.0
salary,1.0,1.0,1.0


In [11]:
# Regressor handed to the imputer as its per-feature estimator.
lr = LinearRegression()

# Imputer settings kept together so they are easy to scan and tune:
#   max_iter=30               -> up to 30 imputation rounds
#   imputation_order="roman"  -> features visited left to right (0, 1, 2)
#   tol=0                     -> the log reports a scaled tolerance of 0.0, so
#                                early stopping is effectively disabled
#   verbose=2                 -> log every round (and the change per round on fit)
imputer_settings = dict(
    max_iter=30,
    imputation_order="roman",
    tol=0,
    verbose=2,
)
imp = IterativeImputer(estimator=lr, **imputer_settings)

In [12]:
# Fit the chained per-feature regressions on X and return the completed matrix
# (a NumPy array, see output). Because the imputer was built with verbose=2,
# each imputation round is logged together with its change value.
imp.fit_transform(X)

[IterativeImputer] Completing matrix with shape (6, 3)
[IterativeImputer] Ending imputation round 1/30, elapsed time 0.01
[IterativeImputer] Change: 61.22518987714511, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 2/30, elapsed time 0.02
[IterativeImputer] Change: 7.963767891095671, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 3/30, elapsed time 0.02
[IterativeImputer] Change: 0.7509179143104348, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 4/30, elapsed time 0.02
[IterativeImputer] Change: 0.013116467770018403, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 5/30, elapsed time 0.03
[IterativeImputer] Change: 0.0008142526488512658, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 6/30, elapsed time 0.03
[IterativeImputer] Change: 3.972059862178412e-05, scaled tolerance: 0.0 
[IterativeImputer] Ending imputation round 7/30, elapsed time 0.03
[IterativeImputer] Change: 2.1412899258166362e-06,



array([[ 25.,   1.,  50.],
       [ 27.,   3.,  80.],
       [ 29.,   5., 110.],
       [ 31.,   7., 140.],
       [ 33.,   9., 170.],
       [ 35.,  11., 200.]])

In [17]:
# Hold out 20% of the six rows (two rows) as a test set; random_state pins the
# shuffle so the same rows are selected on every run.
# NOTE(review): `imp` was already fit on the full X in an earlier cell, so these
# test rows are not unseen by the imputer. In a real workflow, split first and
# fit the imputer on X_train only.
split_parts = train_test_split(X, y, test_size=0.2, random_state=2)
X_train, X_test, y_train, y_test = split_parts

In [18]:
# Inspect the held-out rows; in the output below, row 1 still has its salary
# missing.
X_test

Unnamed: 0,age,experience,salary
4,33.0,9.0,170.0
1,27.0,3.0,


In [19]:
# Fitted imputation steps in the order they were applied. Each entry is a
# (feat_idx, neighbor_feat_idx, estimator) triplet: the feature being imputed,
# the features used as predictors, and the regressor fitted for that step.
imp.imputation_sequence_

[_ImputerTriplet(feat_idx=0, neighbor_feat_idx=array([1, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=1, neighbor_feat_idx=array([0, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=2, neighbor_feat_idx=array([0, 1]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=0, neighbor_feat_idx=array([1, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=1, neighbor_feat_idx=array([0, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=2, neighbor_feat_idx=array([0, 1]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=0, neighbor_feat_idx=array([1, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=1, neighbor_feat_idx=array([0, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=2, neighbor_feat_idx=array([0, 1]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=0, neighbor_feat_idx=array([1, 2]), estimator=LinearRegression()),
 _ImputerTriplet(feat_idx=1, neighbor_feat_idx=array([0, 2]), estimato

In [20]:
# Look at the very first fitted step: the regression that predicts feature 0
# (age) from features 1 and 2 (experience, salary).
first_step = imp.imputation_sequence_[0]
first_model = first_step.estimator  # same object as first_step[2]

print(first_model.coef_)
print(first_model.intercept_)

[0.58860759 0.05590717]
18.597046413502106


In [21]:
# Fill the gaps in the held-out rows with the already-fitted imputer; the
# missing salary in row 1 is the only value that needs imputing here.
imp.transform(X_test)

[IterativeImputer] Completing matrix with shape (2, 3)
[IterativeImputer] Ending imputation round 1/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 2/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 3/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 4/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 5/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 6/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 7/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 8/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 9/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 10/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 11/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 12/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 13/30, elapsed time 0.00
[IterativeImputer] Ending imputation round 14/30, elapsed time 0.01
[I

array([[ 33.,   9., 170.],
       [ 27.,   3.,  80.]])