In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures

In [3]:
# Toy dataset: 'age' and 'income' each contain one missing entry (None).
data = dict(
    age=[25, 30, 35, 40, None],
    gender=['M', 'F', 'M', 'F', 'M'],
    income=[50000, 60000, None, 80000, 70000],
    purchased=[0, 1, 0, 1, 0],
)
df = pd.DataFrame(data)

# Split the frame: 'purchased' is the target, every other column is a feature.
y = df['purchased']
X = df.drop(columns=['purchased'])

# Numeric columns: fill gaps with the column median, standardise, then expand
# into degree-2 polynomial terms (include_bias=False, so no constant column).
numeric_features = ['age', 'income']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2, include_bias=False))
])

# Categorical columns: replace missing entries with the literal 'missing',
# then one-hot encode.
# handle_unknown='ignore' matters because the fit data contains no missing
# gender, so the encoder never learns a 'missing' category. With the default
# ('error'), transforming a later row whose gender is missing (imputed to
# 'missing') or simply unseen would raise; with 'ignore' such a row is encoded
# as all zeros instead. The output for the fit data itself is unaffected.
categorical_features = ['gender']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Route each column group through its own pipeline; the transformed numeric
# block comes first in the output, followed by the one-hot columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Fit the preprocessor on X and transform X in one step
X_preprocessed = preprocessor.fit_transform(X)

# Show the transformed feature matrix
print(X_preprocessed)


[[-1.5  -1.5   2.25  2.25  2.25  0.    1.  ]
 [-0.5  -0.5   0.25  0.25  0.25  1.    0.  ]
 [ 0.5   0.    0.25  0.    0.    0.    1.  ]
 [ 1.5   1.5   2.25  2.25  2.25  1.    0.  ]
 [ 0.    0.5   0.    0.    0.25  0.    1.  ]]


In [5]:
import pickle
# Serialise the preprocessor to disk. The context manager guarantees the file
# handle is closed even if pickle.dump raises part-way through.
# NOTE: only unpickle this file from a trusted source; pickle.load can execute
# arbitrary code.
with open("preproces.pkl", "wb") as file:
    pickle.dump(preprocessor, file)