In [2]:
## 1. Import Libraries & Create Sample Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder, PolynomialFeatures
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

In [3]:
# Sample dataset: six toy customers, written one row per customer so the
# literal reads the same way as the table displayed below.
columns = ('age', 'income', 'city', 'purchased')
rows = [
    (20, 20000, 'Delhi', 0),
    (25, 25000, 'Mumbai', 1),
    (30, 30000, 'Delhi', 0),
    (35, 35000, 'Chennai', 1),
    (40, 40000, 'Mumbai', 1),
    (45, 45000, 'Chennai', 0),
]

# Transpose the rows into a column-name -> list-of-values mapping.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}

df = pd.DataFrame(data)
df

Unnamed: 0,age,income,city,purchased
0,20,20000,Delhi,0
1,25,25000,Mumbai,1
2,30,30000,Delhi,0
3,35,35000,Chennai,1
4,40,40000,Mumbai,1
5,45,45000,Chennai,0


In [4]:
## 2. Scaling (StandardScaler + MinMaxScaler)
# Two scalers, each learned from a single column:
#   - StandardScaler centres `age` on its mean and divides by its standard deviation
#   - MinMaxScaler rescales `income` linearly onto the 0..1 range
# The double brackets keep the input two-dimensional, as the scalers are given a frame.
scaler_std = StandardScaler().fit(df[['age']])
scaler_mm = MinMaxScaler().fit(df[['income']])

# Store the scaled values next to the raw ones (adds two columns to df).
df['age_std'] = scaler_std.transform(df[['age']])
df['income_mm'] = scaler_mm.transform(df[['income']])

df

Unnamed: 0,age,income,city,purchased,age_std,income_mm
0,20,20000,Delhi,0,-1.46385,0.0
1,25,25000,Mumbai,1,-0.87831,0.2
2,30,30000,Delhi,0,-0.29277,0.4
3,35,35000,Chennai,1,0.29277,0.6
4,40,40000,Mumbai,1,0.87831,0.8
5,45,45000,Chennai,0,1.46385,1.0


In [5]:
## 3. Encoding (Label + OneHot)
# Label Encoding: replace each city name with an integer code (adds `city_label` to df).
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target values (y);
# it is applied to a feature here for demonstration. The integer codes suggest an
# ordering between cities that does not exist — prefer the one-hot columns for modelling.
label_enc = LabelEncoder()
df['city_label'] = label_enc.fit_transform(df['city'])

# One Hot Encoding: one 0/1 indicator column per distinct city, returned as a dense array.
ohe = OneHotEncoder(sparse_output=False)
city_ohe = ohe.fit_transform(df[['city']])

# Give the indicator frame df's own index. pd.concat(axis=1) lines rows up by index
# label, so a fresh 0..n-1 index would misalign (and introduce NaN rows) as soon as
# df carries any other index, e.g. after filtering, sorting or sampling.
ohe_df = pd.DataFrame(
    city_ohe,
    columns=ohe.get_feature_names_out(['city']),
    index=df.index,
)

# Original columns followed by the indicator columns, kept in a separate frame.
df_encoded = pd.concat([df, ohe_df], axis=1)
df_encoded

Unnamed: 0,age,income,city,purchased,age_std,income_mm,city_label,city_Chennai,city_Delhi,city_Mumbai
0,20,20000,Delhi,0,-1.46385,0.0,1,0.0,1.0,0.0
1,25,25000,Mumbai,1,-0.87831,0.2,2,0.0,0.0,1.0
2,30,30000,Delhi,0,-0.29277,0.4,1,0.0,1.0,0.0
3,35,35000,Chennai,1,0.29277,0.6,0,1.0,0.0,0.0
4,40,40000,Mumbai,1,0.87831,0.8,2,0.0,0.0,1.0
5,45,45000,Chennai,0,1.46385,1.0,0,1.0,0.0,0.0


In [6]:
## 4. Polynomial Features (Age + Income)
# Expand the two numeric columns into all terms up to degree 2: the originals,
# their squares and the age*income cross term. include_bias=False leaves out the
# constant column.
poly = PolynomialFeatures(
    degree=2,
    include_bias=False,
)
poly.fit(df[['age', 'income']])
poly_features = poly.transform(df[['age', 'income']])

# Wrap the raw array in a frame labelled with the generated term names.
poly_df = pd.DataFrame(
    poly_features,
    columns=poly.get_feature_names_out(['age', 'income']),
)
poly_df

Unnamed: 0,age,income,age^2,age income,income^2
0,20.0,20000.0,400.0,400000.0,400000000.0
1,25.0,25000.0,625.0,625000.0,625000000.0
2,30.0,30000.0,900.0,900000.0,900000000.0
3,35.0,35000.0,1225.0,1225000.0,1225000000.0
4,40.0,40000.0,1600.0,1600000.0,1600000000.0
5,45.0,45000.0,2025.0,2025000.0,2025000000.0


In [7]:
## 5. Feature Selection (SelectKBest)
# Score each numeric predictor against the target with f_classif and keep the
# single best one (k=1).
# NOTE(review): in this toy data `income` is exactly `age * 1000`, so the two
# columns presumably receive the same score and the choice between them is
# effectively a tie-break — confirm before reading meaning into the result.
y = df['purchased']
X = df[['age', 'income']]

selector = SelectKBest(score_func=f_classif, k=1).fit(X, y)

# Integer positions of the retained columns, mapped back to their names.
selected_idx = selector.get_support(indices=True)
selected_features = X.columns[selected_idx]
selected_features

Index(['income'], dtype='object')

In [8]:
## 6. Pipeline (Scaling + Logistic Regression)
# Two named steps run in order: 'scale' standardises the features, 'model' is the
# classifier. Fitting the pipeline fits the scaler and then the model on the
# scaled output, using X and y from the feature-selection cell.
pipeline = Pipeline(
    steps=[
        ('scale', StandardScaler()),
        ('model', LogisticRegression()),
    ]
).fit(X, y)

print("Pipeline trained successfully!")

Pipeline trained successfully!
