Imports
---

In [6]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

Creating the sample dataset `house_price`
---

In [7]:
# Toy housing data: one list per column, rows aligned by position.
house_price = dict(
    City=["Pune", "Mumbai", "Nagpur", "Pune", "Mumbai"],
    House_Size=[1200, 850, 1500, 900, 1100],
    Price=[7500000, 11000000, 5500000, 7000000, 9500000],
    Type=["Apartment", "Flat", "Bungalow", "Flat", "Apartment"],
)

df = pd.DataFrame(data=house_price)

# Separate the regression target from the feature columns.
y = df["Price"]
X = df.drop(columns=["Price"])

Preprocessing
---

In [8]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Feature groups: each list names the columns routed to one sub-pipeline below.
categorical = ["City", "Type"]
numerical = ["House_Size"]

# Numeric branch: fill missing values with the column mean, then standardize.
numerical_transformers = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown="ignore" lets transform() accept
# categories that were not present at fit time instead of raising.
# NOTE(review): no `drop` is set, so each feature gets one column per category;
# those columns are collinear with a linear model's intercept — confirm this
# is acceptable for the downstream estimator.
categorical_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group through its branch and concatenate the results.
preprocessor = ColumnTransformer(
    [
        ("num", numerical_transformers, numerical),
        ("cat", categorical_transformer, categorical),
    ]
)

Final Pipeline (preprocessing + regression model)
---

In [9]:
from sklearn.linear_model import LinearRegression

# End-to-end estimator: the preprocessing step runs first and its output feeds
# the linear regression. Nothing is fitted here; this only assembles the steps.
model_pipeline = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("model", LinearRegression()),
    ]
)

Fit & Predict
---

In [10]:
# Fit preprocessing and regression together on every available row.
model_pipeline.fit(X, y)

# One hand-written listing to score; its keys match the feature columns of X.
sample = pd.DataFrame([{"City": "Mumbai", "House_Size": 1000, "Type": "Flat"}])

# NOTE(review): X appears to hold only the five rows defined earlier and there
# is no held-out data, so treat the number below as a smoke test of the
# pipeline rather than a meaningful price estimate.
predicted_price = model_pipeline.predict(sample)
print(predicted_price)

[16999999.99999999]
