In [None]:
import pandas as pd
# Load the labelled data. Target and predictors must come from the same rows:
# the original read SalePrice from sample_submission.csv and the features from
# test.csv, so the model was fitted and scored against a submission template
# rather than real labels.
# NOTE(review): assumes the labelled data is train.csv in the working
# directory and carries a SalePrice column — confirm the path.
train_data = pd.read_csv('train.csv').dropna(subset=['SalePrice'])

# Rows without a target were dropped above; split the target off so SalePrice
# is never available to the model as a predictor.
y = train_data.SalePrice
X = train_data.drop(columns=['SalePrice'])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out part of the rows for validation. random_state is pinned so the same
# rows land in each partition on every run; without it the split, and the
# validation score computed from it, changes between executions. The split
# proportions are left at the library defaults.
X_train_full, X_val_full, y_train, y_val = train_test_split(X, y, random_state=0)

In [None]:
# Per-column dtypes of the training split, looked up once and shared by both
# selections below.
col_dtypes = X_train_full.dtypes

# Numeric predictors: columns stored as int64 or float64.
numeric_col = [
    name
    for name, kind in col_dtypes.items()
    if kind in ('int64', 'float64')
]

# Categorical predictors: object-typed columns with fewer than 10 distinct
# values in the training split, which keeps the one-hot encoding narrow.
# NOTE(review): text columns held in a dedicated string dtype instead of
# 'object' would not match this check — confirm for the pandas version in use.
category_col = [
    name
    for name, kind in col_dtypes.items()
    if kind == 'object' and X_train_full[name].nunique() < 10
]

In [None]:
# Columns fed to the model: numeric ones first, then the categorical ones.
full_col = [*numeric_col, *category_col]

# Restrict both splits to those columns, taking independent copies so later
# changes do not touch the full frames.
X_train, X_val = (
    split[full_col].copy() for split in (X_train_full, X_val_full)
)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Numeric columns: missing entries are replaced with a constant. No fill_value
# is given, so the imputer's own default constant applies.
numeric_transformer = SimpleImputer(strategy='constant')

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' stops the encoder from raising on categories
# that were not present when it was fitted.
category_steps = [
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('oneHot', OneHotEncoder(handle_unknown='ignore')),
]
category_transformer = Pipeline(steps=category_steps)

# Route each group of columns to its matching transformer.
column_routes = [
    ('num', numeric_transformer, numeric_col),
    ('cat', category_transformer, category_col),
]
processor = ColumnTransformer(transformers=column_routes)

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random-forest regressor with a fixed seed so repeated fits on the same data
# build the same forest; all other hyperparameters stay at library defaults.
model = RandomForestRegressor(
    random_state=0,
)

In [None]:
# Chain preprocessing and the regressor so a single fit/predict call applies
# both, with the preprocessing learned from the training split only.
pipeline_steps = [
    ('processor', processor),
    ('model', model),
]
my_pipeline = Pipeline(steps=pipeline_steps)

# Fit the preprocessing and the model on the training split.
my_pipeline.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_absolute_error
# Predict on the held-out validation rows and report the mean absolute error
# against their targets.
pred = my_pipeline.predict(X_val)
mae = mean_absolute_error(y_val, pred)
print("MAE:", mae)