In [9]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

# Quick look at the raw target column before any modelling is done.
data = pd.read_csv("Housing.csv")
data["price"]

0      13300000
1      12250000
2      12250000
3      12215000
4      11410000
         ...   
540     1820000
541     1767150
542     1750000
543     1750000
544     1750000
Name: price, Length: 545, dtype: int64

In [18]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder ,StandardScaler
from sklearn.compose import ColumnTransformer

def load_data(path="Housing.csv", target="price"):
    """Read a CSV file and split it into a feature frame and a target series.

    Parameters
    ----------
    path : str or path-like, default "Housing.csv"
        Location of the CSV file, passed straight to ``pd.read_csv``.
    target : str, default "price"
        Name of the column to use as the prediction target.

    Returns
    -------
    X : pandas.DataFrame
        Every column of the file except ``target``.
    y : pandas.Series
        The ``target`` column.

    Raises
    ------
    KeyError
        If ``target`` is not a column of the file.
    """
    house = pd.read_csv(path)

    # Fail with a message that names the file and the columns actually found,
    # instead of the bare label error that DataFrame.drop would produce.
    if target not in house.columns:
        raise KeyError(
            f"Column {target!r} not found in {path!r}; "
            f"available columns: {list(house.columns)}"
        )

    X = house.drop(columns=target)
    y = house[target]

    return X, y

def pre_process(X, y, test_size = 0.2, random_state = 42):
    """Split features and target into train and test partitions.

    Forwards ``test_size`` and ``random_state`` to ``train_test_split`` and
    returns its result unchanged, which the caller unpacks as
    ``X_train, X_test, y_train, y_test``.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    partitions = train_test_split(X, y, **split_options)
    return partitions

def build_pipeline(X):
    """Build an unfitted preprocessing + linear-regression pipeline for ``X``.

    Column roles are inferred from the dtypes of ``X``:

    * numeric columns are mean-imputed and then standardised;
    * ``object`` and ``category`` columns are imputed with the most frequent
      value and then one-hot encoded.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame whose dtypes decide which columns go to which transformer.
        Only the column names and dtypes are read; nothing is fitted here.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``"Preprocessor"`` (a ``ColumnTransformer`` with ``"num"`` and
        ``"cat"`` branches) and ``"model"`` (a ``LinearRegression``).
    """
    numerical_features = X.select_dtypes(include=np.number).columns.tolist()
    # "category" is selected alongside "object": ColumnTransformer drops every
    # column it is not told about (remainder="drop" is its default), so
    # categorical-dtype columns would otherwise vanish from the model silently.
    categorical_features = X.select_dtypes(
        include=["object", "category"]
    ).columns.tolist()

    numerical_transformer = Pipeline([
        ("Imputer", SimpleImputer(strategy="mean")),
        ("Scaler", StandardScaler())
    ])

    categorical_transformer = Pipeline([
        ("Imputer", SimpleImputer(strategy="most_frequent")),
        # handle_unknown="ignore" keeps predict() from failing on a category
        # that was not present when the encoder was fitted.
        ("Onehot", OneHotEncoder(handle_unknown="ignore"))
    ])

    preprocessor = ColumnTransformer([
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

    pipe = Pipeline([
        ("Preprocessor", preprocessor),
        ("model", LinearRegression())
    ])
    return pipe
    
def evaluate(model, X_test, y_test):
    """Print and return regression metrics for ``model`` on a test set.

    Parameters
    ----------
    model : fitted estimator
        Any object exposing ``predict(X_test)``.
    X_test : features passed to ``model.predict``.
    y_test : true target values compared against the predictions.

    Returns
    -------
    dict
        Keys ``"MAE"``, ``"MSE"``, ``"RMSE"`` and ``"R2"`` holding the same
        values that are printed, so callers can log or compare them.
    """
    predicted = model.predict(X_test)
    mae = mean_absolute_error(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    # Square root of the mean *squared* error, i.e. the RMSE. It was
    # previously labelled "RMAE", which is not what this value is.
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predicted)

    print(f"MAE: {mae}")
    print(f"MSE: {mse}")
    print(f"RMSE: {rmse}")
    print(f"R^2: {r2}")

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2}
    
if __name__ == "__main__":
    # Load the dataset as a feature frame and a target series.
    X, y = load_data()
    # Hold out a test split using pre_process's default test_size and random_state.
    X_train, X_test, y_train, y_test = pre_process(X, y)
    
    # The pipeline is configured from, and fitted on, the training split only.
    pipe = build_pipeline(X_train)
    pipe.fit(X_train, y_train)
    # Print the metrics computed on the held-out test split.
    evaluate(pipe, X_test,y_test)
  

MAE: 970043.4039201644
MSE: 1754318687330.6685
RMAE: 1324506.9600914405
R^2: 0.6529242642153175
