In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from joblib import dump, load


In [2]:
# Read the raw housing table from the CSV file in the working directory.
housing = pd.read_csv(filepath_or_buffer="data.csv")


In [3]:
# Target: the "MEDV" column. Features: every remaining column.
y = housing["MEDV"]
X = housing.drop(columns="MEDV")


In [4]:
# Replace missing feature values with the per-column mean.
#
# The imputed array is relabelled with X's own column names and row index
# rather than `housing.columns[:-1]`, which would only be correct when "MEDV"
# happens to be the last column of the CSV. The right-hand side is evaluated
# before `X` is rebound, so `X.columns` / `X.index` still refer to the
# un-imputed frame here.
#
# NOTE(review): this imputer is fitted on every row *before* the train/test
# split, so the column means include rows that end up in the test set, and the
# model pipeline defines its own (median) imputer as well. Consider dropping
# this step and relying on the pipeline's imputer — confirm before changing,
# since it would alter the reported metrics.
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns, index=X.index)


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [6]:
# Assemble the model: median imputation, then standardization, then an
# ordinary least-squares regressor as the final step.
preprocessing_steps = [
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler()),
]
estimator_step = ('regressor', LinearRegression())
model_pipeline = Pipeline(steps=preprocessing_steps + [estimator_step])


In [7]:
# Train every pipeline step on the training split only.
model_pipeline.fit(X=X_train, y=y_train)


In [8]:
# Predict the target for the held-out test rows.
y_pred = model_pipeline.predict(X=X_test)

In [9]:
# Evaluate on the test split: RMSE is the square root of the mean squared
# error, so it is expressed in the same units as the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE:", rmse)


RMSE: 4.94012400534101


In [10]:
# Persist the fitted pipeline (preprocessing + regressor) to disk with joblib.
dump(value=model_pipeline, filename='LinearRegressionModel.joblib')

['LinearRegressionModel.joblib']

In [11]:
# Restore the persisted pipeline from disk.
# joblib files are pickle-based: only load files from a trusted source.
model = load(filename='LinearRegressionModel.joblib')


In [12]:
# Example input for prediction: one sample with 13 feature values, wrapped in
# an outer list so the resulting array is 2-D (one row per sample).
# NOTE(review): these values look already standardized (several are negative
# and of unit scale), yet the saved pipeline applies its own StandardScaler —
# confirm whether raw feature values were intended here.
example_features = [
    -0.43942006, 3.12628155, -1.12165014, -0.27288841, -1.42262747,
    -0.23979304, -1.31238772, 2.61111401, -1.0016859, -0.5778192,
    -0.97491834, 0.41164221, -0.86091034,
]
input_data = np.array([example_features])


In [13]:
# Predict house price for the example input using the reloaded pipeline.
#
# The pipeline was fitted on a DataFrame, so a bare ndarray makes scikit-learn
# warn that the input lacks valid feature names and leaves column order
# implicit. When the model exposes the feature names recorded at fit time,
# label the input with them; otherwise fall back to the raw array.
feature_names = getattr(model, "feature_names_in_", None)
if feature_names is not None:
    model_input = pd.DataFrame(input_data, columns=feature_names)
else:
    model_input = input_data

predicted_price = model.predict(model_input)
print("Predicted Price:", predicted_price)


Predicted Price: [50.34802822]


