In [4]:
# model_training.ipynb (Python code to be used inside a Jupyter notebook)

# Step 1: Import Libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Step 2: Read the Boston Housing CSV straight from its remote location
# into a DataFrame (requires network access when the cell runs).
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Step 3: Pull out the target column ('medv') and keep every other
# column as a predictor.
y = df["medv"]
X = df.drop(columns="medv")

# Step 4: Hold out 20% of the rows for evaluation; the fixed random_state
# makes the shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Build the linear regression estimator and fit it on the training
# split (fit() hands back the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train, y_train)

# Step 6: Predict on the held-out split, then report each metric in turn.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("\nMean Squared Error:", mse)

r2 = r2_score(y_test, y_pred)
print("R2 Score:", r2)

# Step 7: Persist the fitted model to disk with pickle.
# The file is opened in binary-write mode, so an existing file with the
# same name is overwritten.
with open("house_price_prediction.pkl", mode="wb") as model_file:
    pickle.Pickler(model_file).dump(model)

print("\nModel saved as 'house_price_prediction.pkl'")



Mean Squared Error: 24.291119474973538
R2 Score: 0.6687594935356317

Model saved as 'house_price_prediction.pkl'
