In [1]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# --- Configuration ------------------------------------------------------
# Tunable values gathered in one place. Both paths are relative, so they
# resolve against the kernel's current working directory.
DATA_PATH = "data/house_data.csv"
TARGET_COLUMN = "price"
TEST_SIZE = 0.2       # fraction of rows held out for evaluation
RANDOM_STATE = 42     # fixed seed so the train/test split is repeatable
MODEL_PATH = "house-price-model.joblib"

# --- Step 1: read the CSV into a DataFrame --------------------------------
data = pd.read_csv(DATA_PATH)

# --- Step 2: separate the target from the features ------------------------
# Every column other than the target is passed to the model as a feature.
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# --- Step 3: hold out a test partition ------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# --- Steps 4-5: build the estimator and fit it on the training rows -------
model = LinearRegression()
model.fit(X=X_train, y=y_train)

# --- Step 6: score the fitted model on the held-out rows ------------------
predictions = model.predict(X_test)

r2 = r2_score(y_true=y_test, y_pred=predictions)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)

# Heading and underline go out as two lines from a single print call.
print("Model Evaluation", "-----------------", sep="\n")
for metric_label, metric_value in (("MAE:", mae), ("R2 Score:", r2)):
    print(metric_label, metric_value)

# --- Step 7: persist the fitted estimator to disk -------------------------
joblib.dump(model, MODEL_PATH)

print("\nModel saved successfully!")

Model Evaluation
-----------------
MAE: 21688.304360824237
R2 Score: 0.9140130326762934

Model saved successfully!
