In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# ------------------------
# 1. Load and preprocess the Dataset
# ------------------------

In [None]:
# Read the housing CSV into `dataset`. The relative path means the file is
# looked up in the notebook's current working directory.
try:
    dataset = pd.read_csv("HousingData.csv", encoding='latin1')
except FileNotFoundError as err:
    # Only a missing file is handled here; parse or encoding failures
    # propagate with their own traceback instead of being mislabelled as a
    # missing file. Raising (rather than calling exit()) stops this cell
    # without tearing down the notebook kernel.
    raise FileNotFoundError(
        "cant find data: 'HousingData.csv' was not found in the current working directory"
    ) from err

In [None]:
# Count the missing entries in every column of the loaded data.
missing_values = dataset.isnull().sum()

if missing_values.any():
    # At least one column has gaps: fill each gap with that column's mean.
    # fit_transform hands back a plain array by default, so wrap it in a
    # DataFrame again, restoring the original column and row labels.
    inputer = SimpleImputer(strategy='mean')
    dataset_inputer = pd.DataFrame(
        inputer.fit_transform(dataset),
        columns=dataset.columns,
        index=dataset.index,
    )
else:
    print("No missing values")
    # The following cells read `dataset_inputer` unconditionally, so it has
    # to exist on this path as well; a copy keeps `dataset` untouched.
    dataset_inputer = dataset.copy()

# -------------------------------
# 2. Define Target Variables and Features
# ------------------------------

In [None]:
# Pick the model inputs (columns 'RM' and 'LSTAT') and the value to predict
# (column 'MEDV') out of the imputed frame.
X_features = dataset_inputer.loc[:, ['RM', 'LSTAT']]
y_target = dataset_inputer.loc[:, 'MEDV']

# -------------------------------
# 3. Train and Compare Models
# ------------------------------

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y_target,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary linear regression on the training portion
# (fit() returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows and for the rows the model was fitted on.
prediction = model.predict(X_test)
yprediction = model.predict(X_train)

# Test-set metrics: mean squared error and the estimator's built-in score.
test_mse = mean_squared_error(y_true=y_test, y_pred=prediction)
test_score = model.score(X_test, y_test)