In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Load the Boston Housing data and assemble a single DataFrame: the feature
# columns come from boston.data / boston.feature_names, and the regression
# target is appended as the last column, 'MEDV'.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and its import) only runs on scikit-learn < 1.2 — confirm
# the pinned environment or plan a switch to another data source.
boston = load_boston()
df = pd.DataFrame(data=boston.data, columns=boston.feature_names).assign(MEDV=boston.target)

# Preview the first 5 rows of the assembled frame
df.head()

In [None]:
# Dataset statistics: per-column summary (count, mean, std, min, quartiles, max)
# produced by DataFrame.describe() with its default settings.
df.describe()

In [None]:
# Count missing values per column (a column of zeros means nothing is missing)
df.isna().sum()

In [None]:
# Histogram of the target column (MEDV), split into 30 bins
sns.histplot(data=df, x='MEDV', bins=30)
plt.show()

In [None]:
# Correlation matrix heatmap: pairwise correlations of every column in df
# (DataFrame.corr() defaults), rounded to 2 decimals and annotated per cell.
# An explicit Axes is used so the title and plot are attached to a known figure
# rather than to implicit pyplot state.
fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(
    df.corr().round(2),
    annot=True,
    cmap='coolwarm',
    # Correlations live in [-1, 1]; pinning the colour limits keeps the neutral
    # colour of the diverging map at 0 and makes colours comparable across runs.
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title('Correlation matrix')
plt.show()

In [None]:
# Separate the target column (MEDV) from the predictors, then hold out 20% of
# the rows for testing; random_state is fixed so the split is reproducible.
y = df['MEDV']
X = df.drop(columns='MEDV')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training split (fit() returns the estimator)
lm = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out split: predict, then report the mean squared error
y_pred = lm.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')