In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Silence warnings so they do not clutter the notebook output.
# NOTE(review): this is a blanket filter (every warning category is ignored),
# so deprecation notices from the libraries used below are hidden as well.
import warnings
warnings.filterwarnings('ignore')

# Download dataset from URL
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# The separator is a regular expression (one or more whitespace characters).
# It is written as a raw string because "\s" is not a valid Python string
# escape; the non-raw spelling triggers an invalid-escape warning on current
# interpreters. The first 22 lines of the file are skipped and no row is
# treated as a header, so columns are numbered 0..N-1.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
raw_df

In [None]:
# Per-column summary statistics of the raw parsed frame.
# NOTE(review): at this point the frame has not been reshaped yet. The later
# feature-building step combines each even row with the first three values of
# the following odd row, so these statistics are computed over the interleaved
# raw rows, not over the final feature columns.
raw_df.describe()

In [None]:
# Rebuild one row per observation from the raw frame. The indexing pairs each
# even raw row with the odd row that follows it: the even row contributes all
# of its values, the odd row contributes its first two values as the remaining
# features and its third value as the target (house price).
raw_values = raw_df.values
feature_matrix = np.hstack([raw_values[0::2, :], raw_values[1::2, :2]])
price_vector = raw_values[1::2, 2]

columns = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
    'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
# Named feature columns plus the target stored alongside them as 'PRICE'.
df = pd.DataFrame(feature_matrix, columns=columns).assign(PRICE=price_vector)

# Splitting Dataset
X = df.drop(columns='PRICE')  # feature frame (every column except the target)
y = df['PRICE']               # target series

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training
# Ordinary least-squares linear regression fitted on the training split only.
model = LinearRegression()
model.fit(X_train, y_train)

# Prediction
# The test split is predicted once; the result is bound to both names because
# later cells refer to it as `y_pred` and as `y_test_pred`. Both names point at
# the same array, so treat it as read-only.
y_test_pred = model.predict(X_test)
y_pred = y_test_pred
# Predictions on the training split, used to compare fit quality on seen data.
y_train_pred = model.predict(X_train)
# Evaluation
# Score the held-out predictions first, then report them together with a
# preview of the first five predicted prices.
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", test_mse)
print("R-Squared Score:", test_r2)
print("Predicted Prices:\n", y_pred[:5])

In [None]:
import matplotlib.pyplot as plt

# Scatter of held-out actual prices against the model's predictions, drawn on
# an explicit figure/axes pair rather than through the implicit pyplot state.
fig_test, ax_test = plt.subplots()
ax_test.scatter(y_test, y_pred)
ax_test.set_xlabel("Actual Prices")
ax_test.set_ylabel("Predicted Prices")
ax_test.set_title("Actual Prices vs Predicted Prices")
plt.show()

In [None]:
# Overlay training and testing predictions against their true values on one
# set of axes; the training series is drawn first, the testing series on top.
fig_fit, ax_fit = plt.subplots()
scatter_series = (
    (y_train, y_train_pred, 'blue', 'Training data'),
    (y_test, y_test_pred, 'lightgreen', 'Testing data'),
)
for true_values, predicted_values, colour, legend_label in scatter_series:
    ax_fit.scatter(true_values, predicted_values, color=colour, marker='o',
                   label=legend_label)

# Labels and title
ax_fit.set_xlabel("True Values")
ax_fit.set_ylabel("Predicted")
ax_fit.set_title("True Values vs Predicted")
ax_fit.legend()
plt.show()
