In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

## Load the dataset

In [None]:

# Source: CMU StatLib copy of the Boston housing data.
url = "http://lib.stat.cmu.edu/datasets/boston"

# After the 22-line text header, every record is wrapped over TWO physical
# lines: the first carries 11 values, the second the remaining 3. Reading the
# file naively therefore yields an 11-column frame with twice as many rows as
# records, where every other row holds only 3 values followed by NaN padding.
raw = pd.read_csv(url, sep=r"\s+", skiprows=22, header=None)

# Stitch each pair of physical lines back into a single 14-value record.
first_part = raw.iloc[::2].reset_index(drop=True)        # 11 values per record
second_part = raw.iloc[1::2, :3].reset_index(drop=True)  # last 3 values per record
data = pd.concat([first_part, second_part], axis=1, ignore_index=True)

# Fail loudly here if the file layout ever changes, rather than later
# when the 14 column names are assigned.
assert data.shape == (506, 14), f"unexpected dataset shape: {data.shape}"



## Rename columns for clarity

In [None]:
# Give the 14 positional columns readable labels.
# MEDV (the last one) is the column used as the regression target further down.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
data.columns = column_names

# Quick look at the first rows to confirm the labels line up with the values
print(data.head())

In [None]:
# Summary statistics for every column of the dataset
summary_stats = data.describe()
print(summary_stats)

## Visualize correlations

In [None]:

# Pairwise correlations between all columns, drawn as a heatmap with each
# cell annotated to two decimal places.
corr_matrix = data.corr()
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
plt.show()

## Distribution of the target variable

In [None]:

# Histogram (30 bins) of the MEDV column with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['MEDV'], bins=30, kde=True, ax=ax)
ax.set_title("Distribution of House Prices")
ax.set_xlabel("House Price")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Target is the MEDV column; every other column is a predictor
y = data['MEDV']
X = data.drop(columns='MEDV')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply that same
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Fit a linear regression on the standardized training features
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict the target for the held-out (standardized) test features
y_pred = model.predict(X_test_scaled)

## Model Evaluation

In [None]:
# Score the held-out predictions against the true target values
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report both metrics rounded to two decimals
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

## Visualize Predictions

In [None]:
# Scatter of actual vs predicted target values on the test split
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred)

# Red diagonal y = x spanning the range of actual values: points on this
# line are perfect predictions.
price_range = [min(y_test), max(y_test)]
ax.plot(price_range, price_range, color='red', linewidth=2)

ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs Predicted Prices")
plt.show()

## Making New Predictions

In [None]:
def predict_price(features):
    """Predict the target value for a single observation.

    Uses the module-level ``scaler`` and ``model`` fitted in earlier cells.

    Parameters
    ----------
    features : sequence of numbers
        One value per feature, in the same column order that ``scaler``
        was fitted on.

    Returns
    -------
    array-like
        The output of ``model.predict`` for the single row; index ``[0]``
        to obtain the scalar prediction.

    Raises
    ------
    ValueError
        If the number of values does not match the number of features the
        scaler was fitted on.
    """
    # The scaler was fitted on a DataFrame with named columns. Passing a bare
    # list makes scikit-learn warn that X has no valid feature names, so the
    # row is wrapped in a DataFrame carrying those names when they are known.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        row = pd.DataFrame([list(features)], columns=feature_names)
    else:
        # Scaler has no recorded feature names: a plain 2-D list is accepted as-is.
        row = [features]

    features_scaled = scaler.transform(row)
    return model.predict(features_scaled)

# Example usage: one value per feature, listed in the same order as the
# feature columns of the dataset.
new_data = [
    0.1,    # CRIM
    0,      # ZN
    7.07,   # INDUS
    0,      # CHAS
    0.469,  # NOX
    6.5,    # RM
    40.1,   # AGE
    4.98,   # DIS
    3,      # RAD
    300,    # TAX
    15.3,   # PTRATIO
    396.9,  # B
    4.98,   # LSTAT
]  # Example feature values
predicted_price = predict_price(new_data)

# NOTE(review): the * 1000 presumes MEDV is expressed in thousands of dollars — confirm against the dataset description
print(f"Predicted House Price: ${predicted_price[0] * 1000:.2f}")