# ISLP Chapter 2 - Question 10: Boston Housing Dataset Analysis using ISLP

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from islp import load_data
import warnings

warnings.filterwarnings('ignore')

# Load dataset from ISLP
df = load_data('Boston')

# Add column names
df.columns = [col.strip() for col in df.columns]
df.rename(columns={'MEDV': 'target'}, inplace=True)

# Basic information
print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
df.head()


ModuleNotFoundError: No module named 'islp'

In [None]:
# Correlation analysis
correlation_matrix = df.corr()
crime_correlations = correlation_matrix['CRIM'].abs().sort_values(ascending=False)

# Display correlation matrix
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', square=True)
plt.title('Correlation Matrix of Boston Housing Dataset')
plt.show()


In [None]:
# Charles River analysis
charles_river_count = (df['CHAS'] == 1).sum()
print(f"Suburbs bounding Charles River: {charles_river_count}")
print(f"Percentage: {charles_river_count / len(df) * 100:.1f}%")


In [None]:
# Median pupil-teacher ratio
median_ptratio = df['PTRATIO'].median()
print(f"Median pupil-teacher ratio: {median_ptratio}")


In [None]:
# Lowest home value analysis
lowest_idx = df['target'].idxmin()
lowest_suburb = df.loc[lowest_idx]

print(f"Lowest median home value: ${lowest_suburb['target']:.1f}k")
print("Characteristics of this suburb:")
for col in df.columns:
    if col != 'target':
        percentile = (df[col] <= lowest_suburb[col]).mean() * 100
        print(f"{col}: {lowest_suburb[col]:.3f} ({percentile:.1f}th percentile)")


In [None]:
# Room analysis
rooms_7_plus = df[df['RM'] > 7]
rooms_8_plus = df[df['RM'] > 8]

print(f"Suburbs with >7 rooms per dwelling: {len(rooms_7_plus)}")
print(f"Suburbs with >8 rooms per dwelling: {len(rooms_8_plus)}")

if len(rooms_8_plus) > 0:
    print("\nCharacteristics of suburbs with >8 rooms:")
    print(rooms_8_plus.describe())
