In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the preprocessed dataset from disk into a DataFrame
csv_path = 'preprocessed_clean_v4.csv'
data = pd.read_csv(csv_path)

# NOTE: importing seaborn does not apply its theme (since seaborn 0.8); call sns.set_theme() here if seaborn styling is wanted

# One histogram (with KDE overlay) per column, laid out on a 3x3 grid and
# written to 'histograms.png'. `features` stays defined at module level.
features = ['Rooms', 'Distance', 'Bedroom2', 'Bathroom', 'Car', 'Bedroom_Discrepancy', 'Price']
hist_fig = plt.figure(figsize=(15, 10))
for position, column in enumerate(features, start=1):
    # Grid slots are 1-based; only as many slots as there are columns get an axes
    axis = hist_fig.add_subplot(3, 3, position)
    sns.histplot(data[column], kde=True, ax=axis)
    axis.set_title(f'Distribution of {column}')
    axis.set_xlabel(column)
    axis.set_ylabel('Count')
hist_fig.tight_layout()
hist_fig.savefig('histograms.png')
plt.close(hist_fig)

# Price plotted against every other feature on a 2x3 grid,
# written to 'scatter_plots.png'.
predictors = features[:-1]  # the last entry of `features` is Price itself
scatter_fig = plt.figure(figsize=(15, 10))
for position, column in enumerate(predictors, start=1):
    axis = scatter_fig.add_subplot(2, 3, position)
    axis.scatter(data[column], data['Price'], alpha=0.5)
    axis.set_xlabel(column)
    axis.set_ylabel('Price ($)')
    axis.set_title(f'Price vs {column}')
scatter_fig.tight_layout()
scatter_fig.savefig('scatter_plots.png')
plt.close(scatter_fig)

# Correlation heatmap of the numeric columns, written to
# 'correlation_heatmap.png'.
# Restrict to numeric/boolean columns before correlating: pandas >= 2.0 no
# longer drops non-numeric columns silently in DataFrame.corr() and raises
# instead, so any remaining text/date column would abort the plot.
plt.figure(figsize=(10, 8))
numeric_data = data.select_dtypes(include=['number', 'bool'])
corr = numeric_data.corr()
# center=0 pins the midpoint of the diverging colormap at zero correlation
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0, fmt='.2f')
plt.title('Correlation Heatmap')
plt.savefig('correlation_heatmap.png')
plt.close()

# Stand-alone 20-bin histogram (with KDE overlay) of Bedroom_Discrepancy,
# written to 'bedroom_discrepancy_hist.png'.
discrepancy_fig, discrepancy_ax = plt.subplots(figsize=(6, 4))
sns.histplot(data['Bedroom_Discrepancy'], kde=True, bins=20, ax=discrepancy_ax)
discrepancy_ax.set_title('Distribution of Bedroom_Discrepancy')
discrepancy_ax.set_xlabel('Bedroom_Discrepancy')
discrepancy_ax.set_ylabel('Count')
discrepancy_fig.savefig('bedroom_discrepancy_hist.png')
plt.close(discrepancy_fig)
