# Exploratory Data Analysis

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Read the housing CSV (path is relative to the notebook's working directory).
housing_csv = '../data/housing.csv'
df = pd.read_csv(housing_csv)

# Final expression of the cell: preview the first five rows.
df.head(n=5)

In [None]:
# Remove the 'id' column. `columns=` already targets the column axis, so the
# extra `axis=1` argument was redundant and has been dropped.
# NOTE: re-running this cell after 'id' is gone raises KeyError; reload the
# data first if you need to execute it again.
df = df.drop(columns=['id'])
print(df.shape)  # Confirm your dataset has 20 columns

In [None]:
# Histogram of the 'price' column (50 bins) with a KDE curve overlaid.
plt.figure(figsize=(10, 6))

sns.histplot(df['price'], bins=50, kde=True)

# Title typo fixed ('Distibution' -> 'Distribution'); it is baked into the saved image.
plt.title('Distribution of House Prices')
plt.xlabel('Price')
plt.ylabel('Count')

# Save before plt.show() so the figure is written while it is still current.
plt.savefig('../images/price_distribution.jpg', dpi=300, format='jpg')

plt.show()


In [None]:
# Boxplot of the 'price' column along the x-axis, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))

sns.boxplot(x=df['price'], width=0.3, ax=ax)

ax.set_title('Boxplot of House Prices')
ax.set_xlabel('Price')

# Write the image to disk at 300 dpi, then display it.
fig.savefig('../images/price_boxplot.jpg', dpi=300, format='jpg')

plt.show()

In [None]:
# Bar chart counting how many rows fall under each 'bedrooms' value.
plt.figure(figsize=(8, 4))

bedrooms_ax = sns.countplot(data=df, x='bedrooms')
bedrooms_ax.set(
    title='Distribution of Bedrooms',
    xlabel='Number of Bedrooms',
    ylabel='Count',
)

# Write the image to disk at 300 dpi, then display it.
plt.savefig('../images/bedrooms_distribution.jpg', dpi=300, format='jpg')

plt.show()

In [None]:
# Single-column heatmap: correlation of every numeric column with 'price',
# ordered from the highest coefficient to the lowest.
fig, ax = plt.subplots(figsize=(12, 10))

corr = df.corr(numeric_only=True)
price_corr = corr[['price']].sort_values(by='price', ascending=False)

sns.heatmap(price_corr, annot=True, cmap='coolwarm', ax=ax)

ax.set_title('Correlation of Features with Price')

# Write the image to disk at 300 dpi, then display it.
fig.savefig('../images/correlation_heatmap.jpg', dpi=300, format='jpg')

plt.show()

In [None]:
# One box of 'price' values per 'condition' category.
plt.figure(figsize=(10, 6))

condition_ax = sns.boxplot(data=df, x='condition', y='price', width=0.3)

condition_ax.set_title('Price by Condition')
condition_ax.set_xlabel('Condition')
condition_ax.set_ylabel('Price')

# Write the image to disk at 300 dpi, then display it.
condition_ax.figure.savefig('../images/price_by_condition.jpg', dpi=300, format='jpg')

plt.show()