# Gabriel Allen Fubara DE 2023/6442

This notebook loads the **housing_sales_1200.csv** dataset and generates 9 different graphs
to visualize the data.

The visualizations include:

1. Histogram of Sale Prices
2. Boxplot of Sale Prices by Property Type
3. Scatter Plot: Square Footage vs Sale Price
4. Average Sale Price by City (Bar Chart)
5. Days on Market Distribution
6. Heatmap of Correlation Between Numeric Features
7. Year Built vs Average Sale Price
8. HOA Fee Distribution by Property Type
9. Scatter Plot of Geographic Locations (Latitude vs Longitude) colored by Price


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme to the plots drawn after this cell
sns.set(style="whitegrid")

# Read the CSV into a DataFrame and preview its first rows
DATA_FILE = "housing_sales_1200.csv"
df = pd.read_csv(DATA_FILE)
df.head()


In [None]:
# Histogram of the sale_price column (50 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='sale_price', bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Sale Prices')
ax.set_xlabel('Sale Price')
ax.set_ylabel('Count')
plt.show()


In [None]:
# One box per property_type, summarising the spread of sale_price
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='property_type', y='sale_price', ax=ax)
ax.set_title('Sale Price by Property Type')
ax.set_xlabel('Property Type')
ax.set_ylabel('Sale Price')
# Tilt the category labels by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
plt.show()


In [None]:
# sqft against sale_price, one colour per property_type
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='sqft',
    y='sale_price',
    hue='property_type',
    alpha=0.7,
    ax=ax,
)
ax.set_title('Square Footage vs Sale Price')
ax.set_xlabel('Square Footage')
ax.set_ylabel('Sale Price')
# Rebuild the legend so it carries a readable title instead of the column name
ax.legend(title='Property Type')
plt.show()


In [None]:
# Mean sale_price per city, ordered from highest to lowest
avg_price_city = (
    df.groupby('city')['sale_price']
    .mean()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=avg_price_city.index, y=avg_price_city.values, ax=ax)
ax.set_title('Average Sale Price by City')
ax.set_xlabel('City')
ax.set_ylabel('Average Sale Price')
# Vertical labels for the city names
ax.tick_params(axis='x', labelrotation=90)
plt.show()


In [None]:
# Histogram of the days_on_market column (40 bins, no KDE curve)
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='days_on_market', bins=40, kde=False, ax=ax)
ax.set_title('Days on Market Distribution')
ax.set_xlabel('Days on Market')
ax.set_ylabel('Count')
plt.show()


In [None]:
# Columns included in the pairwise correlation matrix
numeric_cols = [
    'beds',
    'baths',
    'sqft',
    'lot_sqft',
    'year_built',
    'sale_price',
    'days_on_market',
    'hoa_fee',
]
corr = df[numeric_cols].corr()

# Annotated heatmap; each cell shows the coefficient to two decimals
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()


In [None]:
# Mean sale_price for each distinct year_built value
price_by_year = df.groupby('year_built')['sale_price']
avg_price_year = price_by_year.mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x=avg_price_year.index, y=avg_price_year.values, ax=ax)
ax.set_title('Year Built vs Average Sale Price')
ax.set_xlabel('Year Built')
ax.set_ylabel('Average Sale Price')
plt.show()


In [None]:
# One box per property_type, summarising the spread of hoa_fee
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='property_type', y='hoa_fee', ax=ax)
ax.set_title('HOA Fee Distribution by Property Type')
ax.set_xlabel('Property Type')
ax.set_ylabel('HOA Fee')
# Tilt the category labels by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
plt.show()


In [None]:
# Geographic scatter: one point per row at (longitude, latitude), coloured by
# sale_price on the continuous viridis colormap.
#
# The points are drawn with Matplotlib's Axes.scatter rather than
# sns.scatterplot because a colorbar needs a mappable (an artist that carries
# the data array and colormap). seaborn colours its points directly and never
# registers one with pyplot, so a bare plt.colorbar() raises
# "RuntimeError: No mappable was found to use for colorbar creation".
fig, ax = plt.subplots(figsize=(10, 6))
points = ax.scatter(
    df['longitude'],
    df['latitude'],
    c=df['sale_price'],
    cmap='viridis',
    alpha=0.7,
)
ax.set_title('Geographic Distribution of Sales')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Hand the scatter collection to the colorbar so it reflects the price scale
fig.colorbar(points, ax=ax, label='Sale Price')
plt.show()
