# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Plot styling: seaborn-flavoured matplotlib theme plus the "husl" palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Load the housing dataset (path is relative to the working directory).
df = pd.read_csv('../data/Housing.csv')

# Display basic info
print("Dataset Shape:", df.shape)
print("\nFirst few rows:")
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a plain script), so print it explicitly.
print(df.head())

def _draw_count_panel(ax, counts, colors, title, xlabel, total):
    """Draw one annotated bar chart of value counts on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        counts: Series mapping each distinct value to its number of rows;
            the index values are used directly as the bar x-positions.
        colors: Bar colours, passed straight through to ``ax.bar``.
        title: Panel title.
        xlabel: X-axis label.
        total: Denominator for the percentage printed above each bar.

    Returns:
        The bar container returned by ``ax.bar``.
    """
    bars = ax.bar(counts.index, counts.values, color=colors)
    ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel('Number of Houses', fontsize=12)

    # Annotate every bar with its count and its share of all rows; the +5
    # offset lifts the text slightly above the top of the bar.
    for value, count in counts.items():
        percentage = (count / total) * 100
        ax.text(value, count + 5, f'{count}\n({percentage:.1f}%)',
                ha='center', va='bottom', fontsize=10, fontweight='bold')
    return bars


# 2x2 grid: one panel per discrete housing characteristic.
fig, axes = plt.subplots(2, 2, figsize=(20, 16))
fig.suptitle('Housing Characteristics Distribution', fontsize=20, fontweight='bold', y=0.95)

# Percentages in every panel are relative to the full dataset.
total = len(df)

# 1. Distribution of Bedrooms
bedroom_counts = df['bedrooms'].value_counts().sort_index()
bars1 = _draw_count_panel(
    axes[0, 0], bedroom_counts,
    ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57', '#FF9FF3'],
    'Distribution of Bedrooms', 'Number of Bedrooms', total,
)

# 2. Distribution of Bathrooms
bathroom_counts = df['bathrooms'].value_counts().sort_index()
bars2 = _draw_count_panel(
    axes[0, 1], bathroom_counts,
    ['#54A0FF', '#5F27CD', '#00D2D3', '#FF9FF3', '#F368E0'],
    'Distribution of Bathrooms', 'Number of Bathrooms', total,
)

# 3. Distribution of Stories
stories_counts = df['stories'].value_counts().sort_index()
bars3 = _draw_count_panel(
    axes[1, 0], stories_counts,
    ['#FF9FF3', '#FECA57', '#FF6B6B', '#48DBFB', '#1DD1A1'],
    'Distribution of Stories', 'Number of Stories', total,
)

# 4. Distribution of Parking
parking_counts = df['parking'].value_counts().sort_index()
bars4 = _draw_count_panel(
    axes[1, 1], parking_counts,
    ['#00D2D3', '#54A0FF', '#5F27CD', '#FF9FF3', '#F368E0', '#FF6B6B'],
    'Distribution of Parking Spaces', 'Number of Parking Spaces', total,
)

# Adjust layout
plt.tight_layout()
plt.show()

# Price distribution analysis
plt.figure(figsize=(15, 6))

# Create price ranges.
# Bins are left-closed ([low, high)) so the labels read literally: a house
# priced at exactly 3,000,000 is counted under '3M-6M', not '<3M'. The last
# edge is open-ended so '12M+' covers every price from 12M upwards instead of
# turning anything above a fixed cap into NaN and dropping it from the chart.
price_ranges = [0, 3000000, 6000000, 9000000, 12000000, np.inf]
price_labels = ['<3M', '3M-6M', '6M-9M', '9M-12M', '12M+']

df['price_range'] = pd.cut(df['price'], bins=price_ranges,
                           labels=price_labels, right=False)
# value_counts() on the categorical column; sort_index() restores bin order.
price_dist = df['price_range'].value_counts().sort_index()

# Create bar chart
bars = plt.bar(price_dist.index, price_dist.values,
               color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57'])
plt.title('Distribution of House Prices', fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Price Range (in ₹)', fontsize=12)
plt.ylabel('Number of Houses', fontsize=12)

# Add count and percentage on top of each bar. Categorical bars sit at
# x = 0, 1, 2, ..., so the enumerate index is the bar's x-position.
total = len(df)
for i, count in enumerate(price_dist.values):
    percentage = (count / total) * 100
    plt.text(i, count + 5, f'{count}\n({percentage:.1f}%)',
             ha='center', va='bottom', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.show()

# Area distribution analysis
plt.figure(figsize=(15, 6))

# Create area ranges.
# Bins are left-closed ([low, high)) so the labels read literally: an area of
# exactly 3000 falls in '3K-6K' (not '<3K') and exactly 15000 falls in '15K+'.
# The last edge is open-ended so '15K+' has no hidden upper cap beyond which
# rows would become NaN and silently vanish from the chart.
area_ranges = [0, 3000, 6000, 9000, 12000, 15000, np.inf]
area_labels = ['<3K', '3K-6K', '6K-9K', '9K-12K', '12K-15K', '15K+']

df['area_range'] = pd.cut(df['area'], bins=area_ranges,
                          labels=area_labels, right=False)
# value_counts() on the categorical column; sort_index() restores bin order.
area_dist = df['area_range'].value_counts().sort_index()

# Create bar chart
bars = plt.bar(area_dist.index, area_dist.values,
               color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57', '#FF9FF3'])
plt.title('Distribution of House Areas', fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Area Range (sq ft)', fontsize=12)
plt.ylabel('Number of Houses', fontsize=12)

# Add count and percentage on top of each bar. Categorical bars sit at
# x = 0, 1, 2, ..., so the enumerate index is the bar's x-position.
total = len(df)
for i, count in enumerate(area_dist.values):
    percentage = (count / total) * 100
    plt.text(i, count + 5, f'{count}\n({percentage:.1f}%)',
             ha='center', va='bottom', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.show()