In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Global plotting defaults shared by every chart produced below:
# seaborn's "whitegrid" theme plus a 12x7 default figure size.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 7)})

def _save_and_close(output_dir, filename):
    """Tighten the current figure's layout, save it as *filename* in *output_dir*, and close it."""
    plt.tight_layout()
    plt.savefig(output_dir / filename)
    plt.close()


def run_housing_analysis(data_dir='C:/Users/Karan/OneDrive/Desktop/RA', output_dir='.'):
    """Load the three housing CSV files and save ten summary charts as PNG files.

    Reads ``Permit_Requests.csv``, ``Unfit_Properties.csv`` and
    ``Code_Violations_V2.csv`` from *data_dir*, parses their date columns
    (unparseable values become NaT), and writes ``viz1_*.png`` through
    ``viz10_*.png`` into *output_dir*.

    Args:
        data_dir: Folder containing the three CSV files. Defaults to the
            folder the analysis was originally written against.
        output_dir: Folder the PNG files are written to; created if it does
            not exist. Defaults to the current working directory.

    Returns:
        None. If any CSV file is missing, an error message is printed and
        the function returns early without producing any chart.
    """
    # Local import so the function does not depend on a file-level pathlib import.
    from pathlib import Path

    data_path = Path(data_dir)
    out_path = Path(output_dir)

    # 1. Load the datasets
    try:
        permits = pd.read_csv(data_path / 'Permit_Requests.csv')
        unfit = pd.read_csv(data_path / 'Unfit_Properties.csv')
        violations = pd.read_csv(data_path / 'Code_Violations_V2.csv')
    except FileNotFoundError as e:
        # Name the folder that was actually searched so the hint matches the code.
        print(
            f"Error: {e}. Please ensure the CSV files are in '{data_path}' "
            "or pass data_dir=<folder containing them>."
        )
        return
    print("Datasets loaded successfully.")

    out_path.mkdir(parents=True, exist_ok=True)

    # 2. Preprocessing: Dates and Data Cleaning
    # errors='coerce' turns unparseable dates into NaT instead of raising.
    violations['violation_date'] = pd.to_datetime(violations['violation_date'], errors='coerce')
    violations['comply_by_date'] = pd.to_datetime(violations['comply_by_date'], errors='coerce')
    unfit['violation_date'] = pd.to_datetime(unfit['violation_date'], errors='coerce')
    permits['Issue_Date'] = pd.to_datetime(permits['Issue_Date'], errors='coerce')

    # 3. Generating the 10 Visualizations

    # VIZ 1: Top 10 Violation Types
    plt.figure()
    violations['complaint_type_name'].value_counts().head(10).plot(kind='barh', color='skyblue')
    plt.title('3.1 Top 10 Housing Violation Types in Syracuse')
    plt.xlabel('Frequency')
    plt.gca().invert_yaxis()  # most frequent type at the top
    _save_and_close(out_path, 'viz1_violation_types.png')

    # VIZ 2: Monthly Trend (Seasonality)
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Keep only rows with a valid date so the month can be stored as an int
    # (a column containing NaT yields float months).
    dated = violations.dropna(subset=['violation_date']).copy()
    dated['month'] = dated['violation_date'].dt.month.astype(int)
    plt.figure()
    # Pin all twelve categories: without `order`, a month absent from the data
    # would be skipped and every later tick label would be shifted.
    sns.countplot(data=dated, x='month', order=list(range(1, 13)), palette='viridis')
    plt.xticks(ticks=range(12), labels=month_names)
    plt.title('3.2 Monthly Distribution of Code Violations (Seasonality)')
    _save_and_close(out_path, 'viz2_seasonal_trends.png')

    # VIZ 3: Status Breakdown
    plt.figure()
    violations['status_type_name'].value_counts().plot.pie(
        autopct='%1.1f%%', startangle=140, colors=['#66b3ff', '#99ff99']
    )
    plt.title('3.3 Proportion of Open vs. Closed Violations')
    plt.ylabel('')  # drop the column name pandas puts on the y axis
    _save_and_close(out_path, 'viz3_status_pie.png')

    # VIZ 4: Unfit Properties by Zip Code
    plt.figure()
    unfit['zip'].value_counts().head(10).plot(kind='bar', color='salmon')
    plt.title('3.4 Unfit Properties Count by Zip Code')
    plt.xlabel('Zip Code')
    plt.ylabel('Number of Properties')
    _save_and_close(out_path, 'viz4_unfit_zip.png')

    # VIZ 5: Top 10 Permit Types
    plt.figure()
    permits['Permit_Type'].value_counts().head(10).plot(kind='bar', color='teal')
    plt.title('3.5 Top 10 Building Permit Types')
    plt.xticks(rotation=45, ha='right')
    _save_and_close(out_path, 'viz5_permit_types.png')

    # VIZ 6: Violation Volume by Neighborhood
    plt.figure()
    violations['Neighborhood'].value_counts().head(15).plot(kind='bar', color='darkblue')
    plt.title('3.6 Top 15 Neighborhoods by Violation Volume')
    plt.ylabel('Total Count')
    _save_and_close(out_path, 'viz6_nbh_volume.png')

    # VIZ 7: Status Heatmap by Top Neighborhoods
    top_nbh = violations['Neighborhood'].value_counts().head(10).index
    subset = violations[violations['Neighborhood'].isin(top_nbh)]
    status_nbh = pd.crosstab(subset['Neighborhood'], subset['status_type_name'])
    plt.figure()
    sns.heatmap(status_nbh, annot=True, fmt='d', cmap='YlGnBu')
    plt.title('3.7 Violation Status Heatmap (Top 10 Neighborhoods)')
    _save_and_close(out_path, 'viz7_status_heatmap.png')

    # VIZ 8: Yearly Permit Trends
    permits['year'] = permits['Issue_Date'].dt.year
    plt.figure()
    yearly_permits = permits[permits['year'] > 2010].groupby('year').size()
    yearly_permits.plot(kind='line', marker='o', linewidth=2, color='green')
    plt.title('3.8 Yearly Building Permit Issuance (2011-2025)')
    plt.ylabel('Permit Count')
    plt.grid(True, linestyle='--', alpha=0.7)
    _save_and_close(out_path, 'viz8_permit_yearly.png')

    # VIZ 9: Compliance Windows (Boxplot)
    violations['compliance_window'] = (violations['comply_by_date'] - violations['violation_date']).dt.days
    # Filtering for realistic windows (0-180 days) for better visualization
    filtered_comp = violations[violations['compliance_window'].between(0, 180)]
    plt.figure()
    sns.boxplot(data=filtered_comp, x='compliance_window', color='orange')
    plt.title('3.9 Distribution of Required Compliance Windows')
    plt.xlabel('Days from Violation to Required Compliance')
    _save_and_close(out_path, 'viz9_compliance_box.png')

    # VIZ 10: Corrective Actions for Unfit Properties
    plt.figure()
    unfit['corrective_action'].value_counts().head(10).plot(kind='barh', color='purple')
    plt.title('3.10 Top 10 Corrective Actions for Unfit Properties')
    plt.xlabel('Frequency')
    plt.gca().invert_yaxis()  # most frequent action at the top
    _save_and_close(out_path, 'viz10_corrective_actions.png')

    print("Analysis complete. 10 visualizations saved as PNG files.")

if __name__ == "__main__":
    # run_housing_analysis() reports progress via print() and returns None,
    # so there is no result worth binding to a name.
    run_housing_analysis()

Error: [Errno 2] No such file or directory: 'C:/Users/Karan/OneDrive/Desktop/RA/Permit_Requests.csv'. Please ensure the CSV files are in the same folder as this script.
