# NYC Flood-Related 311 Complaints Analysis

This notebook demonstrates the analysis of flood-related 311 complaints in New York City for 2019, including spatial patterns and socioeconomic factors.

## Setup and Data Loading

In [None]:
# Import necessary libraries
import os
import sys

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium.plugins import HeatMap
from IPython.display import HTML, Image, display

# Add scripts directory to path
sys.path.append('../scripts')

# Import custom modules
import data_processing
import visualization
import socioeconomic_analysis
import interactive_map

# Project directories, given relative to the current working directory
DATA_DIR = "../data"
FIGURES_DIR = "../figures"
RESULTS_DIR = "../results"

# Make sure every directory used by the notebook exists; exist_ok=True turns
# this into a no-op on re-runs
for _required_dir in (
    DATA_DIR,
    os.path.join(DATA_DIR, "raw"),
    os.path.join(DATA_DIR, "processed"),
    FIGURES_DIR,
    RESULTS_DIR,
):
    os.makedirs(_required_dir, exist_ok=True)
del _required_dir  # keep the notebook namespace free of the loop variable

## Data Processing

First, we'll process the NYC 311 data to extract flood-related complaints and join them with census data.

In [None]:
# Run the processing pipeline in order: load the inputs, keep the flood-related
# complaints, join them to census geometries, then aggregate per census tract.
# NOTE(review): these helpers come from the local `data_processing` module
# (../scripts), which is not visible here -- confirm whether
# download_and_prepare_data() caches its download between runs.
complaints_df, census_gdf = data_processing.download_and_prepare_data()
flood_complaints_df = data_processing.filter_flood_complaints(complaints_df)
joined_df = data_processing.spatial_join_with_census(flood_complaints_df, census_gdf)
aggregated_gdf = data_processing.aggregate_by_census_tract(joined_df, census_gdf)

# Report the row count after each stage so drops between steps are visible.
# NOTE(review): the last label assumes aggregated_gdf only holds tracts that
# received complaints -- confirm against aggregate_by_census_tract().
print(f"Total complaints: {len(complaints_df)}")
print(f"Flood-related complaints: {len(flood_complaints_df)}")
print(f"Complaints with census tract: {len(joined_df)}")
print(f"Census tracts with complaints: {len(aggregated_gdf)}")

# Preview the first few flood-related complaints (rendered as the cell output)
flood_complaints_df.head()

## Basic Visualizations

Let's create some basic visualizations to understand the spatial distribution of flood-related complaints.

In [None]:
# Create static choropleth maps: one for raw complaint counts, one for rates.
# NOTE(review): create_choropleth_map() is defined in ../scripts/visualization.py
# (not visible here); it presumably writes each PNG into FIGURES_DIR, which is
# where the display calls below read them from -- confirm.
visualization.create_choropleth_map(
    aggregated_gdf,
    'complaint_count',
    'NYC Flood-Related 311 Complaints (2019) - Count by Census Tract',
    'flood_complaints_count_choropleth.png'
)

visualization.create_choropleth_map(
    aggregated_gdf,
    'complaint_rate',
    'NYC Flood-Related 311 Complaints (2019) - Rate by Census Tract',
    'flood_complaints_rate_choropleth.png',
    cmap='YlOrRd'
)

# Display the saved images. `Image` is imported in the notebook's top import
# cell so that later cells using it do not depend on this cell having run.
display(Image(os.path.join(FIGURES_DIR, 'flood_complaints_count_choropleth.png')))
display(Image(os.path.join(FIGURES_DIR, 'flood_complaints_rate_choropleth.png')))

## Interactive Maps

Now, let's create interactive maps that allow us to hover over areas to see detailed information.

In [None]:
# Create interactive choropleth map for complaint count.
# Positional arguments are: data, column to map, map title, output file name.
# NOTE(review): create_interactive_choropleth() lives in
# ../scripts/interactive_map.py (not visible here); it presumably saves the
# HTML file and returns a folium map object -- confirm.
count_map = interactive_map.create_interactive_choropleth(
    aggregated_gdf,
    'complaint_count',
    'NYC Flood-Related 311 Complaints (2019) - Count by Census Tract',
    'interactive_flood_complaints_count.html',
    legend_name='Complaint Count'
)

# Display the interactive map inline in the notebook
display(count_map)

In [None]:
# Create interactive choropleth map for complaint rate.
# Same helper and argument order as the count map: data, column to map,
# map title, output file name.
# NOTE(review): the legend says "per 1000 people"; confirm that
# 'complaint_rate' is actually computed per 1,000 residents upstream.
rate_map = interactive_map.create_interactive_choropleth(
    aggregated_gdf,
    'complaint_rate',
    'NYC Flood-Related 311 Complaints (2019) - Rate by Census Tract',
    'interactive_flood_complaints_rate.html',
    legend_name='Complaint Rate (per 1000 people)'
)

# Display the interactive map inline in the notebook
display(rate_map)

## Socioeconomic Analysis

Let's analyze the relationship between flood-related complaints and socioeconomic factors.

In [None]:
# Calculate descriptive statistics for the tract-level data.
# NOTE(review): which columns are summarised is decided inside
# socioeconomic_analysis (../scripts), which is not visible here.
desc_stats = socioeconomic_analysis.calculate_descriptive_statistics(aggregated_gdf)
print("Descriptive Statistics:")
display(desc_stats)

# Calculate correlations between the tract-level variables
corr_matrix = socioeconomic_analysis.calculate_correlations(aggregated_gdf)
print("\nCorrelation Matrix:")
display(corr_matrix)

# Create correlation heatmap, then show the saved figure.
# NOTE(review): the helper presumably writes 'correlation_heatmap.png' into
# FIGURES_DIR, which is where the display call reads it from -- confirm.
socioeconomic_analysis.create_correlation_heatmap(corr_matrix, 'Correlation Heatmap', 'correlation_heatmap.png')
display(Image(os.path.join(FIGURES_DIR, 'correlation_heatmap.png')))

In [None]:
# Run regression models.
# The returned object must expose .summary(), which is printed below.
# NOTE(review): the dependent variable and predictors are chosen inside
# socioeconomic_analysis.run_ols_regression(), which is not visible here.
ols_results = socioeconomic_analysis.run_ols_regression(aggregated_gdf)
print("OLS Regression Results:")
print(ols_results.summary())

# Run random forest model and collect its feature importances.
# NOTE(review): no seed is passed from the notebook; confirm that
# run_random_forest() fixes its random state so results are reproducible.
rf_importance = socioeconomic_analysis.run_random_forest(aggregated_gdf)
print("\nRandom Forest Feature Importance:")
display(rf_importance)

# Create feature importance plot, then show the saved figure.
# NOTE(review): the helper presumably writes 'rf_feature_importance.png' into
# FIGURES_DIR, which is where the display call reads it from -- confirm.
socioeconomic_analysis.create_feature_importance_plot(rf_importance, 'Random Forest Feature Importance', 'rf_feature_importance.png')
display(Image(os.path.join(FIGURES_DIR, 'rf_feature_importance.png')))

## Interactive Bivariate Analysis

Let's create an interactive map that shows the relationship between complaint rates and socioeconomic factors.

In [None]:
# Reusable helper: interactive bivariate choropleth for any two numeric columns

# Fill colour for tracts where either variable is missing
_BIVARIATE_NO_DATA_COLOR = '#ffffff'


def _min_max_normalize(series):
    """Scale a numeric Series to [0, 1]; missing values stay missing."""
    lowest = series.min()
    span = series.max() - lowest
    if pd.isna(span) or span == 0:
        # Constant or all-missing column: dividing by the range would be 0/0.
        # Multiplying by zero maps every observed value to 0.0 and keeps NaN.
        return series * 0.0
    return (series - lowest) / span


def _bivariate_color(var1_val, var2_val):
    """Return the fill colour for one pair of normalized values."""
    # NaN fails every comparison below, so it must be caught explicitly;
    # otherwise missing data would be drawn as a "medium" class.
    if pd.isna(var1_val) or pd.isna(var2_val):
        return _BIVARIATE_NO_DATA_COLOR
    # Both low
    if var1_val < 0.33 and var2_val < 0.33:
        return '#e8e8e8'  # Light gray
    # Low var1, high var2
    if var1_val < 0.33 and var2_val >= 0.66:
        return '#73ae80'  # Green
    # High var1, low var2
    if var1_val >= 0.66 and var2_val < 0.33:
        return '#6c83b5'  # Blue
    # Both high
    if var1_val >= 0.66 and var2_val >= 0.66:
        return '#2a5a5b'  # Dark teal
    # Everything else: light green when var2 dominates, light blue otherwise
    # (ties go to light blue)
    return '#b8d6be' if var2_val > var1_val else '#b5c0da'


def create_bivariate_interactive_map(gdf, var1, var2, var1_name, var2_name, title, filename):
    """Create an interactive bivariate choropleth map and save it as HTML.

    Both variables are min-max normalized to [0, 1] and each feature is
    coloured by the combination of its two normalized values (low < 0.33,
    high >= 0.66, anything else is an intermediate class). Features where
    either variable is missing get a separate "No data" colour. A constant
    column normalizes to 0.0 everywhere, i.e. it is treated as "low".

    Args:
        gdf: GeoDataFrame containing the columns `var1` and `var2`. It is
            copied, so the caller's object is not modified.
        var1: Name of the first numeric column.
        var2: Name of the second numeric column.
        var1_name: Human-readable label for `var1` (tooltip and legend).
        var2_name: Human-readable label for `var2` (tooltip and legend).
        title: Title rendered above the map.
        filename: Output file name; the map is written to
            os.path.join(FIGURES_DIR, filename) using the notebook-level
            FIGURES_DIR constant.

    Returns:
        The folium.Map that was built and saved.

    Raises:
        ValueError: If `gdf` has no rows.
        KeyError: If `var1` or `var2` is not a column of `gdf`.
    """
    if len(gdf) == 0:
        raise ValueError("Cannot build a bivariate map from an empty GeoDataFrame")

    # Create a copy to avoid modifying the original
    gdf_copy = gdf.copy()

    # Ensure the GeoDataFrame has the right CRS for Folium
    if gdf_copy.crs and gdf_copy.crs != "EPSG:4326":
        gdf_copy = gdf_copy.to_crs("EPSG:4326")

    # Centre the initial view on the mean feature centroid. The centroids are
    # computed once and only used for the starting view (GeoPandas may warn
    # about centroids in a geographic CRS; precision is not needed here).
    centroids = gdf_copy.geometry.centroid
    center = [centroids.y.mean(), centroids.x.mean()]

    # Create the map
    m = folium.Map(location=center, zoom_start=11, tiles="cartodbpositron")

    # Add title
    title_html = f'''
        <h3 align="center" style="font-size:16px"><b>{title}</b></h3>
    '''
    m.get_root().html.add_child(folium.Element(title_html))

    # Normalize the variables (safe for constant columns and missing values)
    gdf_copy[f'{var1}_norm'] = _min_max_normalize(gdf_copy[var1])
    gdf_copy[f'{var2}_norm'] = _min_max_normalize(gdf_copy[var2])

    # Assign a fill colour per feature from its pair of normalized values
    gdf_copy['color'] = [
        _bivariate_color(v1, v2)
        for v1, v2 in zip(gdf_copy[f'{var1}_norm'], gdf_copy[f'{var2}_norm'])
    ]

    # Define a function to style the features
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': 'black',
            'weight': 0.5,
            'fillOpacity': 0.7
        }

    # Define a function for hover styling
    def highlight_function(feature):
        return {
            'weight': 3,
            'color': 'black',
            'fillOpacity': 0.9
        }

    # The two mapped variables always appear in the tooltip; the contextual
    # columns are added only when present (and not already listed), because
    # the tooltip cannot reference a property the data does not have.
    tooltip_fields = [var1, var2]
    tooltip_aliases = [var1_name, var2_name]
    optional_fields = (
        ('GEOID', 'Census Tract'),
        ('pct_college', 'College Education (%)'),
        ('pct_minority', 'Minority (%)'),
    )
    for field, alias in optional_fields:
        if field in gdf_copy.columns and field not in tooltip_fields:
            tooltip_fields.append(field)
            tooltip_aliases.append(alias)

    # Convert the GeoDataFrame to GeoJSON
    geojson_data = gdf_copy.to_json()

    # Add the GeoJSON layer to the map
    folium.GeoJson(
        geojson_data,
        style_function=style_function,
        highlight_function=highlight_function,
        tooltip=folium.GeoJsonTooltip(
            fields=tooltip_fields,
            aliases=tooltip_aliases,
            localize=True,
            sticky=False,
            labels=True,
            style="""
                background-color: #F0EFEF;
                border: 2px solid black;
                border-radius: 3px;
                box-shadow: 3px;
            """,
            max_width=800,
        ),
    ).add_to(m)

    # Add a legend covering every colour the map can contain, including the
    # two intermediate classes and the no-data colour
    legend_html = '''
    <div style="position: fixed; bottom: 50px; right: 50px; z-index: 1000; background-color: white;
                padding: 10px; border: 2px solid grey; border-radius: 5px;">
        <p><b>Legend</b></p>
        <div style="display: grid; grid-template-columns: auto auto; grid-gap: 5px;">
            <div style="background-color: #e8e8e8; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>Low {0}, Low {1}</div>
            <div style="background-color: #73ae80; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>Low {0}, High {1}</div>
            <div style="background-color: #6c83b5; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>High {0}, Low {1}</div>
            <div style="background-color: #2a5a5b; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>High {0}, High {1}</div>
            <div style="background-color: #b8d6be; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>Intermediate, {1} relatively higher</div>
            <div style="background-color: #b5c0da; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>Intermediate, {0} relatively higher</div>
            <div style="background-color: #ffffff; width: 20px; height: 20px; border: 1px solid black;"></div>
            <div>No data</div>
        </div>
    </div>
    '''.format(var1_name, var2_name)

    m.get_root().html.add_child(folium.Element(legend_html))

    # Save the map
    m.save(os.path.join(FIGURES_DIR, filename))

    return m

# Build the complaint-rate vs. median-income map; keyword arguments make the
# pairing of each column with its display label explicit
bivariate_map = create_bivariate_interactive_map(
    gdf=aggregated_gdf,
    var1='complaint_rate',
    var2='median_income',
    var1_name='Complaint Rate',
    var2_name='Median Income',
    title='NYC Flood Complaints vs Median Income',
    filename='interactive_flood_complaints_vs_income.html',
)

# Render the map inline in the notebook
display(bivariate_map)

## Conclusion

This analysis has demonstrated the spatial patterns of flood-related 311 complaints in NYC and their relationship with socioeconomic factors. Key findings include:

1. Flood-related complaints are not evenly distributed across NYC, with some areas experiencing significantly higher rates.
2. There are correlations between complaint rates and socioeconomic factors such as income, education, and minority population.
3. The interactive maps provide a powerful tool for exploring these relationships and identifying areas that may need additional resources or attention.

Future work could include:
- Incorporating additional data sources such as flood zone maps and elevation data
- Analyzing temporal patterns in relation to weather events
- Developing predictive models to forecast areas at risk of flooding based on complaints and other factors