In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Read 'Incident Zip' as text: zip codes are identifiers, not numbers. Letting
# pandas infer the dtype can turn them into ints/floats (dropping leading
# zeros, or yielding values like 10001.0 when the column has blanks), which
# the five-digit check applied later in this notebook would then reject.
df_noise = pd.read_csv('noise_data.csv', dtype={'Incident Zip': str})

In [3]:
# Parse the 'Created Date' column into pandas datetimes so the .dt accessor
# can be used on it further down
created_raw = df_noise['Created Date']
df_noise['Created Date'] = pd.to_datetime(created_raw)

In [4]:
# Function to validate zip codes
def is_valid_zip(zip_code):
    """Return True if ``zip_code`` is exactly five ASCII digits.

    Args:
        zip_code: Any value; it is converted with ``str()`` before checking.
            Integral floats (e.g. ``10001.0``) are treated as their integer
            value first, so a zip column that was loaded as float is not
            rejected wholesale. The caller's value itself is not modified.

    Returns:
        bool: True for a five-digit zip such as ``'10001'`` or ``10001``;
        False for anything else (NaN, None, ZIP+4 strings, wrong length,
        text with surrounding whitespace or a trailing newline).
    """
    # 10001.0 would stringify to '10001.0' and fail the digit check; NaN and
    # inf are not integral, so they fall through and are rejected below.
    if isinstance(zip_code, float) and zip_code.is_integer():
        zip_code = int(zip_code)
    # fullmatch (unlike match with '$') does not tolerate a trailing newline,
    # and [0-9] (unlike \d) does not accept non-ASCII digit characters.
    return re.fullmatch(r'[0-9]{5}', str(zip_code)) is not None

# Keep only the rows whose 'Incident Zip' passes is_valid_zip
valid_zip_mask = df_noise['Incident Zip'].apply(is_valid_zip)
df_noise = df_noise[valid_zip_mask]

In [5]:
# Create Year and Month columns from 'Created Date'.
# assign() builds a new frame instead of writing into df_noise in place: the
# frame was produced by a boolean-mask filter, and column assignment on such
# a result can trigger pandas' SettingWithCopyWarning.
df_noise = df_noise.assign(
    Year=df_noise['Created Date'].dt.year,
    Month=df_noise['Created Date'].dt.month,
)

In [6]:
# Count rows per (Year, Incident Zip, Month) combination; the count lands in
# the 'NoiseComplaintCount' column of the resulting frame
noise_by_zip_month = (
    df_noise
    .groupby(['Year', 'Incident Zip', 'Month'])
    .size()
    .reset_index(name='NoiseComplaintCount')
)

In [7]:
# Average the per-month complaint counts within each (Year, Incident Zip) pair.
# NOTE(review): the mean is taken over the rows present in noise_by_zip_month,
# so a month with no rows for a zip is left out rather than counted as 0 --
# confirm that is the intended definition of "average monthly".
noise_by_zip_year = (
    noise_by_zip_month
    .groupby(['Year', 'Incident Zip'])['NoiseComplaintCount']
    .mean()
    .reset_index(name='AvgMonthlyNoiseComplaints')
)

In [8]:
# Expose the zip column under the name 'ZipCode' in the yearly table
noise_by_zip_year = noise_by_zip_year.rename({'Incident Zip': 'ZipCode'}, axis='columns')

In [9]:
# Order rows by zip code first, then by year within each zip code
noise_by_zip_year = noise_by_zip_year.sort_values(by=['ZipCode', 'Year'])

In [10]:
# Write the yearly per-zip averages to CSV, leaving out the index column
output_csv = 'cleaned_noise_data.csv'
noise_by_zip_year.to_csv(output_csv, index=False)