In [None]:
import pandas as pd

# Read the 311 CSV extract into a DataFrame and preview the first rows.
raw_csv_path = "data/311_filtered_data.csv"
df = pd.read_csv(raw_csv_path)
df.head()

# Data Cleaning Process
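This notebook documents the data cleaning steps applied to the NYC 311 data:
- Loaded the filtered data (originally retrieved from the Socrata API) from `data/311_filtered_data.csv`
- Converted the `created_date` and `closed_date` fields to datetimes
- Dropped rows with missing values in `created_date`, `closed_date`, or `borough`
- Calculated Resolution Time in days (`closed_date` minus `created_date`)
- Rounded Resolution Time to 2 decimals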


In [3]:
# Parse both timestamp columns as datetimes; errors='coerce' turns values
# that cannot be parsed into NaT instead of raising.
for date_col in ('created_date', 'closed_date'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')


In [4]:
# Keep only rows that have both timestamps and a borough.
required_columns = ['created_date', 'closed_date', 'borough']
df = df.dropna(subset=required_columns)


In [5]:
# Resolution time = elapsed time between creation and closure, in days.
# NOTE(review): a row whose closed_date precedes its created_date yields a
# negative value here - confirm whether such rows should be filtered out.
seconds_per_day = 86400
elapsed = df['closed_date'] - df['created_date']
df['Resolution Time (days)'] = elapsed.dt.total_seconds() / seconds_per_day


In [6]:
# Finalize the frame and export it for Tableau. The file is written only
# once, after every transformation, so the CSV on disk never holds the
# unrounded data that still contains latitude/longitude.

# Drop latitude and longitude. errors='ignore' keeps this cell re-runnable:
# df is reassigned here, so without it a second run would raise KeyError
# because the columns are already gone.
df = df.drop(columns=['latitude', 'longitude'], errors='ignore')

# Round Resolution Time (days) to 2 decimals and update the column
df['Resolution Time (days)'] = df['Resolution Time (days)'].round(2)

# Save the cleaned data; index=False keeps the row index out of the file
df.to_csv("311_cleaned_ready_for_tableau.csv", index=False)


In [9]:
# Show which columns remain in the cleaned frame.
remaining_columns = df.columns
print(remaining_columns)


Index(['unique_key', 'created_date', 'closed_date', 'complaint_type',
       'borough', 'Resolution Time (days)'],
      dtype='object')


In [10]:
# Round Resolution Time (days)
# Round the resolution time to 2 decimals and write the export again.
# NOTE(review): this repeats the rounding and save already performed earlier
# in the notebook; the cell looks redundant - confirm before removing it.
resolution_col = 'Resolution Time (days)'
df[resolution_col] = df[resolution_col].round(2)

# Overwrite the export with the current frame (row index omitted).
df.to_csv("311_cleaned_ready_for_tableau.csv", index=False)


In [11]:
# Read the exported file back in and preview it as a sanity check.
# No parse_dates is passed, so the date columns come back as plain strings.
export_path = "311_cleaned_ready_for_tableau.csv"
cleaned_df = pd.read_csv(export_path)
cleaned_df.head()


Unnamed: 0,unique_key,created_date,closed_date,complaint_type,borough,Resolution Time (days)
0,56416396,2023-01-01 00:00:00,2023-01-03 14:56:48,Food Poisoning,QUEENS,2.62
1,56417527,2023-01-01 00:00:09,2023-01-01 00:36:06,Illegal Fireworks,BROOKLYN,0.02
2,56416252,2023-01-01 00:00:42,2023-01-01 17:34:15,Noise - Residential,BRONX,0.73
3,56418795,2023-01-01 00:00:45,2023-01-01 01:24:10,Illegal Parking,MANHATTAN,0.06
4,56418136,2023-01-01 00:00:46,2023-01-01 01:01:43,Noise - Residential,BROOKLYN,0.04
