# imports

In [1]:
from pathlib import Path

import pandas as pd
import requests

# acquire data

In [2]:
# Download the records from Maryland's open-data portal as JSON.
# NOTE(review): `$limit=3000` caps how many rows come back; confirm the
# dataset holds no more rows than that, otherwise the analysis is truncated.
api_url = "https://opendata.maryland.gov/api/id/crti-ybyp.json?$select=*&$order=`:id`+ASC&$limit=3000&$offset=0"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(api_url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error payload as data.
response.raise_for_status()
data = response.json()

# clean data

In [7]:
# Working frame built from the downloaded records, plus an independent copy
# that keeps the records as they were loaded.
df = pd.DataFrame(data=data)
backup = df.copy()

# analyze data

In [None]:
# Convert the three date columns to pandas datetimes so they can be compared
# with timestamps further down. ("recieved_date" is the column's actual
# spelling in this frame.)
for date_column in ("recieved_date", "incident_date", "incident_closed_date"):
    df[date_column] = pd.to_datetime(df[date_column])

In [10]:
# Cutoff: only complaints received strictly after this date are kept.
year_ago_date = pd.Timestamp('2023-03-09')

# Take an independent copy so later column edits do not touch `df`.
last_year_df = df.loc[df["recieved_date"].gt(year_ago_date)].copy()

# Complaints per county within the filtered frame.
last_year_df["county"].value_counts()

county
Baltimore City        101
Anne Arundel           85
Prince George's        83
Frederick              62
Baltimore              57
Montgomery             35
Cecil                  25
Dorchester             17
Harford                17
Howard                 15
Allegany               15
Washington             14
Charles                12
Carroll                 9
Wicomico                8
Not Yet Determined      7
Worcester               7
St. Mary's              6
Garrett                 5
Caroline                3
Queen Anne's            2
Somerset                2
Talbot                  1
Kent                    1
Statewide               1
Name: count, dtype: int64

In [12]:
# Turn each ", "-separated complaint_type string into a list of types.
# Only string values are split; anything else (missing values, or lists left
# behind by an earlier run of this cell) passes through unchanged. This makes
# the cell safe to re-run: `.str.split` applied to already-split lists would
# replace them with NaN.
last_year_df["complaint_type"] = last_year_df["complaint_type"].apply(
    lambda value: value.split(', ') if isinstance(value, str) else value
)

In [14]:
# Preview on the first five rows: each entry of a complaint_type list gets
# its own row.
last_year_df.head(5).explode("complaint_type")

Unnamed: 0,sno,complaint,complaint_description,complaint_type,recieved_date,incident_date,county,incident_closed_date,incident_status_desc,incident_zip
0,1,232264,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
1,2,232263,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
2,3,232262,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
3,4,232261,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
4,5,232260,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,


In [15]:
# Expand list-valued complaint_type cells so every listed type sits on its
# own row (the other columns are repeated for each type).
complaints_by_type = last_year_df.explode(column="complaint_type")

In [16]:
# Count how many exploded rows carry each complaint type.
last_year_by_type = complaints_by_type.loc[:, "complaint_type"].value_counts()

last_year_by_type

complaint_type
Air                                   223
Odor                                  186
Other                                 126
Smoke                                  56
Fugitive Dust/Particulate Matter       45
Fumes                                  39
Open Burning                           30
Asbestos Complaint                      3
Air Pollutant Release                   2
Noise Complaint                         2
Non-Tidal Wetlands/Waterway             2
Suspected Operation without Permit      1
ARA AQCP non-regulated entity           1
Name: count, dtype: int64

In [17]:
# Pull out the rows that cannot be attributed to a county: rows with no
# county value at all, and rows whose county is one of the placeholder labels.
placeholder_counties = ["Not Yet Determined", "Outside of Maryland", "Statewide"]
null_county_rows = last_year_df[last_year_df['county'].isnull()]
wrong_county_rows = last_year_df[last_year_df['county'].isin(placeholder_counties)]

# to_csv does not create missing folders, so make sure the export folder
# exists before writing (a fresh checkout may not have it).
Path("exported_data").mkdir(parents=True, exist_ok=True)

# Export each one separately.
null_county_rows.to_csv("exported_data/null_counties.csv")
wrong_county_rows.to_csv("exported_data/wrong_counties.csv")

In [18]:
# Keep only rows that have a county value and whose county is not one of the
# placeholder labels.
has_county = last_year_df['county'].notna()
is_placeholder = last_year_df['county'].isin(["Not Yet Determined", "Outside of Maryland", "Statewide"])
last_year_df = last_year_df[has_county & ~is_placeholder]

In [19]:
# Complaints per county in the current last_year_df.
last_year_by_county = last_year_df.loc[:, "county"].value_counts()

last_year_by_county

county
Baltimore City     101
Anne Arundel        85
Prince George's     83
Frederick           62
Baltimore           57
Montgomery          35
Cecil               25
Harford             17
Dorchester          17
Howard              15
Allegany            15
Washington          14
Charles             12
Carroll              9
Wicomico             8
Worcester            7
St. Mary's           6
Garrett              5
Caroline             3
Queen Anne's         2
Somerset             2
Talbot               1
Kent                 1
Name: count, dtype: int64

# export data

In [11]:
# Write the full complaints frame to CSV, limited to the columns below (in
# this order) and without the index column.
export_columns = [
    "complaint",
    "incident_date",
    "county",
    "incident_zip",
    "complaint_type",
    "complaint_description",
    "recieved_date",
    "incident_status_desc",
    "incident_closed_date",
]

# to_csv does not create missing folders; ensure the export folder exists so
# this cell also works when it is the first export cell to run.
Path("exported_data").mkdir(parents=True, exist_ok=True)

# Plain string literal: the path has no placeholders, so no f-string is needed.
df.to_csv(
    "exported_data/all_complaints.csv",
    index=False,
    columns=export_columns,
)

In [20]:
# Save both frequency tables as CSV files.
# NOTE(review): "complaint_typef_frequency" looks like a typo for
# "complaint_type_frequency"; the name is kept as-is in case something
# downstream reads this exact file. Confirm before renaming.
for frequency_table, csv_path in (
    (last_year_by_type, "exported_data/complaint_typef_frequency.csv"),
    (last_year_by_county, "exported_data/county_frequency.csv"),
):
    frequency_table.to_csv(csv_path)