In [1]:
import BrownsvilleAPI
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

# Brownsville Analysis

This analysis uses the Brownsville API to perform a preliminary analysis of the dataset. It explores the most common types of complaints and identifies trends in the number of complaints reported over the years.

In [None]:
# Set FORCE_LOAD to True to construct the API object with force_load=True.
# NOTE(review): force_load presumably bypasses any cached copy and reloads the
# data from its source -- confirm in BrownsvilleAPI.
# An explicit flag replaces the previous habit of swapping commented-out lines.
FORCE_LOAD = False

if FORCE_LOAD:
    brownsville = BrownsvilleAPI.Brownsville(force_load=True)
else:
    # Default construction, identical to the original cell.
    brownsville = BrownsvilleAPI.Brownsville()

In [None]:
# Show the result of `.head()` on the loaded data as the cell output
# (presumably the first rows of a pandas DataFrame -- confirm against BrownsvilleAPI).
brownsville.data.head()

## Building Coverage

In [None]:
# Report how many buildings the API object exposes.
building_count = len(brownsville.buildings)
print(f"Building coverage: {building_count}")

## Complaint types

In [None]:
# Pair every building id with its complaint occurrences, grouped by
# (majorcategory, minorcategory).
building_common_complaints = [
    (
        building_id,
        brownsville.get_feature_occurrences_by_building(
            building_id,
            by=["majorcategory", "minorcategory"],
            find_all=True,
        ),
    )
    for building_id in brownsville.buildings
]

# Rank buildings by their total number of complaints, largest first.
building_common_complaints.sort(
    key=lambda pair: pair[1].values.sum(),
    reverse=True,
)

In [None]:
# Summarise the first ten entries of building_common_complaints.
for id_, complaints in building_common_complaints[:10]:
    # The first index entry is reported as the "most common" category pair.
    # NOTE(review): this assumes the occurrences come back ordered by count -- confirm.
    major_category, minor_category = complaints.index[0]
    num_complaints = complaints.values.sum()

    summary_rows = (
        ("Building ID:", id_),
        ("Most common major category:", major_category),
        ("Most common minor category:", minor_category),
        ("Number of complaints:", num_complaints),
    )
    for row_label, row_value in summary_rows:
        print(row_label, row_value)

    # Blank line between buildings.
    print()

## Complaints over time

In [None]:
# Complaint counts grouped with period="year". The variable is named for what
# it holds (the original called this yearly data `complaints_by_month`).
complaints_by_year = brownsville.records_by_date(period="year")

plt.bar(complaints_by_year.index, complaints_by_year.values)
# Title and explicit show() so this figure matches the other plotting cells
# and does not leave the bar-container repr as the cell output.
plt.title("Number of complaints by year")
plt.show()

In [None]:
# records_by_season() yields a pair that is unpacked into bar positions and
# bar heights.
seasons, season_totals = brownsville.records_by_season()

plt.bar(seasons, season_totals)
plt.title("Number of complaints by season")
plt.show()

In [None]:
# records_by_date() is called with its defaults; the title below treats the
# result as monthly counts (presumably the default period is a month -- confirm).
complaints_by_month = brownsville.records_by_date()

month_labels = complaints_by_month.index
month_totals = complaints_by_month.values

plt.bar(month_labels, month_totals)
plt.title("Number of complaints by month")
plt.show()

In [None]:
# Overlay complaint counts for consecutive multi-year windows so their shapes
# can be compared on a shared "position within the window" axis.
num_years = 8  # total span requested from the API
steps = 4      # value passed as `step`; the title describes it as the interval length in years

# NOTE(review): with num_years/step set, records_by_date presumably returns one
# series per window -- confirm against BrownsvilleAPI.
periods = brownsville.records_by_date(period="year", num_years=num_years, step=steps)

for period in periods:
    counts = list(period.values)
    if not counts:
        # An empty window has no first/last index to label and nothing to draw.
        continue

    label = f"{period.index[0]} to {period.index[-1]}"

    # x positions are derived from the data itself, so a window that is shorter
    # or longer than `steps` no longer triggers a length mismatch in plt.plot.
    plt.plot(np.arange(len(counts)), counts, label=label)

plt.xlabel("Number of years")
plt.ylabel("Complaints reported")
# Title is built from the constants above so it cannot drift from the query.
plt.title(f"Number of complaints over a period of {num_years} years on {steps} year intervals")
plt.legend()
plt.show()