In [1]:
from Brownsville import BrownsvilleAPI
import pandas as pd 
import matplotlib.pyplot as plt

# Brownsville Analysis

This analysis uses the Brownsville API to perform a preliminary analysis of the dataset. It explores the most common types of complaints and identifies trends in the number of complaints reported over the years.

In [2]:
# Load the dataset through the project API. With the default arguments the
# cell prints "Loading cached dataset..." (see the captured output below).
# Alternative constructor calls are kept for reference.
# NOTE(review): presumably `force_load=True` bypasses the cache and
# `update_map=False` skips a map refresh; confirm against BrownsvilleAPI.
# brownsville = BrownsvilleAPI.Brownsville(force_load=True)
# brownsville = BrownsvilleAPI.Brownsville(force_load=True, update_map=False)
brownsville = BrownsvilleAPI.Brownsville()

Loading cached dataset...


In [None]:
# Frequency of each distinct `unitsres` value, ordered by the value itself
# (sort_index) rather than by count.
# NOTE(review): `unitsres` is one of the PLUTO columns selected by a later
# cell; confirm the cached dataset already contains it on a fresh kernel.
brownsville.data["unitsres"].value_counts().sort_index()

## Building Coverage

In [None]:
# Report how many entries `brownsville.buildings` holds.
print(f"Building coverage: {len(brownsville.buildings)}")

## Complaint types

In [None]:
# Pair every building ID with its complaint counts grouped by
# (majorcategory, minorcategory), ordered so that the buildings with the
# largest total number of complaints come first.
building_common_complaints = sorted(
    (
        (
            building_id,
            brownsville.get_feature_occurrences_by_building(
                building_id,
                by=["majorcategory", "minorcategory"],
                find_all=True,
            ),
        )
        for building_id in brownsville.buildings
    ),
    key=lambda pair: pair[1].values.sum(),
    reverse=True,
)

In [None]:
# Print a short summary for the ten buildings with the most complaints.
# The first index entry of each building's counts is reported as its most
# common (major, minor) category pair.
# NOTE(review): this relies on the API returning counts sorted in descending
# order; confirm.
top_buildings = building_common_complaints[:10]
for id_, complaints in top_buildings:
    num_complaints = complaints.values.sum()
    major_category, minor_category = complaints.index[0]

    print("Building ID:", id_)
    print("Most common major category:", major_category)
    print("Most common minor category:", minor_category)
    print("Number of complaints:", num_complaints)
    print()

## Complaints over time

In [None]:
# Bar chart of complaint totals per year. The result is named for what it
# holds (yearly, not monthly, counts), and the figure gets a title and an
# explicit plt.show() like the other plotting cells in this notebook.
complaints_by_year = brownsville.records_by_date(period="year")
plt.bar(complaints_by_year.index, complaints_by_year.values)
plt.title("Number of complaints by year")
plt.show()

In [None]:
# Bar chart of complaints per season. The API call returns a pair that is
# used directly as the bar positions and bar heights.
seasons, season_counts = brownsville.records_by_season()
plt.bar(seasons, season_counts)
plt.title("Number of complaints by season")
plt.show()

In [None]:
# Bar chart of complaint totals using the default period of records_by_date;
# the index supplies the bar positions and the values the bar heights.
complaints_by_month = brownsville.records_by_date()
plt.bar(complaints_by_month.index, complaints_by_month.values)
plt.title("Number of complaints by month")
plt.show()

In [None]:
# Overlay the yearly complaint counts of consecutive multi-year windows so the
# windows can be compared position by position.
steps = 4       # number of years per window
num_years = 8   # total span requested from the API
years = brownsville.records_by_date(period="year", num_years=num_years, step=steps)

for year in years:
    y = list(year.values)
    label = f"{year.index[0]} to {year.index[-1]}"

    # Plot against the position inside the window. Using the series' own
    # length avoids both the never-imported numpy and a shape mismatch when a
    # window holds fewer than `steps` entries.
    plt.plot(range(len(y)), y, label=label)

plt.xlabel("Number of years")
plt.ylabel("Complaints reported")
plt.title(f"Number of complaints over a period of {num_years} years on {steps} year intervals")
plt.legend()
plt.show()

In [None]:
# Ask the API for the date range of the "received" field.
# NOTE(review): the return format is defined by BrownsvilleAPI and is not
# visible in this notebook.
brownsville.get_date_range("received")

In [None]:
# Number of distinct values in the `address` column. dropna=False keeps a
# missing value (if any) in the count, matching len(<column>.unique()).
brownsville.data["address"].nunique(dropna=False)

In [None]:
# Show the map produced by the Brownsville API helper.
brownsville.display_map()

In [None]:
# Preview the zero-padded block (5 characters) and lot (4 characters) values.
# Both columns go into one frame so both are displayed: a bare expression that
# is not the last line of a cell is computed and then silently discarded.
# Nothing is written back to brownsville.data here.
padded_parts = pd.DataFrame(
    {
        "block": brownsville.data["block"].astype(str).str.zfill(5),
        "lot": brownsville.data["lot"].astype(str).str.zfill(4),
    }
)
padded_parts.head()

In [None]:
# Build the BBL join key as borough id + 5-character block + 4-character lot.
# Block and lot are zero-padded so that keys have a fixed width.
# NOTE(review): assumes `boroughid` is a single digit and that the PLUTO `bbl`
# column uses the same 10-character string form; confirm before merging.
bbl = (
    brownsville.data["boroughid"].astype(str)
    + brownsville.data["block"].astype(str).str.zfill(5)
    + brownsville.data["lot"].astype(str).str.zfill(4)
)
brownsville.data["bbl"] = bbl
brownsville.data["bbl"]

In [None]:
from Brownsville.data_api import Client

# PLUTO attributes requested for every lot (passed verbatim as `select`).
pluto_columns = (
    "bbl, bldgclass, bldgarea, numbldgs, numfloors, unitsres, unitstotal,"
    + "landuse, ownertype, ownername, yearbuilt, yearalter1, yearalter2"
)

# Fetch the PLUTO rows matching `cd=316` and left-join them onto the complaint
# data by `bbl`, so every complaint row is kept whether or not it matches.
# NOTE(review): re-running this cell merges again onto the already-merged
# frame; confirm that is acceptable.
with Client(data_path="./data/old") as client:
    df_pluto = client.load_pluto(
        fetch_all=True,
        select=pluto_columns,
        where="cd=316",
    )
    brownsville.data = brownsville.data.merge(df_pluto, on="bbl", how="left")

In [None]:
# List the column labels currently present in the dataset.
brownsville.data.columns

In [None]:
# Inspect the dtype of every column.
brownsville.data.dtypes

In [None]:
import pandas as pd

# Load the raw inputs compared in the cells below. `cdata` and `hmain` are
# referenced by those cells, so they must be defined here for the notebook to
# run from a fresh kernel. Forward slashes keep the paths valid on every
# platform (the PLUTO cell already uses "./data/old").
# NOTE(review): confirm the `brownsville_test` files still exist at these paths.
cdata = pd.read_csv("data/brownsville_test/complaint-problems-raw.csv", index_col=0)
cdata_2 = pd.read_csv("data/Complaint_Problems.csv")
hmain = pd.read_csv("data/brownsville_test/housing-maintenance-code-complaints-raw.csv", index_col=0)


In [None]:
# (rows, columns) of the table read from Complaint_Problems.csv.
cdata_2.shape

In [None]:
# Distinct complaint IDs present in each of the three tables. Note that
# cdata_2 still carries the original `ComplaintID` column label at this point.
cdata_cid_unique = pd.unique(cdata["complaintid"])
cdata_2_cid_unique = pd.unique(cdata_2["ComplaintID"])
hmain_cid_unique = pd.unique(hmain["complaintid"])

In [None]:
# Boolean masks marking which rows of one table have a complaint ID that also
# occurs in another table:
#   filter_1: rows of cdata   whose ID occurs in hmain
#   filter_3: rows of cdata_2 whose ID occurs in hmain
#   filter_2: rows of hmain   whose ID occurs in cdata
# Series.isin is vectorized (no per-row Python scan of the ID array) and keeps
# the index of the table it was computed from, so each mask aligns with that
# table when used as a boolean indexer.
filter_1 = cdata["complaintid"].isin(hmain_cid_unique)
filter_3 = cdata_2["ComplaintID"].isin(hmain_cid_unique)
filter_2 = hmain["complaintid"].isin(cdata_cid_unique)

In [None]:
# Keep only the rows selected by each mask. Every mask is applied to the table
# it was computed from; filter_3 was built over the rows of cdata_2, so it
# selects from cdata_2 (not hmain).
df_1 = cdata[filter_1]
df_2 = hmain[filter_2]
df_3 = cdata_2[filter_3]

In [None]:
# Number of True entries (matching rows) in each mask, one per line.
for mask in (filter_1, filter_2, filter_3):
    print(mask.sum())

In [None]:
# Rows per complaint ID in df_1, ordered by complaint ID.
print(df_1["complaintid"].value_counts().sort_index())


In [None]:
# (rows, columns) of df_1.
print(df_1.shape)

In [None]:
# Rows per complaint ID in df_2, ordered by complaint ID.
print(df_2["complaintid"].value_counts().sort_index())


In [None]:
# (rows, columns) of df_2.
print(df_2.shape)

In [None]:
 
 merge_data = pd.merge(
    cdata,
    hmain,
    on="complaintid",
    how="inner",
)
merge_data = merge_data[
            ["complaintid", "buildingid", "boroughid", "borough", "housenumber",
             "streetname", "zip", "block", "lot", "apartment", "communityboard",
             "receiveddate", "status", "unittypeid", "spacetypeid",
             "typeid", "majorcategoryid", "minorcategoryid", "codeid",
             "statusdescription"]
        ]

In [None]:
# (rows, columns) of the merged table.
merge_data.shape

In [None]:
# Map every original column label of cdata_2 to its lower-cased form with
# trailing whitespace removed. The keys must be the labels exactly as they
# appear in the frame (e.g. "ComplaintID"), otherwise the rename in the next
# cell matches nothing.
column_translations = {c: str(c).lower().rstrip() for c in cdata_2.columns}
column_translations

In [None]:
# Apply the label mapping so cdata_2 uses the translated column names.
cdata_2 = cdata_2.rename(columns=column_translations)


In [None]:
# (rows, columns) of cdata_2 at this point in the notebook.
cdata_2.shape

In [None]:
# Columns retained in the exported complaint-problems table, in output order.
columns = [
    "complaintid",
    "unittypeid",
    "spacetypeid",
    "typeid",
    "majorcategoryid",
    "minorcategoryid",
    "codeid",
    "statusid",
    "statusdate",
    "statusdescription",
]

# Restrict cdata_2 to exactly those columns.
cdata_2 = cdata_2.loc[:, columns]

In [None]:
# Write the reduced complaint-problems table to disk (the index is written as
# the first column, to_csv's default). Forward slashes keep the path valid on
# every platform; backslashes only act as separators on Windows.
cdata_2.to_csv("data/brownsville/complaint-problems-raw.csv")

In [None]:
# Load the combined dataset. Forward slashes keep the path valid on every
# platform; backslashes only act as separators on Windows.
cd = pd.read_csv("data/combined_data.csv")

In [None]:
# Count of missing values per column in the combined dataset.
cd.isna().sum()