# Feature Information
<table style='margin-left: auto; margin-right: auto'>
    <tr>
        <th colspan='3'> Cleaned DOHMH Dog Bite Data </th>
    </tr>
    <tr>
        <th> Column Name </th>
        <th> Description </th>
        <th> Data Type </th>
    </tr>
    <tr>
        <td> spay_neuter </td>
        <td> Surgical removal of the dog's reproductive organs. True (reported to DOHMH as Spayed or Neutered), False (Unknown or Not Spayed or Neutered). </td>
        <td> Boolean </td>
    </tr>
    <tr>
        <td> borough </td>
        <td> Borough of the dog bite. </td>
        <td> Text </td>
    </tr>
    <tr>
        <td> year </td>
        <td> Year in which the bite was reported. </td>
        <td> Integer </td>
    </tr>
    <tr>
        <td> month </td>
        <td> Month in which the bite was reported. </td>
        <td> Integer </td>
    </tr>
    <tr>
        <td> day_of_week </td>
        <td> Day of the week on which the bite was reported. </td>
        <td> Integer </td>
    </tr>
    <tr>
        <td> mixed/other </td>
        <td> Indicates that the dog was a mixed or other breed. Multiple True values across the breed columns indicate a mixed-breed dog. </td>
        <td> Boolean </td>
    </tr>
    <tr>
        <td> pit_bull </td>
        <td> Indicates that the dog was a pit bull. Multiple True values across the breed columns indicate a mixed-breed dog. </td>
        <td> Boolean </td>
    </tr>
    <tr>
        <td> shih_tzu </td>
        <td> Indicates that the dog was a Shih Tzu. Multiple True values across the breed columns indicate a mixed-breed dog. </td>
        <td> Boolean </td>
    </tr>
    <tr>
        <td> chihuahua </td>
        <td> Indicates that the dog was a Chihuahua. Multiple True values across the breed columns indicate a mixed-breed dog. </td>
        <td> Boolean </td>
    </tr>
    <tr>
        <td> german_shepherd </td>
        <td> Indicates that the dog was a German shepherd. Multiple True values across the breed columns indicate a mixed-breed dog. </td>
        <td> Boolean </td>
    </tr>
</table>

# Import

In [34]:
# libraries and modules
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import SilhouetteVisualizer
from sklearn.preprocessing import MinMaxScaler


In [35]:
# check for non-numeric values in zip_code
def is_numeric(x):
    """Return True if ``x`` can be converted to a float, otherwise False.

    Intended for spotting non-numeric entries in a column (e.g. zip codes).
    Anything ``float()`` accepts counts as numeric, which includes strings
    such as ``'nan'`` and ``'inf'``.

    Parameters
    ----------
    x : object
        Value to test.

    Returns
    -------
    bool
        True when ``float(x)`` succeeds, False when the conversion fails.
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None); ValueError: unparsable
        # string; OverflowError: integer too large to fit in a float.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return False
    return True

In [36]:
# load the preprocessed dog-bite records from disk
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which looks
# like a saved index — consider index_col=0 once downstream cells are confirmed
# not to rely on it.
preprocessed = pd.read_csv(
    filepath_or_buffer='../data/processed/dog_bite_preprocessed.csv'
)

# preview the first five rows
preprocessed.head(5)

Unnamed: 0,spay_neuter,borough,year,month,day_of_week,latitude,longitude,mixed/other,pit_bull,german_shepherd,shih_tzu,chihuahua,yorkshire_terrier
0,False,brooklyn,2018,1,0,40.641026,-74.016688,True,False,False,False,False,False
1,False,brooklyn,2018,1,5,40.577372,-73.988706,False,True,False,False,False,False
2,False,brooklyn,2018,1,0,40.677916,-74.005154,True,False,False,False,False,False
3,False,brooklyn,2018,1,1,40.577372,-73.988706,False,True,False,False,False,False
4,False,brooklyn,2018,1,2,40.677916,-74.005154,True,False,False,False,False,False


# Wrangling
Borough is only needed for EDA, not for clustering, so it is dropped.  
Numeric features are normalized with min-max scaling.

In [37]:
# clustering does not use the borough label, so work on a copy without it
wrangled = preprocessed.drop(columns='borough')

# preview the first five rows
wrangled.head(5)

Unnamed: 0,spay_neuter,year,month,day_of_week,latitude,longitude,mixed/other,pit_bull,german_shepherd,shih_tzu,chihuahua,yorkshire_terrier
0,False,2018,1,0,40.641026,-74.016688,True,False,False,False,False,False
1,False,2018,1,5,40.577372,-73.988706,False,True,False,False,False,False
2,False,2018,1,0,40.677916,-74.005154,True,False,False,False,False,False
3,False,2018,1,1,40.577372,-73.988706,False,True,False,False,False,False
4,False,2018,1,2,40.677916,-74.005154,True,False,False,False,False,False


In [38]:
# columns rescaled with min-max scaling; the boolean columns are left untouched
feats_to_scale = ['year', 'month', 'day_of_week', 'latitude', 'longitude']

# learn each column's min/max, then overwrite those columns with the scaled values
scaler = MinMaxScaler().fit(wrangled[feats_to_scale])
wrangled[feats_to_scale] = scaler.transform(wrangled[feats_to_scale])

# preview the first five rows
wrangled.head(5)

Unnamed: 0,spay_neuter,year,month,day_of_week,latitude,longitude,mixed/other,pit_bull,german_shepherd,shih_tzu,chihuahua,yorkshire_terrier
0,False,0.428571,0.0,0.0,0.339782,0.424821,True,False,False,False,False,False
1,False,0.428571,0.0,0.833333,0.177495,0.477559,False,True,False,False,False,False
2,False,0.428571,0.0,0.0,0.433834,0.446559,True,False,False,False,False,False
3,False,0.428571,0.0,0.166667,0.177495,0.477559,False,True,False,False,False,False
4,False,0.428571,0.0,0.333333,0.433834,0.446559,True,False,False,False,False,False


# DBSCAN