In [1]:
# CHEATSHEET --> https://scikit-learn.org/stable/tutorial/machine_learning_map/index.html
import sys

import tensorflow.keras
import pandas as pd
import sklearn as sk
import tensorflow as tf
import numpy as np
from sklearn import preprocessing
import folium as folium
import matplotlib.pyplot as plt
import math
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression, PoissonRegressor, BayesianRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
from sklearn.compose import ColumnTransformer

In [2]:
# Load the NYPD dangerous-drugs arrest records, keeping only the arrest date
# and the law code. `usecols` skips parsing of every other column, and the
# explicit `.loc` selection fixes the column order.
# Remove empty fields, none are empty in this case. `.dropna()` returns a new
# frame instead of mutating a column selection in place, which can raise
# SettingWithCopyWarning and makes the cell harder to re-run safely.
data = (
    pd.read_csv(
        "./NYPD_Arrests_Data_DANGEROUS_DRUGS.csv",
        sep=",",
        usecols=["ARREST_DATE", "LAW_CODE"],
    )
    .loc[:, ["ARREST_DATE", "LAW_CODE"]]
    .dropna()
)

In [3]:
# Collapse every arrest date to its calendar month (monthly Period values).
# pd.to_datetime is used instead of .astype("datetime64"): pandas 2.x rejects
# the unit-less "datetime64" dtype with a TypeError.
# NOTE(review): this overwrites ARREST_DATE in place, so the cell is meant to
# run once per kernel session after the load cell.
data["ARREST_DATE"] = pd.to_datetime(data["ARREST_DATE"]).dt.to_period("M")


In [4]:
# Bucket the arrest rows by their law code; later cells read per-code sizes from this.
data_group = data.groupby(by="LAW_CODE")

In [5]:
# Let pandas show up to 200 rows before it starts eliding output, then print
# the number of arrests recorded under each law code.
pd.options.display.max_rows = 200
print(data_group.size())

LAW_CODE
PL 2200300    290866
PL 2200600        18
PL 2200601     17278
PL 2200602      4757
PL 2200603      2571
PL 2200604       668
PL 2200605      3124
PL 2200606       613
PL 2200607        68
PL 2200608        24
PL 2200900         4
PL 2200901      1839
PL 2200902       283
PL 2200903       583
PL 2200904       414
PL 2200905        67
PL 2200906      1666
PL 2200907       676
PL 2200908        55
PL 2200909        50
PL 2200910       336
PL 2200911       187
PL 2200912        67
PL 2200913       135
PL 2200914       121
PL 2200915         9
PL 2201600        67
PL 2201601    114378
PL 2201602      2317
PL 2201603      1546
PL 2201604       511
PL 2201605       672
PL 2201606       217
PL 2201607       246
PL 2201608       317
PL 2201609        83
PL 2201610        35
PL 2201611       231
PL 2201612      1696
PL 2201613       190
PL 2201801      4439
PL 2201802       246
PL 2201803       840
PL 2201804        20
PL 2201805       184
PL 2201806       259
PL 2201807        25
PL 2

In [6]:
# Build [law_code, arrest_count] pairs, one per law code, in group (label) order.
# The sizes Series is computed once and iterated by label; the previous loop
# recomputed data_group.size() on every iteration and indexed it by integer
# position, which is deprecated on a label-indexed Series.
# int() turns numpy integers into plain Python ints so the printed list stays
# readable regardless of the installed numpy version.
law_code_count = [
    [law_code, int(arrest_count)]
    for law_code, arrest_count in data_group.size().items()
]
print(law_code_count)

# The original sort key was left unfinished (`lambda x: `), which is a
# SyntaxError. Sort by the arrest count (second element of each pair),
# ascending as per sorted()'s default.
sorted_law_code_count = sorted(law_code_count, key=lambda pair: pair[1])

[['PL 2200300', 290866], ['PL 2200600', 18], ['PL 2200601', 17278], ['PL 2200602', 4757], ['PL 2200603', 2571], ['PL 2200604', 668], ['PL 2200605', 3124], ['PL 2200606', 613], ['PL 2200607', 68], ['PL 2200608', 24], ['PL 2200900', 4], ['PL 2200901', 1839], ['PL 2200902', 283], ['PL 2200903', 583], ['PL 2200904', 414], ['PL 2200905', 67], ['PL 2200906', 1666], ['PL 2200907', 676], ['PL 2200908', 55], ['PL 2200909', 50], ['PL 2200910', 336], ['PL 2200911', 187], ['PL 2200912', 67], ['PL 2200913', 135], ['PL 2200914', 121], ['PL 2200915', 9], ['PL 2201600', 67], ['PL 2201601', 114378], ['PL 2201602', 2317], ['PL 2201603', 1546], ['PL 2201604', 511], ['PL 2201605', 672], ['PL 2201606', 217], ['PL 2201607', 246], ['PL 2201608', 317], ['PL 2201609', 83], ['PL 2201610', 35], ['PL 2201611', 231], ['PL 2201612', 1696], ['PL 2201613', 190], ['PL 2201801', 4439], ['PL 2201802', 246], ['PL 2201803', 840], ['PL 2201804', 20], ['PL 2201805', 184], ['PL 2201806', 259], ['PL 2201807', 25], ['PL 220210

In [9]:
# Load the Penal Law lookup table (semicolon-separated).
# CODE is forced to str because a later cell matches it against string keys
# such as "220.03"; letting pandas infer a float dtype would make every such
# comparison False.
law_codes = pd.read_csv("./law_codes.csv", sep=";", dtype={"CODE": str})

In [10]:
# Show the law-code lookup table; pandas elides the middle rows of a long frame.
print(law_codes)

                                  Penal Law OFFENSE          CLASS    CODE
0                            Abandonment of a child       E Felony  260.00
1                      Abortion in the first degree       D Felony  125.45
2                     Abortion in the second degree       E Felony  125.40
3    Absconding from a community treatment facility       E Felony  205.19
4                Absconding from a furlough program  A Misdemeanor  205.18
..                                              ...            ...     ...
571               Welfare fraud in the fifth degree  A Misdemeanor  158.05
572               Welfare fraud in the first degree       B Felony  158.25
573              Welfare fraud in the fourth degree       E Felony  158.10
574              Welfare fraud in the second degree       C Felony  158.20
575               Welfare fraud in the third degree       D Felony  158.15

[576 rows x 3 columns]


In [37]:
# Print one "offense;class;count" line per arrest law code.
# The CODE -> (offense, class) lookup is built once instead of filtering the
# whole frame for every code. keep="first" mirrors the previous `.values[0]`,
# which used the first matching row.
offense_by_code = (
    law_codes.drop_duplicates(subset="CODE", keep="first")
    .set_index("CODE")[["Penal Law OFFENSE", "CLASS"]]
)
for raw_code, arrest_count in law_code_count:
    # "PL 2200300" -> "220.03": characters 3-5 and 6-7 of the arrest code are
    # joined with a dot; any trailing characters are dropped.
    transformed_code = raw_code[3:6] + "." + raw_code[6:8]
    if transformed_code not in offense_by_code.index:
        # Previously an unmatched code raised IndexError on `.values[0]` and
        # aborted the whole listing; emit a placeholder line instead.
        print("UNKNOWN (" + transformed_code + ");UNKNOWN;" + str(arrest_count))
        continue
    offense, offense_class = offense_by_code.loc[transformed_code]
    print(offense + ";" + offense_class + ";" + str(arrest_count))

Criminal possession of a controlled substance in the seventh degree;A Misdemeanor;290866
Criminal possession of a controlled substance in the fifth degree;D Felony;18
Criminal possession of a controlled substance in the fifth degree;D Felony;17278
Criminal possession of a controlled substance in the fifth degree;D Felony;4757
Criminal possession of a controlled substance in the fifth degree;D Felony;2571
Criminal possession of a controlled substance in the fifth degree;D Felony;668
Criminal possession of a controlled substance in the fifth degree;D Felony;3124
Criminal possession of a controlled substance in the fifth degree;D Felony;613
Criminal possession of a controlled substance in the fifth degree;D Felony;68
Criminal possession of a controlled substance in the fifth degree;D Felony;24
Criminal possession of a controlled substance in the fourth degree;C Felony;4
Criminal possession of a controlled substance in the fourth degree;C Felony;1839
Criminal possession of a controlled sub