In [81]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
# original data
data = pd.read_csv("Crimedata.csv") 

In [82]:
col_dict = {'V1267': 'Alcohol',
'V1268' : "Alcohol_12months",
 'V1327': 'Marijuana', # at time of offense
 'V1315' : 'Marijuana_30days',
 'V1339' : 'Marijuana_12months',
 'V1330': 'Heroin',
 'V1318' : 'Heroin_30days',
 'V1342' : 'Heroin_12months',
 'V1328': 'Cocaine',
 'V1316': 'Cocaine_30days',
 'V1340': 'Cocaine_12months',
 'RV0001': 'Age',
 'RV0003': 'Race',
 'RV0005': 'Sex',
 'V0772': 'State'} # State where living at time of arrest

In [83]:
data.rename(columns=col_dict,inplace=True)

In [84]:
data.Alcohol.value_counts()
# 0 will be used for na values

(2) 2 = No              15064
(1) 1 = Yes              7144
(-1) -1 = Don't Know       68
(-2) -2 = Refusal          44
Name: Alcohol, dtype: int64

In [85]:
data.Marijuana_30days.value_counts()

(1) 1 = Yes             11299
(2) 2 = No               9315
(-1) -1 = Don't Know       18
(-2) -2 = Refusal           4
Name: Marijuana_30days, dtype: int64

In [86]:
cols_to_clean = ['Alcohol',
 'Alcohol_12months',
 'Marijuana',
 'Marijuana_30days',
 'Marijuana_12months',
 'Heroin',
 'Heroin_30days',
 'Heroin_12months',
 'Cocaine',
 'Cocaine_30days',
 'Cocaine_12months']
all_cols = cols_to_clean + [ 'Age','Sex', 'Race', 'State']

df = data[all_cols]
for col in cols_to_clean:
    df[col] = df[col].str.extract(r'([\-0-9]+)').fillna(0).astype(int)
df['Sex'] = df['Sex'].str.extract(r'([A-Za-z]+)')
df['Race'] = df['Race'].str.slice(7)

In [87]:
df

Unnamed: 0,Alcohol,Alcohol_12months,Marijuana,Marijuana_30days,Marijuana_12months,Heroin,Heroin_30days,Heroin_12months,Cocaine,Cocaine_30days,Cocaine_12months,Age,Sex,Race,State
0,2,2,0,2,2,0,0,0,0,2,2,51,Male,White (NH),GA
1,2,0,1,1,1,0,0,0,0,0,0,29,Male,White (NH),SC
2,2,0,2,2,2,0,0,0,2,1,1,43,Female,White (NH),IN
3,1,1,2,1,1,2,2,2,2,2,2,45,Male,White (NH),CA
4,2,1,0,1,1,0,0,0,0,0,0,31,Female,Black (NH),OH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24843,1,2,0,2,0,0,0,0,0,1,0,32,Male,Black (NH),VA
24844,2,0,1,2,2,0,0,0,2,2,2,36,Male,White (NH),MI
24845,2,0,0,2,2,0,0,0,0,2,2,34,Male,White (NH),GA
24846,2,2,2,2,0,1,1,0,2,2,0,66,Male,Hispanic,TX


In [88]:
df[(df.Alcohol!=1) & (df.Alcohol_12months==1)]

Unnamed: 0,Alcohol,Alcohol_12months,Marijuana,Marijuana_30days,Marijuana_12months,Heroin,Heroin_30days,Heroin_12months,Cocaine,Cocaine_30days,Cocaine_12months,Age,Sex,Race,State
4,2,1,0,1,1,0,0,0,0,0,0,31,Female,Black (NH),OH
7,2,1,0,1,1,0,2,2,0,1,1,28,Male,Black (NH),CA
10,2,1,2,2,1,2,2,2,2,1,2,29,Female,Hispanic,TX
34,2,1,2,1,2,0,0,0,2,1,2,40,Female,White (NH),NC
51,2,1,0,2,1,0,0,0,0,0,0,31,Male,Black (NH),MO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24759,-2,1,0,1,1,0,0,0,0,0,0,51,Male,Black (NH),MI
24769,2,1,0,1,1,0,0,0,0,2,1,19,Male,Hispanic,GA
24789,2,1,2,2,2,0,0,0,0,0,0,31,Male,White (NH),MO
24799,2,1,0,1,1,0,2,2,0,1,1,44,Male,Hispanic,TX


In [89]:
def func(row):
    if row.Marijuana==1 or row.Marijuana_30days==1 or row.Marijuana_12months==1:
        return "Marijuana"
    if row.Heroin==1 or row.Heroin_30days==1 or row.Heroin_12months==1:
        return "Heroin"
    if row.Cocaine==1 or row.Cocaine_30days==1 or row.Cocaine_12months==1:
        return "Cocaine"
    if row.Alcohol==1 or row.Alcohol_12months==1:
        return  "Alcohol"
    return None
sample = df.copy()
sample["drug_type"] = df.apply(func, axis=1)
sample = sample[sample['drug_type'].notna()]

In [90]:
def assign_time_drug_use(row):
    drug_name = row["drug_type"]
    if row[drug_name]==1:
        return "Time of Arrest"
    if row[drug_name+"_12months"] ==1:
        return "12 months before"
    if drug_name != "Alcohol" and row[drug_name+"_30days"] ==1:
        return "30 days before"

    return None
sample["time"] = sample.apply(assign_time_drug_use, axis=1)
# sample = sample[sample['drug_type'].notna()]

In [77]:
sample

Unnamed: 0,Alcohol,Alcohol_12months,Marijuana,Marijuana_30days,Marijuana_12months,Heroin,Heroin_30days,Heroin_12months,Cocaine,Cocaine_30days,Cocaine_12months,Age,Sex,Race,State,drug_type,time
1,2,0,1,1,1,0,0,0,0,0,0,29,Male,White (NH),SC,Marijuana,Time of Arrest
2,2,0,2,2,2,0,0,0,2,1,1,43,Female,White (NH),IN,Cocaine,12 months before
3,1,0,2,1,1,2,2,2,2,2,2,45,Male,White (NH),CA,Marijuana,12 months before
4,2,0,0,1,1,0,0,0,0,0,0,31,Female,Black (NH),OH,Marijuana,12 months before
6,1,0,1,1,1,0,0,0,0,0,0,20,Male,Black (NH),OH,Marijuana,Time of Arrest
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24841,2,0,2,2,2,1,1,2,2,2,2,25,Female,White (NH),VA,Heroin,Time of Arrest
24842,2,0,1,1,0,0,0,0,0,0,0,21,Male,Black (NH),IL,Marijuana,Time of Arrest
24843,1,0,0,2,0,0,0,0,0,1,0,32,Male,Black (NH),VA,Cocaine,30 days before
24844,2,0,1,2,2,0,0,0,2,2,2,36,Male,White (NH),MI,Marijuana,Time of Arrest


In [110]:
def categorize_age(val):
    if val<=18:
        return "<18"
    if val <=24:
        return "18-24"
    if val <=34:
        return  "25-34"
    if val<= 44:
        return "35-44"
    if val <=54:
        return "45-54"
    if val<=64:
        return "55-64"
    return "64+"
sample["Age_group"]  = sample.Age.apply(categorize_age)

In [112]:
sample.Age_group.value_counts()

25-34    5899
35-44    4697
45-54    2935
18-24    1838
55-64    1265
64+       276
<18        28
Name: Age_group, dtype: int64

In [114]:
import plotly.express as px
import plotly.graph_objects as go

fig = px.sunburst(sample, path=["drug_type", "time"])
fig.show()

In [79]:

# homeless
sample.columns

Index(['Alcohol', 'Alcohol_12months', 'Marijuana', 'Marijuana_30days',
       'Marijuana_12months', 'Heroin', 'Heroin_30days', 'Heroin_12months',
       'Cocaine', 'Cocaine_30days', 'Cocaine_12months', 'Age', 'Sex', 'Race',
       'State', 'drug_type', 'time'],
      dtype='object')

In [116]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px

# token = open(".sk.eyJ1IjoicGhpbGVua3UiLCJhIjoiY2w4eGlkcmQwMDUyZzNvazRhaXhmZndjMiJ9.dyAPb-U-r2-V7muIc-orrQ").read() # you will need your own token


app = Dash(__name__)

app.layout = html.Div([
    html.H4('Polotical candidate voting pool analysis'),
    html.P("Select a candidate:"),
    dcc.RadioItems(
        id='candidate', 
        options=["time", "Age_group", "Sex", "Race"],
        value="time",
        inline=True
    ),
    dcc.Graph(id="graph"),
])


@app.callback(
    Output("graph", "figure"), 
    Input("candidate", "value"))
def display_choropleth(candidate):
    fig = px.sunburst(sample, path=["drug_type", candidate])   
    fig.show()

    return fig

app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
