In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the raw data set. The path is relative, so "Crimedata.csv" must sit in
# the notebook's working directory.
data = pd.read_csv("Crimedata.csv") 

In [3]:
# Map raw variable IDs to readable column names, grouped by substance.
# A bare substance name (e.g. 'Marijuana') is the "at time of offense" variable;
# the suffixes mark the look-back window of the question.
col_dict = {
    # Alcohol
    'V1267': 'Alcohol',
    'V1268': 'Alcohol_12months',
    'V1263': 'Alcohol_ever',
    # Marijuana
    'V1327': 'Marijuana',  # at time of offense
    'V1315': 'Marijuana_30days',
    'V1339': 'Marijuana_12months',
    'V1291': 'Marijuana_ever',
    # Heroin
    'V1330': 'Heroin',
    'V1318': 'Heroin_30days',
    'V1342': 'Heroin_12months',
    'V1294': 'Heroin_ever',
    # Cocaine
    'V1328': 'Cocaine',
    'V1316': 'Cocaine_30days',
    'V1340': 'Cocaine_12months',
    'V1301': 'Cocaine_ever',
    # Demographics
    'RV0052': 'Highest_Year_Education_Before_Prison',
    'RV0001': 'Age',
    'RV0003': 'Race',
    'RV0005': 'Sex',
    'V0772': 'State',  # State where living at time of arrest
}
# Columns that are not listed in the mapping keep their original names.
data.rename(columns=col_dict, inplace=True)

In [4]:
# Snapshot the renamed frame so `data` can be restored later without
# re-reading the CSV.
d_copy = data.copy()

In [36]:
# Restore `data` from the `d_copy` snapshot, discarding any changes made to
# `data` since the snapshot was taken.
# NOTE(review): the execution count of this cell (36) is out of sequence, so it
# was run as an ad-hoc reset; on a fresh top-to-bottom run it is a no-op copy.
data = d_copy.copy()

In [5]:
# Substance-use columns whose raw values are strings with an embedded numeric
# code; only the code is kept. Downstream cells treat code 1 as "yes".
cols_to_clean = [
    'Alcohol',
    'Alcohol_12months',
    'Alcohol_ever',
    'Marijuana',
    'Marijuana_30days',
    'Marijuana_12months',
    'Marijuana_ever',
    'Heroin',
    'Heroin_30days',
    'Heroin_12months',
    'Heroin_ever',
    'Cocaine',
    'Cocaine_30days',
    'Cocaine_12months',
    'Cocaine_ever',
]
all_cols = cols_to_clean + ['Age', 'Sex', 'Race', 'State']

# Take an explicit copy: assigning columns on a bare `data[all_cols]` selection
# is the chained-assignment pattern that triggers SettingWithCopyWarning
# (hidden here only because warnings are globally silenced).
df = data[all_cols].copy()

for col in cols_to_clean:
    # expand=False returns a Series (not a one-column DataFrame), which is the
    # right shape for a column assignment. Values with no digits become 0.
    df[col] = (
        df[col]
        .str.extract(r'([\-0-9]+)', expand=False)
        .fillna(0)
        .astype(int)
    )

# Keep only the first alphabetic word of the Sex value.
df['Sex'] = df['Sex'].str.extract(r'([A-Za-z]+)', expand=False)
# Drop the first 7 characters of the Race value.
# NOTE(review): presumably a fixed-width code prefix - confirm against the raw file.
df['Race'] = df['Race'].str.slice(7)

In [33]:
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,Alcohol,Alcohol_12months,Alcohol_ever,Marijuana,Marijuana_30days,Marijuana_12months,Marijuana_ever,Heroin,Heroin_30days,Heroin_12months,Heroin_ever,Cocaine,Cocaine_30days,Cocaine_12months,Cocaine_ever,Age,Sex,Race,State
0,2,2,1,0,2,2,1,0,0,0,2,0,2,2,1,51,Male,White (NH),GA
1,2,0,1,1,1,1,1,0,0,0,2,0,0,0,2,29,Male,White (NH),SC
2,2,0,1,2,2,2,1,0,0,0,2,2,1,1,1,43,Female,White (NH),IN
3,1,1,1,2,1,1,1,2,2,2,1,2,2,2,1,45,Male,White (NH),CA
4,2,1,1,0,1,1,1,0,0,0,2,0,0,0,2,31,Female,Black (NH),OH


In [6]:
def drug_use(row):
    """Return the most recent window in which the row reports any substance use.

    Windows are checked from most to least recent; the first window with at
    least one column equal to 1 determines the label.

    Parameters
    ----------
    row : object
        Exposes the usage columns as attributes (for example a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str
        One of "Time of Arrest", "Past 30 days", "Past 12 months",
        "More than a year ago" or "Never used".
    """
    # Ordered from most recent to least recent. The 30-day window has no
    # alcohol column, and must only look at *_30days columns: a 12-month
    # column here would mislabel 12-month-only use as "Past 30 days".
    usage_windows = (
        ("Time of Arrest",
         ("Cocaine", "Heroin", "Marijuana", "Alcohol")),
        ("Past 30 days",
         ("Cocaine_30days", "Heroin_30days", "Marijuana_30days")),
        ("Past 12 months",
         ("Alcohol_12months", "Marijuana_12months", "Cocaine_12months", "Heroin_12months")),
        ("More than a year ago",
         ("Marijuana_ever", "Cocaine_ever", "Alcohol_ever", "Heroin_ever")),
    )
    for label, columns in usage_windows:
        if any(getattr(row, column) == 1 for column in columns):
            return label
    return "Never used"
# Work on a copy so the derived columns are added to df_2 and `df` keeps only
# the cleaned source columns.
df_2 = df.copy()
# Label each row with its most recent window of reported use.
df_2["time_of_use"] = df_2.apply(drug_use, axis=1)

In [7]:
def drug_type(row):
    """Return the substances reported in the row's most recent window of use.

    Windows are examined from most to least recent (time of offense, past
    30 days, past 12 months, ever). The first window in which at least one
    column equals 1 decides the result; older windows are not consulted.

    Parameters
    ----------
    row : object
        Exposes the usage columns as attributes (for example a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    tuple of str
        Substance names in the fixed order Marijuana, Heroin, Cocaine,
        Alcohol; an empty tuple when no column equals 1.
    """
    every_substance = ("Marijuana", "Heroin", "Cocaine", "Alcohol")
    # (column suffix, substances that have a column for that window)
    windows = (
        ("", every_substance),
        ("_30days", ("Marijuana", "Heroin", "Cocaine")),  # no alcohol column here
        ("_12months", every_substance),
        ("_ever", every_substance),
    )
    for suffix, substances in windows:
        reported = tuple(
            name for name in substances if getattr(row, name + suffix) == 1
        )
        if reported:
            return reported
    return ()
# Compute from df_2 (like every other derived column) rather than from `df`,
# so this cell does not silently depend on `df` still matching df_2.
df_2["drug_type"] = df_2.apply(drug_type, axis=1)

In [8]:
def num_drugs(row):
    """Collapse a row's ``drug_type`` tuple into a single display label.

    Parameters
    ----------
    row : mapping-like with attribute access
        Must provide ``row["drug_type"]`` (a sequence of substance names) and
        ``row.time_of_use``.

    Returns
    -------
    str
        The substance name when exactly one substance is listed; otherwise
        "No use" when ``time_of_use`` is "Never used"; otherwise
        "More than one drugs" (this also covers an empty ``drug_type`` with
        any other ``time_of_use``).
    """
    substances = row["drug_type"]
    if len(substances) == 1:
        return substances[0]
    return "No use" if row.time_of_use == "Never used" else "More than one drugs"
# Requires the "drug_type" and "time_of_use" columns to be present on df_2.
df_2["num_drugs"] = df_2.apply(num_drugs, axis=1)

In [9]:
def categorize_age(val):
    """Map a numeric age to its age-group label.

    Parameters
    ----------
    val : int or float
        Age in years.

    Returns
    -------
    str
        "<18", "18-24", "25-34", "35-44", "45-54", "55-64" or "64+"
        (the last label is used for every age above 64).
    """
    # Strictly below 18: an 18-year-old belongs to "18-24", as that label says.
    if val < 18:
        return "<18"
    # (inclusive upper bound, label), in ascending order
    bounded_groups = (
        (24, "18-24"),
        (34, "25-34"),
        (44, "35-44"),
        (54, "45-54"),
        (64, "55-64"),
    )
    for upper_bound, label in bounded_groups:
        if val <= upper_bound:
            return label
    return "64+"
# Bucket each row's Age into a labelled age group.
df_2["Age_group"]  = df_2.Age.apply(categorize_age)

In [10]:
# Preview the frame with the derived columns added.
df_2.head()

Unnamed: 0,Alcohol,Alcohol_12months,Alcohol_ever,Marijuana,Marijuana_30days,Marijuana_12months,Marijuana_ever,Heroin,Heroin_30days,Heroin_12months,...,Cocaine_12months,Cocaine_ever,Age,Sex,Race,State,time_of_use,drug_type,num_drugs,Age_group
0,2,2,1,0,2,2,1,0,0,0,...,2,1,51,Male,White (NH),GA,More than a year ago,"(Marijuana, Cocaine, Alcohol)",More than one drugs,45-54
1,2,0,1,1,1,1,1,0,0,0,...,0,2,29,Male,White (NH),SC,Time of Arrest,"(Marijuana,)",Marijuana,25-34
2,2,0,1,2,2,2,1,0,0,0,...,1,1,43,Female,White (NH),IN,Past 30 days,"(Cocaine,)",Cocaine,35-44
3,1,1,1,2,1,1,1,2,2,2,...,2,1,45,Male,White (NH),CA,Time of Arrest,"(Alcohol,)",Alcohol,45-54
4,2,1,1,0,1,1,1,0,0,0,...,0,2,31,Female,Black (NH),OH,Past 30 days,"(Marijuana,)",Marijuana,25-34


In [11]:
# Write the enriched frame next to the notebook; index=False leaves the row
# index out of the file.
# NOTE(review): the "drug_type" column holds tuples, which are written as their
# string form - confirm that is what the consumer of this CSV expects.
df_2.to_csv("extracted_data_vis1.csv", index=False)

In [17]:
# Static sunburst: inner ring is the most recent window of use, outer ring is
# the substance label for that window.
fig = px.sunburst(
    df_2,
    path=["time_of_use", "num_drugs"],
    color_discrete_sequence=px.colors.qualitative.D3,
    height=800,
    width=600,
)
fig.show()

In [19]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px

# SECURITY: a commented-out line that embedded what looked like a secret access
# token was removed from here. Never paste tokens into a notebook (not even in
# comments); load them from an environment variable or an untracked file, and
# revoke any token that has already been committed or shared.


# Dash application object; callbacks are registered on it via @app.callback.
app = Dash(__name__)

# Page layout: a heading, a radio selector (id "candidate") for the grouping
# column, and the graph (id "graph") that the callback fills in.
app.layout = html.Div([
    html.H4('Sunburst Visualization of The Distribution of Different Types of Drugs Among Prisoners'),
    html.P("Select a criteria to view:"),
    dcc.RadioItems(
        id='candidate', 
        # Each option is used as a df_2 column name by the callback.
        options=["time_of_use", "Age_group", "Sex", "Race"],
        value="time_of_use",
        inline=True
    ),
    dcc.Graph(id="graph"),
])
# Display-name mapping.
# NOTE(review): not referenced anywhere in the visible cells - confirm whether
# it is still needed.
text_dict = {'Alcohol_percentage': 'Alcohol',
 'Marijuana_percentage': 'Marijuana',
 'Heroin_percentage': 'Heroin',
 'Cocaine_percentage': 'Cocaine',
 "count": "num_prisoners"}

@app.callback(
    Output("graph", "figure"),
    Input("candidate", "value"))
def display_choropleth(candidate):
    """Build the sunburst figure for the grouping selected in the radio items.

    Despite its name, this callback produces a sunburst chart, not a
    choropleth.

    Parameters
    ----------
    candidate : str
        Name of the df_2 column used as the outer ring of the sunburst
        (the current value of the "candidate" RadioItems).

    Returns
    -------
    plotly.graph_objects.Figure
        Sunburst with "num_drugs" as the inner ring and ``candidate`` as the
        outer ring; Dash places it in the "graph" component.
    """
    fig = px.sunburst(
        df_2,
        path=["num_drugs", candidate],
        color_discrete_sequence=px.colors.qualitative.D3,
        height=800,
        width=900,
    )
    # Return only: Dash renders the figure in the page. Calling fig.show() here
    # would additionally render it through the default plotly renderer on
    # every callback invocation.
    return fig

# Start the Dash development server with debug mode on and the auto-reloader
# off.
# NOTE(review): presumably the reloader is disabled because it does not work
# from inside a notebook kernel, and newer Dash releases prefer `app.run` over
# `run_server` - confirm against the installed Dash version.
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
