### NYC Inmates

###### This notebook aims to analyse and visualise inmate data collected by New York City prisons in order to answer questions  such as the possibility of a relationship between the mental health of an inmate and violence and whether or not prisons need to improve mental health facilities.

###### 1. Data Cleaning
###### 2. Exploratory Data Analysis
###### 3. Data Visualisation
###### 4. Conclusion

In [21]:
#importing the necessary packages
import plotly.dashboard_objs as dashboard
import IPython.display
from IPython.display import Image
 
dboard = dashboard.Dashboard()
import pandas as pd

import plotly.graph_objs as go
import seaborn as sns
import matplotlib.pyplot as plt

from plotly.offline import plot
import scipy
                                                    

In [2]:
# chart_studio.tools.set_credentials_file(username='millie_n', api_key='oOaKfFuIv4B1PlY107O7')

In [4]:
#reading in the data
df = pd.read_csv('daily-inmates-in-custody.csv')


In [5]:
# checking missing values
df.isnull().sum()

INMATEID                 0
ADMITTED_DT              0
DISCHARGED_DT         7151
CUSTODY_LEVEL          156
BRADH                    0
RACE                    24
GENDER                  24
AGE                     13
INMATE_STATUS_CODE       0
SEALED                   0
SRG_FLG                  0
TOP_CHARGE             872
INFRACTION               0
dtype: int64

###### All the values in the DISCHARGED_DT column are missing so I am dropping the column.

In [6]:
df.dropna(axis=1,how='all')

Unnamed: 0,INMATEID,ADMITTED_DT,CUSTODY_LEVEL,BRADH,RACE,GENDER,AGE,INMATE_STATUS_CODE,SEALED,SRG_FLG,TOP_CHARGE,INFRACTION
0,152258,2018-08-24T01:46:33.000,MIN,Y,W,M,54.0,DE,N,N,140.25,N
1,20124341,2018-06-13T00:59:55.000,MAX,Y,W,M,23.0,DE,N,N,125.25,Y
2,155323,2019-02-05T12:04:19.000,MAX,N,B,M,31.0,CS,N,Y,105.05,Y
3,118754,2019-02-24T18:22:23.000,MAX,N,O,M,28.0,CS,N,N,,Y
4,20203998,2019-05-02T01:35:22.000,MAX,Y,B,M,27.0,DE,N,N,125.25,Y
5,20200982,2019-08-09T18:03:42.000,MIN,N,O,M,57.0,CSP,N,N,160.05,N
6,20100177,2019-06-29T12:49:17.000,MIN,N,O,M,48.0,CS,N,N,155.25,N
7,20006004,2019-05-23T13:19:00.000,MED,N,B,M,34.0,DEP,N,N,155.30,N
8,20173572,2019-08-13T11:19:00.000,MED,N,B,M,19.0,DE,N,N,160.15,N
9,49115,2019-02-12T20:02:00.000,MIN,N,O,M,57.0,CS,N,N,110-140.20,N


###### The null hypothesis states that there is no relationship between mental health and being potentially violent. 
###### The alternative hypothesis states that there is a strong relationship between inmates under mental observation and being potentially violent.

In [27]:
r = scipy.stats.pearsonr((df['BRADH'] == 'Y'),(df['INFRACTION'] == 'Y'))
r

(0.17296678612605018, 3.8235682266464816e-49)

###### A p value of 3.8e-49 disproves the null hypothesis and leads me to accepting the alternative hypothesis which states that inmates under mental observation are in fact potentially violent. This shows that there is statistical significance between the two variables.

In [7]:
mental_obs_custody = df[(df.BRADH == 'Y') & (df.INFRACTION == 'Y')]
mental_obs_custody.count()

INMATEID              1313
ADMITTED_DT           1313
DISCHARGED_DT            0
CUSTODY_LEVEL         1313
BRADH                 1313
RACE                  1312
GENDER                1312
AGE                   1313
INMATE_STATUS_CODE    1313
SEALED                1313
SRG_FLG               1313
TOP_CHARGE            1219
INFRACTION            1313
dtype: int64

 ###### This is a count of all inmates who are under mental observation and are potentially harmful.

In [8]:
mental_custody = mental_obs_custody['CUSTODY_LEVEL'].value_counts(normalize=False).to_frame()
label = mental_custody.index.tolist()
value = mental_custody['CUSTODY_LEVEL'].tolist()
mo = go.Pie(labels=label,values=value)
layout = go.Layout(   
        title='Custody Levels of Inmates Under Mental Observation With Infractions',
        height=600,
        margin=go.Margin(l=0, r=200, b=100, t=100, pad=4)   # Margins - Left, Right, Top Bottom, Padding
        )

fig = go.Figure(data=[mo], layout=layout)
plot(fig)



plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [9]:
mental = df[(df['BRADH'] == 'Y') & (df['CUSTODY_LEVEL'])]
mental_custody_level = mental['CUSTODY_LEVEL'].value_counts(normalize=False).to_frame()
label = mental_custody_level.index.tolist()
value = mental_custody_level['CUSTODY_LEVEL'].tolist()
moc = go.Pie(labels=label,values=value) 
layout = go.Layout(   
        title='Custody Levels of Inmates Under Mental Observation',
        height=600,
        margin=go.Margin(l=0, r=200, b=100, t=100, pad=4)   # Margins - Left, Right, Top Bottom, Padding
        )

fig = go.Figure(data=[moc], layout=layout)
plot(fig)



plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [10]:
date = df['BRADH'].value_counts(normalize=False).to_frame()
labels = date.index.tolist()

values = date['BRADH'].tolist()
mo = [go.Bar(x=labels,y=values)]
layout = dict(title = "Inmates Under Mental Observation",
              xaxis= dict(title= 'Mental Observation',ticklen= 5,zeroline= False))
fig = dict(data = mo, layout = layout)
# plot.sign_in('millie_n', '686orP7Nbe1JFcss9NfB')




plot(fig)


'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [11]:
custodyl = df['CUSTODY_LEVEL'].value_counts(normalize=False).to_frame()
labels = custodyl.index.tolist()
values = custodyl['CUSTODY_LEVEL'].tolist()
bar = [go.Bar(x=labels,y=values)]

layout = dict(title = "Custody Levels for Inmates",
              xaxis= dict(title= 'Custody Level',ticklen= 5,zeroline= False))
fig = dict(data = bar, layout = layout)
plot(fig)

'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [12]:
race = df['RACE'].value_counts(normalize=False).to_frame()
labels = race.index.tolist()
values = race['RACE'].tolist()
title = 'Distribution of different races in prison'

pie = go.Pie(labels=labels,values=values)
layout = go.Layout(   
        title=title,
        height=600,
        margin=go.Margin(l=0, r=200, b=100, t=100, pad=4)   # Margins - Left, Right, Top Bottom, Padding
        )

fig = go.Figure(data=[pie], layout=layout)
plot(fig)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [13]:
date = df['SRG_FLG'].value_counts(normalize=False).to_frame()
labels = date.index.tolist()

values = date['SRG_FLG'].tolist()
data = [go.Bar(x=labels,y=values)]
layout = dict(title = "Inmates Affiliated With a Gang",
              xaxis= dict(title= 'Gang Affiliation',ticklen= 5,zeroline= False))
fig = dict(data = data, layout = layout)
plot(fig)

'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [14]:
age = df['AGE'].value_counts(normalize=False).to_frame()
labels = age.index.tolist()
values = age['AGE'].tolist()
title = 'Distribution of different ages in prison'

trace = go.Bar(x=labels,y=values)
layout = go.Layout(   
        title=title,
        xaxis= dict(title= 'Ages',ticklen= 5,zeroline= False),
        yaxis= dict(title= 'Number of Inmates',ticklen= 5,zeroline= False),
        height=600,
        margin=go.Margin(l=0, r=200, b=100, t=100, pad=4)  # Margins - Left, Right, Top Bottom, Padding
        )

fig = go.Figure(data=[trace], layout=layout)
plot(fig)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [15]:
infract = df['INFRACTION'].value_counts(normalize=False).to_frame()
labels = infract.index.tolist()
values = infract['INFRACTION'].tolist()
title = 'Inmates who have infractions'

infract = go.Pie(labels=labels,values=values)
layout = go.Layout(   
        title=title,
        height=600,
        margin=go.Margin(l=0, r=200, b=100, t=100, pad=4)   # Margins - Left, Right, Top Bottom, Padding
        )

fig = go.Figure(data=[infract], layout=layout)
plot(fig)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [16]:
gender = df['GENDER'].value_counts(normalize=False).to_frame()
label = gender.index.tolist()
value = gender['GENDER'].tolist()
mo = go.Pie(labels=label,values=value)
layout = go.Layout(   
        title='Gender',
        height=600,
        margin=go.Margin(l=0, r=200, b=100, t=100, pad=4)   # Margins - Left, Right, Top Bottom, Padding
        )

fig = go.Figure(data=[mo], layout=layout)
plot(fig)



plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




'file:///home/madi/Desktop/dashboard/temp-plot.html'

In [17]:
plots = [mo,bar,pie,trace,infract]

In [38]:
box_1 = {
    'type': 'box',
    'boxType': 'plot',
    'fileId': 'file:///home/madi/Desktop/dashboard/temp-plot.html',
    'title': 'scatter-for-dashboard'
}

box_2 = {
    'type': 'box',
    'boxType': 'plot',
    'fileId': 'file:///home/madi/Desktop/dashboard/temp-plot.html',
    'title': 'pie-for-dashboard'
}

box_3 = {
    'type': 'box',
    'boxType': 'plot',
    'fileId': 'file:///home/madi/Desktop/dashboard/temp-plot.html',
    'title': 'box-for-dashboard',
    'shareKey':'None'
}

dboard.insert(box_1)

PlotlyError: Make sure the box_id is specfied if there is at least one box in your dashboard.

In [39]:
dboard.insert(box_2,'below',1)