# Women's imprisonment rates
## Criminal Justice Statistics Police Force Area: Custodial sentence lengths

#### Importing pandas library and reading in data

In [2]:
import pandas as pd
df = pd.read_csv('data/interim/PFA_2009-21_women_cust_comm_sus.csv')

In [2]:
df.head()

Unnamed: 0,year,pfa,sex,age_group,offence,outcome,sentence_len,freq
0,2009,Avon and Somerset,Female,Young adults,Violence against the person,Community sentence,,2
1,2009,Avon and Somerset,Female,Young adults,Violence against the person,Suspended sentence,,1
2,2009,Avon and Somerset,Female,Young adults,Violence against the person,Suspended sentence,,1
3,2009,Avon and Somerset,Female,Young adults,Public order offences,Community sentence,,1
4,2009,Avon and Somerset,Female,Young adults,Miscellaneous crimes against society,Community sentence,,1


#### Filtering data for custodial sentences

In [3]:
filt = df['outcome'] == 'Immediate custody'
df2 = df[filt]
# filt2 = df['year'] >= 2014
# df2 = df[filt & filt2]

In [4]:
df2.head()

Unnamed: 0,year,pfa,sex,age_group,offence,outcome,sentence_len,freq
12,2009,Avon and Somerset,Female,Young adults,Theft Offences,Immediate custody,Up to 1 month,1
18,2009,Avon and Somerset,Female,Young adults,Violence against the person,Immediate custody,Imprisonment for public protection,1
27,2009,Avon and Somerset,Female,Adults,Miscellaneous crimes against society,Immediate custody,More than 12 months and up to 18 months,1
30,2009,Avon and Somerset,Female,Adults,Theft Offences,Immediate custody,Up to 1 month,3
32,2009,Avon and Somerset,Female,Adults,Theft Offences,Immediate custody,Up to 1 month,1


#### Recategorising sentence lengths into less than six months and between six and 12 months

In [5]:
df2['sentence_len'].value_counts()

Up to 1 month                              12688
More than 1 month and up to 2 months       10240
More than 3 months and under 6 months       9664
More than 2 months and up to 3 months       8219
More than 2 years and up to 3 years         4678
More than 12 months and up to 18 months     4361
6 months                                    3824
More than 18 months and up to 2 years       3597
More than 6 months and up to 9 months       3329
12 months                                   3005
More than 3 years and under 4 years         1256
More than 9 months and under 12 months      1148
More than 4 years and up to 5 years         1117
4 years                                      992
More than 5 years and up to 6 years          702
More than 6 years and up to 7 years          360
More than 7 years and up to 8 years          278
Life sentence                                267
More than 10 years and up to 15 years        184
More than 8 years and up to 9 years          157
More than 9 years an

In [6]:
less_6months = ["Up to 1 month", 
                "More than 1 month and up to 2 months",
                "More than 2 months and up to 3 months",
                "More than 3 months and under 6 months"]

In [7]:
six_12_months = ["6 months",
                 "More than 6 months and up to 9 months",
                 "More than 9 months and under 12 months"]

In [8]:
filt = df2['sentence_len'].isin(less_6months)
df2.loc[filt, 'sentence_len'] = "Less than 6 months"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [9]:
filt= df2['sentence_len'].isin(six_12_months)
df2.loc[filt, 'sentence_len'] = "6 months and under 12 months"

In [10]:
df2['sentence_len'].value_counts()

Less than 6 months                         40811
6 months and under 12 months                8301
More than 2 years and up to 3 years         4678
More than 12 months and up to 18 months     4361
More than 18 months and up to 2 years       3597
12 months                                   3005
More than 3 years and under 4 years         1256
More than 4 years and up to 5 years         1117
4 years                                      992
More than 5 years and up to 6 years          702
More than 6 years and up to 7 years          360
More than 7 years and up to 8 years          278
Life sentence                                267
More than 10 years and up to 15 years        184
More than 8 years and up to 9 years          157
More than 9 years and up to 10 years         133
Imprisonment for public protection            82
More than 15 years and less than life         49
Name: sentence_len, dtype: int64

In [11]:
df2.head()

Unnamed: 0,year,pfa,sex,age_group,offence,outcome,sentence_len,freq
12,2009,Avon and Somerset,Female,Young adults,Theft Offences,Immediate custody,Less than 6 months,1
18,2009,Avon and Somerset,Female,Young adults,Violence against the person,Immediate custody,Imprisonment for public protection,1
27,2009,Avon and Somerset,Female,Adults,Miscellaneous crimes against society,Immediate custody,More than 12 months and up to 18 months,1
30,2009,Avon and Somerset,Female,Adults,Theft Offences,Immediate custody,Less than 6 months,3
32,2009,Avon and Somerset,Female,Adults,Theft Offences,Immediate custody,Less than 6 months,1


In [12]:
filt1 = df2['sentence_len'] != "Less than 6 months"
filt2 = df2['sentence_len'] != "6 months and under 12 months"
filt = filt1 & filt2
df2.loc[filt, 'sentence_len'] = "Over 12 months"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [13]:
df2['sentence_len'].value_counts()

Less than 6 months              40811
Over 12 months                  21218
6 months and under 12 months     8301
Name: sentence_len, dtype: int64

### Grouping for future analysis

In [14]:
df3 = df2.groupby(['pfa', 'year', 'sentence_len'], as_index=False)['freq'].sum()
df3

Unnamed: 0,pfa,year,sentence_len,freq
0,Avon and Somerset,2009,6 months and under 12 months,11
1,Avon and Somerset,2009,Less than 6 months,117
2,Avon and Somerset,2009,Over 12 months,37
3,Avon and Somerset,2010,6 months and under 12 months,16
4,Avon and Somerset,2010,Less than 6 months,113
...,...,...,...,...
1626,Wiltshire,2020,Less than 6 months,15
1627,Wiltshire,2020,Over 12 months,13
1628,Wiltshire,2021,6 months and under 12 months,3
1629,Wiltshire,2021,Less than 6 months,15


#### Outputting to CSV

In [15]:
df3.to_csv('../data/interim/PFA_2009-21_women_cust_sentence_len.csv', index=False)

#### Checking the CSV

In [16]:
review = pd.read_csv('../data/interim/PFA_2009-21_women_cust_sentence_len.csv')
review.head()

Unnamed: 0,pfa,year,sentence_len,freq
0,Avon and Somerset,2009,6 months and under 12 months,11
1,Avon and Somerset,2009,Less than 6 months,117
2,Avon and Somerset,2009,Over 12 months,37
3,Avon and Somerset,2010,6 months and under 12 months,16
4,Avon and Somerset,2010,Less than 6 months,113


In [18]:
review.query('pfa == "Essex"')

Unnamed: 0,pfa,year,sentence_len,freq
426,Essex,2009,6 months and under 12 months,25
427,Essex,2009,Less than 6 months,173
428,Essex,2009,Over 12 months,46
429,Essex,2010,6 months and under 12 months,18
430,Essex,2010,Less than 6 months,153
431,Essex,2010,Over 12 months,46
432,Essex,2011,6 months and under 12 months,20
433,Essex,2011,Less than 6 months,157
434,Essex,2011,Over 12 months,44
435,Essex,2012,6 months and under 12 months,16
