<a href="https://colab.research.google.com/github/CindyXin97/MIT_COVID-19_Datathon/blob/master/County_level_Texas_Michigan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initial Explorations

In [0]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

from google.colab import files

## The Marshall Project Data

From https://github.com/themarshallproject/COVID_prison_data
+ State-by-state prison test, case, and deaths data (inmates and staff)

In [0]:
covid_prison_cases = pd.read_csv('https://raw.githubusercontent.com/themarshallproject/COVID_prison_data/master/data/covid_prison_cases.csv')
prison_populations = pd.read_csv('https://raw.githubusercontent.com/themarshallproject/COVID_prison_data/master/data/prison_populations.csv')
staff_populations = pd.read_csv('https://raw.githubusercontent.com/themarshallproject/COVID_prison_data/master/data/staff_populations.csv')

# drop notes columns & state name column
covid_prison_cases.drop(['notes'], axis = 1, inplace = True)
prison_populations.drop(['name', 'as_of_date'], axis = 1, inplace = True)
staff_populations.drop(['name', 'as_of_date', 'notes'], axis = 1, inplace = True)

In [4]:
covid_prison_cases.head()

Unnamed: 0,name,abbreviation,staff_tests,prisoner_tests,total_staff_cases,total_prisoner_cases,total_staff_deaths,total_prisoner_deaths,as_of_date
0,Alabama,AL,,102.0,17.0,8.0,0.0,1.0,2020-05-06
1,Alaska,AK,,235.0,8.0,1.0,0.0,0.0,2020-05-06
2,Arizona,AZ,,277.0,52.0,69.0,0.0,5.0,2020-05-06
3,Arkansas,AR,,,127.0,944.0,0.0,5.0,2020-05-07
4,California,CA,,1551.0,169.0,398.0,0.0,1.0,2020-05-06


In [5]:
# view most recent dates from each state + federal
#   ( max number of cases -> most recent )
idx0 = covid_prison_cases.groupby('name')['as_of_date'].transform(max) == covid_prison_cases['as_of_date']

covid_prison_cases_most_recent = covid_prison_cases[idx0]
covid_prison_cases_most_recent.head()

Unnamed: 0,name,abbreviation,staff_tests,prisoner_tests,total_staff_cases,total_prisoner_cases,total_staff_deaths,total_prisoner_deaths,as_of_date
0,Alabama,AL,,102.0,17.0,8.0,0.0,1.0,2020-05-06
1,Alaska,AK,,235.0,8.0,1.0,0.0,0.0,2020-05-06
2,Arizona,AZ,,277.0,52.0,69.0,0.0,5.0,2020-05-06
3,Arkansas,AR,,,127.0,944.0,0.0,5.0,2020-05-07
4,California,CA,,1551.0,169.0,398.0,0.0,1.0,2020-05-06


In [0]:
# merge with one-shot data of inmate and staff populations
merged_tables_most_recent = covid_prison_cases_most_recent.join( 
    prison_populations.set_index('abbreviation'), 
    on='abbreviation', rsuffix='_prisonpop' 
    ).join( 
        staff_populations.set_index('abbreviation'),  
        on='abbreviation', how='outer', rsuffix='_staffpop')

# compute rates of testing, rate of infection, rate of deaths
merged_tables_most_recent['rate_prisoner_tests'] = merged_tables_most_recent['prisoner_tests'] / merged_tables_most_recent['april_pop']
merged_tables_most_recent['rate_prisoner_cases'] = merged_tables_most_recent['total_prisoner_cases'] / merged_tables_most_recent['april_pop']
merged_tables_most_recent['rate_prisoner_deaths'] = merged_tables_most_recent['total_prisoner_deaths'] / merged_tables_most_recent['total_prisoner_cases']

In [8]:
merged_tables_most_recent.sort_values( by = ['prisoner_tests'], ascending = False).head()

Unnamed: 0,name,abbreviation,staff_tests,prisoner_tests,total_staff_cases,total_prisoner_cases,total_staff_deaths,total_prisoner_deaths,as_of_date,april_pop,april_pop_staffpop,rate_prisoner_tests,rate_prisoner_cases,rate_prisoner_deaths
21,Michigan,MI,,8113.0,293.0,2073.0,2.0,47.0,2020-05-06,37687,11963.0,0.215273,0.055006,0.022672
34,Ohio,OH,,6922.0,483.0,4312.0,2.0,42.0,2020-05-06,48765,12192.0,0.141946,0.088424,0.00974
41,Tennessee,TN,,5962.0,55.0,1959.0,0.0,1.0,2020-05-05,21150,5179.0,0.281891,0.092624,0.00051
42,Texas,TX,2379.0,2134.0,504.0,1336.0,5.0,23.0,2020-05-06,140124,36073.0,0.015229,0.009534,0.017216
20,Massachusetts,MA,,1906.0,151.0,351.0,0.0,7.0,2020-05-06,8173,4679.0,0.233207,0.042946,0.019943


In [64]:
merged_tables_most_recent[(merged_tables_most_recent['name'] == 'Texas') | (merged_tables_most_recent['name'] == 'Michigan') | (merged_tables_most_recent['name'] == 'New York')]

Unnamed: 0,name,abbreviation,staff_tests,prisoner_tests,total_staff_cases,total_prisoner_cases,total_staff_deaths,total_prisoner_deaths,as_of_date,april_pop,april_pop_staffpop,rate_prisoner_tests,rate_prisoner_cases,rate_prisoner_deaths
21,Michigan,MI,,8113.0,293.0,2073.0,2.0,47.0,2020-05-06,37687,11963.0,0.215273,0.055006,0.022672
31,New York,NY,,619.0,1143.0,415.0,4.0,15.0,2020-05-06,42784,19123.0,0.014468,0.0097,0.036145
42,Texas,TX,2379.0,2134.0,504.0,1336.0,5.0,23.0,2020-05-06,140124,36073.0,0.015229,0.009534,0.017216


## Covid Prison Data

From https://covidprisondata.com/data.html
+ the facility data csv file

In [10]:
# for upload if run as google colab
uploaded = files.upload()

Saving covidprisondata_facility_historical.csv to covidprisondata_facility_historical.csv


In [0]:
facility_data = pd.read_csv('covidprisondata_facility_historical.csv')

In [12]:
facility_data.columns

Index(['facilities', 'state', 'scrape_date', 'staff_positive', 'staff_deaths',
       'staff_recovered', 'units_in_quarantine', 'inmates_quarantine',
       'housing_type', 'inmates_isolation', 'inmates_positive',
       'inmates_deaths_probable', 'inmates_deaths_confirmed',
       'inmates_pending', 'inmates_recovered', 'inmates_deaths',
       'inmates_negative', 'inmates_medical_restriction',
       'inmates_medical_isolation', 'inmates_tested', 'inmates_retested',
       'daily_total_population', 'inmates_security_quarantine',
       'inmates_medical_quarantine', 'inmates_positive_in_custody',
       'inmates_released_positive', 'inmates_positive_symptomatic',
       'inmates_positive_asymptomatic', 'inmates_positive_current',
       'inmates_step_down', 'inmates_covid_deaths_underlying_conditions',
       'inmates_covid_deaths', 'inmates_medical', 'staff_negative',
       'inmates_positive_on_site', 'inmates_hospital', 'location', 'county',
       'jail_inmates_negative', 'jail_in

In [14]:
facility_data[facility_data['state'] == "Texas"].sort_values(by = ['inmates_positive'], ascending = False).head(10)

Unnamed: 0,facilities,state,scrape_date,staff_positive,staff_deaths,staff_recovered,units_in_quarantine,inmates_quarantine,housing_type,inmates_isolation,inmates_positive,inmates_deaths_probable,inmates_deaths_confirmed,inmates_pending,inmates_recovered,inmates_deaths,inmates_negative,inmates_medical_restriction,inmates_medical_isolation,inmates_tested,inmates_retested,daily_total_population,inmates_security_quarantine,inmates_medical_quarantine,inmates_positive_in_custody,inmates_released_positive,inmates_positive_symptomatic,inmates_positive_asymptomatic,inmates_positive_current,inmates_step_down,inmates_covid_deaths_underlying_conditions,inmates_covid_deaths,inmates_medical,staff_negative,inmates_positive_on_site,inmates_hospital,location,county,jail_inmates_negative,jail_inmates_positive,jail_inmates_pending,jail_inmates_total,jail_inmates_quarantine,inmates_total,parolees_negative,parolees_positive,parolees_pending,parolees_total,parolees_quarantine,juveniles_negative,juveniles_positive,juveniles_pending,juveniles_total,juveniles_quarantine,recovered,contract_staff_positive,staff_positive_asymptomatic,staff_positive_symptomatic,facility_type,staff_tested,tier_status,inmates_recovered_in_previous_calendar_day,inmates_released
152,Murray,Texas,2020-05-10,20.0,,,,,,,68.0,,,7.0,,,21.0,594.0,75.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
103,Ellis,Texas,2020-05-10,21.0,,,,,,,64.0,,,8.0,,,16.0,1304.0,72.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
82,Beto,Texas,2020-05-10,20.0,,,,,,,62.0,,,19.0,,,51.0,2349.0,81.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
104,Estelle,Texas,2020-05-10,17.0,,,,,,,52.0,,,6.0,,,62.0,893.0,58.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
166,Scott,Texas,2020-05-10,13.0,,,,,,,50.0,,,15.0,,,8.0,537.0,65.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
146,Michael,Texas,2020-05-10,25.0,,,,,,,49.0,,,8.0,,,19.0,964.0,56.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
89,Clements,Texas,2020-05-10,60.0,,,,,,,49.0,,,2.0,,,8.0,961.0,51.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
183,Wynne,Texas,2020-05-10,63.0,,,,,,,49.0,,,3.0,,,18.0,2156.0,51.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
147,Middleton,Texas,2020-05-10,9.0,,,,,,,48.0,,,14.0,,,9.0,806.0,62.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
125,Hospital Galveston,Texas,2020-05-10,,,,,,,,46.0,,,5.0,,,11.0,0.0,51.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Jail data by county

Data sources:
+ UCLA Spreadsheet
  + covid jail releases
  + covid facility data
+ NYTimes County-level data
+ Vera Institute county jail data

### UCLA data

From the UCLA spreadsheet
  + jail_prison_confirmed_cases.csv
    + for Texas: See TCJS data in a latter section
  + covid_jail_releases.csv

In [15]:
# for upload if run as google colab
uploaded = files.upload()

Saving covid_jail_releases.csv to covid_jail_releases.csv
Saving jail_prison_confirmed_cases.csv to jail_prison_confirmed_cases.csv


#### Jail/Prison Confirmed Cases

In [16]:
jail_prison_confirmed_cases = pd.read_csv('jail_prison_confirmed_cases.csv')

# filter jail information only
#jail_prison_confirmed_cases[(jail_prison_confirmed_cases["Facility"] == "Jail")]
jail_prison_confirmed_cases[(jail_prison_confirmed_cases["Facility"] == "Jail") & (jail_prison_confirmed_cases["State"] == "Texas")]

Unnamed: 0,Facility,State,County / Name of Facility,Confirmed Cases \n(Staff),Confirmed Cases (Residents),Confirmed Deaths\n(Staff),Confirmed Deaths\n(Residents),Staff Recovered,Residents Recovered,Date \n(last updated),Source(s),Additional Notes,Coder,Unnamed: 13
1107,Jail,Texas,Texas Commission on Jail Standards,338,1255.0,,2.0,,,5/11/20,https://www.tcjs.state.tx.us,(5/5/20) Bexar County jail resident dies of Co...,GD,


In [17]:
jail_prison_confirmed_cases[(jail_prison_confirmed_cases["Facility"] == "Jail")]

Unnamed: 0,Facility,State,County / Name of Facility,Confirmed Cases \n(Staff),Confirmed Cases (Residents),Confirmed Deaths\n(Staff),Confirmed Deaths\n(Residents),Staff Recovered,Residents Recovered,Date \n(last updated),Source(s),Additional Notes,Coder,Unnamed: 13
876,Jail,Utah,UDC Inmates Housed at a County Jail,,0.0,,0.0,,0.0,5/11/20,https://corrections.utah.gov/index.php/home/al...,(4/22/20) Utah released a facility-specific tr...,GD,
1102,Jail,New York,New York City Jails,1279.0,371.0,9.0,3.0,,,5/11/20,https://www1.nyc.gov/site/boc/covid-19.page,(5/5/20) articel reports nine Rikers Island st...,GD,
1103,Jail,Illinois,Cook County,396.0,534.0,3.0,7.0,281.0,344.0,5/11/20,https://www.cookcountysheriff.org/covid-19-cas...,"(4/26/20) 229 residents currently positive, 23...",GD,
1104,Jail,California,Los Angeles Jails,107.0,357.0,0.0,0.0,,117.0,5/11/20,https://lasd.org/covid19updates/,(4/23/20) total of 44 inmates positive to date...,GD,
1105,Jail,District of Columbia,County-Wide,74.0,177.0,1.0,1.0,37.0,127.0,5/11/20,https://coronavirus.dc.gov/page/public-safety-...,(4/13/20) One DC death reported ( https://www....,GD,
1106,Jail,Pennsylvania,Philadelphia Dept. of Prisons,,196.0,,,,184.0,5/11/20,https://www.phila.gov/programs/coronavirus-dis...,(5/11/20) the Philadelphia prison page now red...,GD,
1107,Jail,Texas,Texas Commission on Jail Standards,338.0,1255.0,,2.0,,,5/11/20,https://www.tcjs.state.tx.us,(5/5/20) Bexar County jail resident dies of Co...,GD,
1108,Jail,Louisiana,Orleans Parish Jails,82.0,131.0,3.0,0.0,25.0,,5/11/20,http://opcso.org/,(4/28/20) Staff number includes 68 OPSO employ...,GD,


#### Jail Releases

In [0]:
# from Jessica's jupyter notebook
jr = pd.read_csv('covid_jail_releases.csv')
jr = jr.iloc[1:,:7] # jail releases

# rename columns for ease of use
jr.rename(columns={'Overall Pop. Reduction / \nTotal Number of Releases':'releases',
                  'Population Prior to Releases':'prior_pop'},
          inplace=True)

# some rows have weird entries (range of values, circa values), remove
jr = jr[jr['releases'].astype(str).apply(lambda x: x.isnumeric())]
jr = jr[jr['prior_pop'].astype(str).apply(lambda x: x.isnumeric())]

# make columns numeric
jr['releases'] = pd.to_numeric(jr['releases'])
jr['prior_pop'] = pd.to_numeric(jr['prior_pop'])

# what fraction of the prior population has been released
jr['frac_reduction'] = jr['releases']/jr['prior_pop']

In [19]:
jr.sort_values('frac_reduction',ascending = False)

Unnamed: 0,State,County,Facility,Authorizing Agent,Known Capacity \n(if available),prior_pop,releases,frac_reduction
28,District of Col,DC jail,DC jail,,,1850,1442,0.779459
76,Oregon,Marion,County-wide,,,415,300,0.722892
41,Iowa,Polk,County-wide,,,1000,700,0.700000
77,Oregon,Multnomah,County-wide,"Sheriff, courts",1192.0,1097,739,0.673655
73,Oregon,Douglas,County-wide,,,194,128,0.659794
...,...,...,...,...,...,...,...,...
57,Nevada,Clark County,County-wide,,,2900,115,0.039655
60,New York,NYC,City-wide,Governor,,5000,106,0.021200
6,Arizona,Pima County,County-wide,Court,2300.0,1760,27,0.015341
62,New York,NYC,City-wide,Courts,,4300,51,0.011860


In [0]:
# filter only texas jails; take most relevant columns
texas_jail_releases = jr[jr['State'] == 'Texas']

# get rid of white spaces in the County column
texas_jail_releases.insert(1, 'county', texas_jail_releases['County'].str.strip() )
texas_jail_releases = texas_jail_releases.drop('County', axis = 1)

In [30]:
texas_jail_releases

Unnamed: 0,State,county,Facility,Authorizing Agent,Known Capacity \n(if available),prior_pop,releases,frac_reduction
87,Texas,Dallas,County-wide,Courts,,5879,1000,0.170097
88,Texas,Harris,County-wide,,,9059,1789,0.197483
89,Texas,Travis,County-wide,courts,,2164,97,0.044824


In [0]:
# filter only michigan jails; take most relevant columns
michigan_jail_releases = jr[jr['State'] == 'Michigan']

# get rid of white spaces in the County column
michigan_jail_releases.insert(1, 'county', michigan_jail_releases['County'].str.strip() )
michigan_jail_releases = michigan_jail_releases.drop('County', axis = 1)

In [63]:
michigan_jail_releases

Unnamed: 0,State,county,Facility,Authorizing Agent,Known Capacity \n(if available),prior_pop,releases,frac_reduction
51,Michigan,Wayne,,,,1400,384,0.274286
53,Michigan,Oakland,,,,1079,152,0.140871


### NYTimes County Data

+ https://github.com/nytimes/covid-19-data/blob/master/us-counties.csv

In [0]:
us_counties = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')

# filter texas only
texas_pop_covid = us_counties[(us_counties["state"] == "Texas")]

# get most recent data
idx = texas_pop_covid.groupby(['county'])['date'].transform(max) == texas_pop_covid['date']
texas_pop_covid_mostrecent = texas_pop_covid[idx]

# drop some other  redundant columns
texas_pop_covid_mostrecent = texas_pop_covid_mostrecent.drop(['state', 'fips'], axis=1)

In [35]:
texas_pop_covid_mostrecent.head()

Unnamed: 0,date,county,cases,deaths
135000,2020-05-11,Anderson,41,0
135001,2020-05-11,Andrews,21,0
135002,2020-05-11,Angelina,100,0
135003,2020-05-11,Aransas,2,0
135004,2020-05-11,Armstrong,2,0


In [0]:
# filter michigan only
michigan_pop_covid = us_counties[(us_counties["state"] == "Michigan")]

# get most recent data
idx_michigan = michigan_pop_covid.groupby(['county'])['date'].transform(max) == michigan_pop_covid['date']
michigan_pop_covid_mostrecent = michigan_pop_covid[idx_michigan]

# drop some other  redundant columns
michigan_pop_covid_mostrecent = michigan_pop_covid_mostrecent.drop(['state', 'fips'], axis=1)

In [37]:
michigan_pop_covid_mostrecent

Unnamed: 0,date,county,cases,deaths
133827,2020-05-11,Alcona,4,1
133828,2020-05-11,Allegan,157,2
133829,2020-05-11,Alpena,87,8
133830,2020-05-11,Antrim,10,0
133831,2020-05-11,Arenac,27,1
...,...,...,...,...
133902,2020-05-11,Unknown,2263,58
133903,2020-05-11,Van Buren,93,4
133904,2020-05-11,Washtenaw,1192,81
133905,2020-05-11,Wayne,18194,2105


### Vera Institute 

Jail data by county
+ https://github.com/vera-institute/jail-population-data/blob/master/jail_population.csv



In [0]:
jail_population = pd.read_csv('https://raw.githubusercontent.com/vera-institute/jail-population-data/master/jail_population.csv')

#rename column"state_name" to "state" to match NYTimes data
jail_population.rename(columns={'state_name':'state'}, inplace = True)

# filter texas only
texas_jails = jail_population[jail_population["state"] == "Texas"]

# get most recent data
idx2 = texas_jails.groupby(['county_name'])['date'].transform(max) == texas_jails['date']
texas_jails_mostrecent = texas_jails[idx2]

# rename each entry in "Abc County" as "Abc" (remove the word County at the end, to match NYTimes data)
#     keep name in new column called county
texas_jails_mostrecent.insert(4, 'county', texas_jails_mostrecent['county_name'].str.split(' ').str[0])
#     drop the old county_name column
texas_jails_mostrecent = texas_jails_mostrecent.drop('county_name', axis = 1)

In [42]:
texas_jails_mostrecent.head()

Unnamed: 0,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k
65622,48001,2020-05-01,138.0,Anderson,Texas,rural,Anderson County Sheriff's Office,57735.0,239.0
65627,48003,2020-05-01,28.0,Andrews,Texas,rural,Andrews County Jail,18705.0,149.7
65652,48005,2020-05-01,160.0,Angelina,Texas,rural,Angelina County Sheriffs Department,86715.0,184.5
65657,48007,2020-05-01,140.0,Aransas,Texas,small/mid,Aransas County Jail,23510.0,595.5
65761,48009,2020-05-01,20.0,Archer,Texas,small/mid,Archer County Jail,8553.0,233.8


In [0]:
# filter michigan only
michigan_jails = jail_population[jail_population["state"] == "Michigan"]

# get most recent data
idx2_michigan = michigan_jails.groupby(['county_name'])['date'].transform(max) == michigan_jails['date']
michigan_jails_mostrecent = michigan_jails[idx2_michigan]

# rename each entry in "Abc County" as "Abc" (remove the word County at the end, to match NYTimes data)
#     keep name in new column called county
michigan_jails_mostrecent.insert(4, 'county', michigan_jails_mostrecent['county_name'].str.split(' ').str[0])
#     drop the old county_name column
michigan_jails_mostrecent = michigan_jails_mostrecent.drop('county_name', axis = 1)

In [50]:
michigan_jails_mostrecent.head()

Unnamed: 0,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k
39784,26041,2020-05-01,81.0,Delta,Michigan,rural,Delta County Jail,35784.0,226.4
39852,26073,2020-05-12,79.0,Isabella,Michigan,rural,Isabella County Jail,69872.0,113.1
39942,26111,2020-05-12,72.0,Midland,Michigan,small/mid,Midland County Jail,83156.0,86.6
39986,26125,2020-05-12,707.0,Oakland,Michigan,suburban,Oakland County Sheriffs Office,1257584.0,56.2
40029,26145,2020-05-01,275.0,Saginaw,Michigan,small/mid,Saginaw County Sheriffs Office,190539.0,144.3


### Texas jail covid data (from Texas Commision for Jail Standards)
Texas
county-level jail covid data: https://www.tcjs.state.tx.us/wp-content/uploads/2020/05/TCJS_COVID_Report.pdf
  + texas_jail_covid.csv

In [51]:
# for upload if run as google colab
uploaded = files.upload()

Saving texas_jail_covid.csv to texas_jail_covid.csv


In [0]:
texas_jail_covid = pd.read_csv('texas_jail_covid.csv')

In [53]:
texas_jail_covid

Unnamed: 0,county,date,inmates_active_positive_test_confirmation,inmates_pending_test_results,inmates_quarantined,inmates_treated_offside,inmates_deaths_confirmed,inmates_deaths_suspected,staff_active_positive_test,staff_quarantined_pending_test_results
0,Bexar,2020-05-12,208,124,1185,0,1,0,41,581
1,Dallas,2020-05-12,233,9,824,0,0,0,47,13
2,Fannin,2020-05-12,2,0,25,0,0,0,0,1
3,Gregg,2020-05-12,4,0,19,0,0,0,2,0
4,Harris,2020-05-12,669,5,2511,14,0,1,193,56
5,Haskell,2020-05-12,35,7,0,0,0,0,0,3
6,Montgomery,2020-05-12,4,1,79,0,0,0,0,6
7,Smith,2020-05-12,10,6,153,3,0,0,17,18
8,Tarrant,2020-05-12,58,0,132,3,0,0,33,57
9,Webb,2020-05-12,15,1,228,0,0,0,9,0


In [162]:
texas_jail_covid.agg('sum')

county                                       BexarDallasFanninGreggHarrisHaskellMontgomeryS...
date                                         2020-05-122020-05-122020-05-122020-05-122020-0...
inmates_active_positive_test_confirmation                                                 1238
inmates_pending_test_results                                                               153
inmates_quarantined                                                                       5156
inmates_treated_offside                                                                     20
inmates_deaths_confirmed                                                                     1
inmates_deaths_suspected                                                                     1
staff_active_positive_test                                                                 342
staff_quarantined_pending_test_results                                                     735
dtype: object

### Merges 


#### Vera Institute + NYTimes Data

In [0]:
# texas
# merge
texas_county_data0 = texas_jails_mostrecent.join(texas_pop_covid_mostrecent.set_index('county'), on='county', rsuffix='_countypop_covid')

# compute infection and death rates in the county
texas_county_data0['infection_rate_countypop'] = texas_county_data0['cases'] / texas_county_data0['resident_population']
texas_county_data0['death_rate_countypop'] = texas_county_data0['deaths'] / texas_county_data0['resident_population']

# michigan
# merge
michigan_county_data0 = michigan_jails_mostrecent.join(michigan_pop_covid_mostrecent.set_index('county'), on='county', rsuffix='_countypop_covid')

# compute infection and death rates in the county
michigan_county_data0['infection_rate_countypop'] = michigan_county_data0['cases'] / michigan_county_data0['resident_population']
michigan_county_data0['death_rate_countypop'] = michigan_county_data0['deaths'] / michigan_county_data0['resident_population']

In [55]:
texas_county_data0.head()

Unnamed: 0,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k,date_countypop_covid,cases,deaths,infection_rate_countypop,death_rate_countypop
65622,48001,2020-05-01,138.0,Anderson,Texas,rural,Anderson County Sheriff's Office,57735.0,239.0,2020-05-11,41.0,0.0,0.00071,0.0
65627,48003,2020-05-01,28.0,Andrews,Texas,rural,Andrews County Jail,18705.0,149.7,2020-05-11,21.0,0.0,0.001123,0.0
65652,48005,2020-05-01,160.0,Angelina,Texas,rural,Angelina County Sheriffs Department,86715.0,184.5,2020-05-11,100.0,0.0,0.001153,0.0
65657,48007,2020-05-01,140.0,Aransas,Texas,small/mid,Aransas County Jail,23510.0,595.5,2020-05-11,2.0,0.0,8.5e-05,0.0
65761,48009,2020-05-01,20.0,Archer,Texas,small/mid,Archer County Jail,8553.0,233.8,,,,,


In [56]:
michigan_county_data0

Unnamed: 0,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k,date_countypop_covid,cases,deaths,infection_rate_countypop,death_rate_countypop
39784,26041,2020-05-01,81.0,Delta,Michigan,rural,Delta County Jail,35784.0,226.4,2020-05-11,14.0,2.0,0.000391,5.6e-05
39852,26073,2020-05-12,79.0,Isabella,Michigan,rural,Isabella County Jail,69872.0,113.1,2020-05-11,62.0,7.0,0.000887,0.0001
39942,26111,2020-05-12,72.0,Midland,Michigan,small/mid,Midland County Jail,83156.0,86.6,2020-05-11,66.0,8.0,0.000794,9.6e-05
39986,26125,2020-05-12,707.0,Oakland,Michigan,suburban,Oakland County Sheriffs Office,1257584.0,56.2,2020-05-11,7752.0,849.0,0.006164,0.000675
40029,26145,2020-05-01,275.0,Saginaw,Michigan,small/mid,Saginaw County Sheriffs Office,190539.0,144.3,2020-05-11,809.0,85.0,0.004246,0.000446
40048,26159,2020-05-01,66.0,Van,Michigan,small/mid,Van Buren County Jail,75677.0,87.2,,,,,
40090,26163,2020-05-12,830.0,Wayne,Michigan,urban,Wayne County Sheriffs Office,1749343.0,47.4,2020-05-11,18194.0,2105.0,0.0104,0.001203


#### Texas: (Vera Institute + NYTimes ) + TCJS  (inner join)

In [0]:
#merge
texas_county_data1 = texas_county_data0.join( texas_jail_covid.set_index('county'), on='county', rsuffix='_tcjs', how='inner')

# compute infection rates among inmates
texas_county_data1['infection_rate_inmates'] = texas_county_data1['inmates_active_positive_test_confirmation'] / texas_county_data1['jail_population']

In [67]:
texas_county_data1.sort_values(by=['infection_rate_inmates'], ascending=False).head()

Unnamed: 0,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k,date_countypop_covid,cases,deaths,infection_rate_countypop,death_rate_countypop,date_tcjs,inmates_active_positive_test_confirmation,inmates_pending_test_results,inmates_quarantined,inmates_treated_offside,inmates_deaths_confirmed,inmates_deaths_suspected,staff_active_positive_test,staff_quarantined_pending_test_results,infection_rate_inmates
67510,48201,2020-05-12,7492.0,Harris,Texas,urban,"Baytown City Jail, Harris County Sheriff's Office",4713325.0,159.0,2020-05-11,8176.0,179.0,0.001735,3.8e-05,2020-05-12,669,5,2511,14,0,1,193,56,0.089295
67525,48207,2020-05-01,437.0,Haskell,Texas,rural,Rolling Plains Detention Center,5658.0,7723.6,,,,,,2020-05-12,35,7,0,0,0,0,0,3,0.080092
65937,48029,2020-05-01,3341.0,Bexar,Texas,urban,Bexar County Sheriff's Office,2003554.0,166.8,2020-05-11,1920.0,57.0,0.000958,2.8e-05,2020-05-12,208,124,1185,0,1,0,41,581,0.062257
66749,48113,2020-05-01,4829.0,Dallas,Texas,urban,"Dallas County Sheriff's Department, Irving Cit...",2635516.0,183.2,2020-05-11,6123.0,145.0,0.002323,5.5e-05,2020-05-12,233,9,824,0,0,0,47,13,0.04825
70323,48479,2020-05-01,345.0,Webb,Texas,small/mid,Webb County Jail,276652.0,124.7,2020-05-11,437.0,17.0,0.00158,6.1e-05,2020-05-12,15,1,228,0,0,0,9,0,0.043478


In [68]:
texas_county_data1[['fips', 
                    'date', 
                    'jail_population', 
                    'county', 
                    'resident_population', 
                    'infection_rate_countypop', 
                    'infection_rate_inmates']].sort_values(by=['infection_rate_inmates'], ascending=False)

Unnamed: 0,fips,date,jail_population,county,resident_population,infection_rate_countypop,infection_rate_inmates
67510,48201,2020-05-12,7492.0,Harris,4713325.0,0.001735,0.089295
67525,48207,2020-05-01,437.0,Haskell,5658.0,,0.080092
65937,48029,2020-05-01,3341.0,Bexar,2003554.0,0.000958,0.062257
66749,48113,2020-05-01,4829.0,Dallas,2635516.0,0.002323,0.04825
70323,48479,2020-05-01,345.0,Webb,276652.0,0.00158,0.043478
69675,48439,2020-05-01,3494.0,Tarrant,2102515.0,0.001781,0.0166
69613,48423,2020-05-01,755.0,Smith,232751.0,0.000765,0.013245
67451,48183,2020-05-01,587.0,Gregg,123945.0,0.000952,0.006814
68621,48339,2020-05-01,612.0,Montgomery,607391.0,0.001192,0.006536
67158,48147,2020-05-01,506.0,Fannin,35514.0,0.000648,0.003953


#### Texas: (Vera Institute + NYTimes + TCJS) + Releases Info  (inner join)

In [0]:
texas_county_data2 = texas_county_data1.join(texas_jail_releases.set_index('county'), on='county', how='inner').reset_index()

In [229]:
texas_county_data2

Unnamed: 0,index,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k,date_countypop_covid,cases,deaths,infection_rate_countypop,death_rate_countypop,date_tcjs,inmates_active_positive_test_confirmation,inmates_pending_test_results,inmates_quarantined,inmates_treated_offside,inmates_deaths_confirmed,inmates_deaths_suspected,staff_active_positive_test,staff_quarantined_pending_test_results,infection_rate_inmates,Facility,Authorizing Agent,Known Capacity \n(if available),Population Prior to Releases,Overall Pop. Reduction / \nTotal Number of Releases,Does the source report this reduction as a result of releases only or a mix of releases/policy changes?
0,66749,48113,2020-05-01,4829.0,Dallas,Texas,urban,"Dallas County Sheriff's Department, Irving Cit...",2635516.0,183.2,2020-05-11,6123.0,145.0,0.002323,5.5e-05,2020-05-12,233,9,824,0,0,0,47,13,0.04825,County-wide,Courts,,5879,1000,Mix
1,67510,48201,2020-05-12,7492.0,Harris,Texas,urban,"Baytown City Jail, Harris County Sheriff's Office",4713325.0,159.0,2020-05-11,8176.0,179.0,0.001735,3.8e-05,2020-05-12,669,5,2511,14,0,1,193,56,0.089295,County-wide,,,9059,1789,Mix


#### (Vera Institute + NYTimes) + Releases Info  (inner join)

In [0]:
texas_county_data2b = texas_county_data0.join(texas_jail_releases.set_index('county'), on='county', how='right').reset_index()

In [215]:
texas_county_data2b

Unnamed: 0,index,fips,date,jail_population,county,state,urbanicity,reporting_jurisdictions,resident_population,jail_incarceration_rate_per_100k,date_countypop_covid,cases,deaths,Facility,Authorizing Agent,Known Capacity \n(if available),Population Prior to Releases,Overall Pop. Reduction / \nTotal Number of Releases,Does the source report this reduction as a result of releases only or a mix of releases/policy changes?
0,66749,48113,2020-05-01,4829.0,Dallas,Texas,urban,"Dallas County Sheriff's Department, Irving Cit...",2635516.0,183.2,2020-05-11,6123.0,145.0,County-wide,Courts,,5879,1000,Mix
1,67510,48201,2020-05-12,7492.0,Harris,Texas,urban,"Baytown City Jail, Harris County Sheriff's Office",4713325.0,159.0,2020-05-11,8176.0,179.0,County-wide,,,9059,1789,Mix
2,70074,48453,2020-05-01,1698.0,Travis,Texas,urban,Travis County Sheriff's Office,1273954.0,133.3,2020-05-11,2171.0,65.0,County-wide,courts,,2164,97,Mix


Unnamed: 0,county,Facility,Authorizing Agent,Known Capacity \n(if available),Population Prior to Releases,Overall Pop. Reduction / \nTotal Number of Releases,Does the source report this reduction as a result of releases only or a mix of releases/policy changes?


## Other references


+ US-wide data
  + https://www.bjs.gov/content/pub/pdf/ji16.pdf
+ State-level
  + Texas 
    + county-level jail data: https://www.tcjs.state.tx.us/wp-content/uploads/2020/05/TCJS_COVID_Report.pdf
    + state-level jail data: https://www.prisonpolicy.org/profiles/TX.html
    + "Today, the Texas Department of Criminal Justice (TDCJ) oversees 17 state jails, 14 directly and three through private contractors, in 16 counties throughout the state (Exhibit 2)."  (https://comptroller.texas.gov/economy/fiscal-notes/2019/aug/jails.php)
    + http://www.prisonpro.com/content/texas-prisons-jails
    + https://www.tdcj.texas.gov/covid-19/index.html
      + Texas Department of Criminal Justice
      + Starting this week (May 12): "Testing is one key to stopping the spread of COVID-19. Thanks to Texas Governor Greg Abbott and with the help of the Texas Department of Emergency Management, the Texas Department of Criminal Justice (TDCJ) this week is beginning a new expanded testing program."
      + I think this is mainly state prison data: In the first few hours of testing nearly 1500 tests were administered and collected. Like has occurred in virtually every area where large scale asymptomatic testing has taken place TDCJ fully expects the number of positive offender cases to expand dramatically. The agency has a plan in place to medically restrict these new asymptomatic cases as needed.  In total there are 652 TDCJ employees, staff or contractors who have tested positive for COVID-19 and 1733 offenders who have tested positive. There have been 30 confirmed offender deaths and 23 pending autopsy results. 7 staff members have died from COVID-19. There are now 91 employees and 371 offenders who have medically recovered from COVID-19.  19,214 offenders are on medical restriction because they may have had contact with either an employee or offender with a positive or pending COVID-19 test.
  + Michigan:
    + Release info:
      + Ogemaw County: https://www.ogemawherald.com/stories/jail-releases-certain-inmates-due-to-covid-19-pandemic,109566
      + Governor's Executive order (April 26): https://www.michigan.gov/whitmer/0,9309,7-387-90499_90705-523422--,00.html
      + Metro Detroit County Jails: https://www.clickondetroit.com/news/local/2020/04/02/hundreds-of-inmates-released-from-metro-detroit-county-jails-amid-coronavirus-covid-19-outbreak/
      + Washtenaw county jail: https://www.michiganradio.org/post/aggressive-covid-19-plan-washtenaw-county-jail-reduces-population-more-half
      + Calhoun County: https://myemail.constantcontact.com/Press-release--First-Inmate-Identified-With-COVID-19.html?soid=1131086442739&aid=DXtI3PWzW68
