# Qri Notebook

This notebook is structured to streamline the workflow for analyzing Qri datasets.

In [1]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import subprocess
from io import StringIO
import os

## Defining the Qri class object
This object contains functions for basic Qri tasks, such as loading the dataset body into a pandas DataFrame and saving and publishing a dataset.

In [2]:
class Qri():
    """Small helper around the ``qri`` command-line tool for a single dataset.

    ``get_csv`` / ``get_json`` run ``qri get body`` and parse its output with
    pandas. ``save_body`` / ``publish`` do not run anything: they only return
    the command string for the caller to execute.
    """

    def __init__(self, dataset):
        """Store the working dataset reference.

        Args:
            dataset: Qri dataset reference, e.g.
                ``'xristosk/ucla_law_covid_prison_history'``.
        """
        self.dataset = dataset

    def _get_body(self):
        """Run ``qri get body <dataset>`` and return its stdout as a text buffer.

        The command is passed as an argument list without a shell, so the
        dataset reference reaches ``qri`` as one literal argument even if it
        contains spaces or shell metacharacters.

        Raises:
            subprocess.CalledProcessError: if ``qri`` exits with a non-zero status.
            FileNotFoundError: if the ``qri`` executable cannot be found.
        """
        raw_body = subprocess.check_output(['qri', 'get', 'body', self.dataset])
        return StringIO(raw_body.decode("utf-8"))

    def get_csv(self):
        """Return the dataset body parsed with ``pd.read_csv``."""
        return pd.read_csv(self._get_body())

    def get_json(self):
        """Return the dataset body parsed with ``pd.read_json``.

        NOTE(review): this issues the same ``qri get body`` command as
        ``get_csv`` (no format flag), so it presumably only works when qri
        emits the body as JSON -- confirm against the qri CLI.
        """
        return pd.read_json(self._get_body())

    def save_body(self, filepath, wdir=None):
        """Build the ``qri save --body`` command for ``filepath``.

        Args:
            filepath: path of the body file to save.
            wdir: working directory to switch to first; ``None`` (default)
                keeps the directory that is current at call time.

        Returns:
            The command string; nothing is executed here.
        """
        # The default is resolved at call time: a definition-time
        # ``os.getcwd()`` default would jump back to a stale directory.
        if wdir is not None:
            os.chdir(wdir)
        return f"qri save --body {filepath}"

    def publish(self, wdir=None):
        """Build the ``qri publish`` command for this dataset.

        Args:
            wdir: working directory to switch to first; ``None`` (default)
                keeps the directory that is current at call time.

        Returns:
            The command string; nothing is executed here.
        """
        if wdir is not None:
            os.chdir(wdir)
        return f"qri publish {self.dataset}"

### Note:
The save and publish functions do not run anything themselves; they only return the command as a string. To execute it, place the returned string in curly brackets preceded by an exclamation point, which is how Jupyter Notebook runs terminal commands.

#### Example:

In [None]:
# !{Qri('dataset_name').publish()}

### Loading the data

#### 1. COVID-19 cases & deaths in prisons/jails

In [3]:
# Fetch the body of the COVID-19 prison history dataset through Qri
# and preview its first five rows.
covid19_df = Qri('xristosk/ucla_law_covid_prison_history').get_csv()
covid19_df.head(5)

Unnamed: 0,Address,Coder,Date,Facility,Name,Notes,Resident.Deaths,Residents.Confirmed,Residents.Recovered,Staff.Confirmed,Staff.Deaths,Staff.Recovered,State,Website
0,,,2020-03-31,Prison,Crossroads ATC,,0,0,0,1,0,0.0,Illinois,https://www2.illinois.gov/idoc/facilities/Page...
1,,,2020-03-31,Prison,Joliet Treatment Center,,0,0,0,1,0,0.0,Illinois,https://www2.illinois.gov/idoc/facilities/Page...
2,,,2020-03-31,Prison,Menard,,0,0,0,1,0,0.0,Illinois,https://www2.illinois.gov/idoc/facilities/Page...
3,,,2020-03-31,Prison,North Lawndale ATC,,0,4,0,1,0,0.0,Illinois,https://www2.illinois.gov/idoc/facilities/Page...
4,,,2020-03-31,Prison,Sheridan,,0,0,0,1,0,0.0,Illinois,https://www2.illinois.gov/idoc/facilities/Page...


#### 2. Daily number of people taken into NYPD custody

In [4]:
# Fetch the body of the NYC daily-inmates-in-custody dataset through Qri
# and preview its first five rows.
inmates_df = Qri('xristosk/nyc_daily_inmates_in_custody').get_csv()
inmates_df.head(5)

Unnamed: 0,inmateid,admitted_dt,discharged_dt,custody_level,bradh,race,gender,age,inmate_status_code,sealed,srg_flg,top_charge,infraction
0,20007480,06/16/2020 01:18:01 AM,,MIN,N,B,M,56.0,DE,N,N,121.12,N
1,58413,06/23/2020 11:37:27 PM,,MAX,N,B,M,39.0,DE,N,Y,160.1,N
2,20208697,05/01/2020 03:27:47 PM,,MIN,Y,B,M,43.0,DE,N,N,155.3,Y
3,20067507,02/11/2019 06:26:30 PM,,MED,N,B,M,55.0,DE,N,N,125.25,Y
4,20210238,06/05/2020 05:17:54 PM,,MED,N,B,M,53.0,DEP,N,N,160.1,N


## Data Processing

#### a. Daily inmates

In [5]:
## changing the date datatype from string to datetime & setting it as index
# Guarded and non-inplace so the cell can be re-run safely: once
# 'admitted_dt' has become the index it is no longer a column, and
# converting it again would raise a KeyError.
if inmates_df.index.name != 'admitted_dt':
    inmates_df = (
        inmates_df
        .assign(admitted_dt=lambda frame: pd.to_datetime(frame['admitted_dt']))
        .set_index('admitted_dt')
    )

In [6]:
## running total of inmates admitted daily & yet to be released between April 1st and June 13th, 2020
# The admission dates are not sorted, and string-slicing an unsorted
# DatetimeIndex is deprecated (a KeyError on pandas >= 2.0). An explicit
# window selects the same rows: 2020-04-01 00:00 up to, but excluding, 2020-06-14.
inmates_window = (
    (inmates_df.index >= pd.Timestamp('2020-04-01'))
    & (inmates_df.index < pd.Timestamp('2020-06-14'))
)
# count rows per calendar day, then accumulate into a running total
inmates_sum = inmates_df.loc[inmates_window].resample('D').count().cumsum()

In [7]:
# Show the last five days of the running totals
inmates_sum.tail(5)

Unnamed: 0_level_0,inmateid,discharged_dt,custody_level,bradh,race,gender,age,inmate_status_code,sealed,srg_flg,top_charge,infraction
admitted_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-06-09,885,0,881,885,884,884,884,885,885,885,785,885
2020-06-10,907,0,903,907,906,906,906,907,907,907,805,907
2020-06-11,928,0,924,928,927,927,927,928,928,928,823,928
2020-06-12,942,0,938,942,941,941,941,942,942,942,835,942
2020-06-13,959,0,953,959,958,958,958,959,959,959,850,959


#### b. COVID-19 in jails

In [8]:
# Display the column labels of the COVID-19 frame
covid19_df.columns

Index(['Address', 'Coder', 'Date', 'Facility', 'Name', 'Notes',
       'Resident.Deaths', 'Residents.Confirmed', 'Residents.Recovered',
       'Staff.Confirmed', 'Staff.Deaths', 'Staff.Recovered', 'State',
       'Website'],
      dtype='object')

In [9]:
# Parse 'Date' as datetime and use it as the index.
# Guarded and non-inplace so the cell can be re-run safely: once 'Date'
# has become the index it is no longer a column, and converting it again
# would raise a KeyError.
if covid19_df.index.name != 'Date':
    covid19_df = (
        covid19_df
        .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
        .set_index('Date')
    )

In [10]:
## keep only the rows whose facility name is 'New York City Jails'
is_nyc_jails = covid19_df['Name'].eq('New York City Jails')
covid19_df = covid19_df.loc[is_nyc_jails]

In [11]:
# Show the last five rows of the NYC jails records
covid19_df.tail(5)

Unnamed: 0_level_0,Address,Coder,Facility,Name,Notes,Resident.Deaths,Residents.Confirmed,Residents.Recovered,Staff.Confirmed,Staff.Deaths,Staff.Recovered,State,Website
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-06-10,,GD,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page
2020-06-11,,KP,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page
2020-06-12,,GD,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page
2020-06-13,,GD,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page
2020-06-15,,KP,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page


In [12]:
# Restrict the NYC jails records to the window used for the inmates data:
# 2020-04-01 00:00 up to, but excluding, 2020-06-14.
# An explicit mask does not depend on the index being sorted, and .copy()
# yields an independent frame that can safely receive new columns.
covid_window = (
    (covid19_df.index >= pd.Timestamp('2020-04-01'))
    & (covid19_df.index < pd.Timestamp('2020-06-14'))
)
covid19_sum = covid19_df.loc[covid_window].copy()

In [13]:
## total confirmed cases = confirmed residents + confirmed staff, stored as a new column
covid19_sum['total.confirmed'] = covid19_sum['Residents.Confirmed'].add(
    covid19_sum['Staff.Confirmed']
)
covid19_sum.tail(5)

Unnamed: 0_level_0,Address,Coder,Facility,Name,Notes,Resident.Deaths,Residents.Confirmed,Residents.Recovered,Staff.Confirmed,Staff.Deaths,Staff.Recovered,State,Website,total.confirmed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-06-09,,KP,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page,1748
2020-06-10,,GD,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page,1748
2020-06-11,,KP,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page,1748
2020-06-12,,GD,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page,1748
2020-06-13,,GD,Jail,New York City Jails,(6/8/20) NY Board of Corrections is now postin...,3,340,0,1408,9,0.0,New York,https://www1.nyc.gov/site/boc/covid-19.page,1748


### Visuals

In [17]:
# Overlay the running total of inmates in custody with the total
# confirmed COVID-19 cases (residents + staff) in NYC jails.
fig = go.Figure()

# solid line: running total of admitted inmates
fig.add_trace(go.Scatter(
    x=inmates_sum.index,
    y=inmates_sum['inmateid'],
    name='Inmates in Custody',
    line={'color': 'peru', 'width': 3}))

# dash-dot line: confirmed cases
fig.add_trace(go.Scatter(
    x=covid19_sum.index,
    y=covid19_sum['total.confirmed'],
    name='Confirmed Cases',
    line={'color': 'navy', 'width': 3, 'dash': 'dashdot'}))

tick_font = {'family': 'Arial', 'size': 14}
fig.update_layout(
    # The title font is set through title.font; the top-level `titlefont`
    # attribute is deprecated and rejected by recent plotly releases.
    title={'text': 'NYC Inmates in Custody VS. COVID-19 Cases in Jails',
           'font': {'family': 'Arial', 'size': 24}},
    xaxis={'showgrid': False, 'tickfont': tick_font, 'ticks': 'inside'},
    xaxis_title='Date',
    yaxis={'gridcolor': 'lightgray', 'tickfont': tick_font},
    plot_bgcolor='white')
fig.show()