In [1]:


# functions used throughout analysis

from __future__ import division
from IPython.core.display import HTML
import numpy as np
import pandas as pd
from collections import Counter
from scipy.stats import mode

# Set pandas' global display.max_colwidth option for everything rendered below.
# NOTE(review): a width of 1 is unusually small -- confirm this is the
# intended value and not a stand-in for "do not truncate".
pd.set_option('display.max_colwidth', 1)



def loadFiles(src_dir, fnName):
    """Read a comma-delimited CSV file into a pandas DataFrame.

    Parameters
    ----------
    src_dir : str
        Directory that contains the file. A trailing path separator is
        optional.
    fnName : str
        Name of the CSV file inside ``src_dir``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    # Local import so this function stays self-contained within the cell.
    import os

    # os.path.join only inserts a separator when src_dir lacks one, so both
    # '/data/' and '/data' resolve to the same file. Plain concatenation
    # turned the latter into '/datafile.csv'.
    fn = os.path.join(src_dir, fnName)
    df = pd.read_csv(fn, delimiter=',')
    return df

def getDfColumns(df):
    """Return the column labels of ``df`` as a plain Python list."""
    column_labels = df.columns.values
    return [label for label in column_labels]

def columnString(dfList):
    """Join an iterable of strings into one comma-separated string.

    No whitespace is inserted around the commas.
    """
    separator = ','
    return separator.join(dfList)

def findSingleCells(df):
    """Summarise the cell values of a cross-tabulation.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of cell counts (for example the result of ``pd.crosstab``).

    Returns
    -------
    tuple
        ``(single, kplus, totalcells, percentsingle, minCell, maxCell,
        meanCell, medianCell)`` where

        * ``single`` is the number of cells whose value equals 1,
        * ``kplus`` is ``totalcells - single``, i.e. every cell whose value
          is not exactly 1 (cells holding 0 are included),
        * ``totalcells`` is the total number of cells,
        * ``percentsingle`` is ``single`` as a percentage of ``totalcells``,
        * the remaining four are the min, max, mean and median cell value.

    Raises
    ------
    ZeroDivisionError
        If ``df`` contains no cells.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and current pandas releases.
    crossMatrix = df.values

    # Vectorised counts replace the nested Python comprehensions.
    single = int(np.count_nonzero(crossMatrix == 1))
    totalcells = int(crossMatrix.size)
    kplus = totalcells - single

    # float() keeps this a true division even without the module-level
    # `from __future__ import division`.
    percentsingle = (float(single) / totalcells) * 100

    maxCell = np.amax(crossMatrix)
    minCell = np.amin(crossMatrix)
    meanCell = np.mean(crossMatrix)
    medianCell = np.median(crossMatrix)
    return single, kplus, totalcells, percentsingle, minCell, maxCell, meanCell, medianCell

def getCrossTab(grpField, fieldList, margin=False):
    """Cross-tabulate ``grpField`` (rows) against ``fieldList`` (columns).

    Thin wrapper around ``pandas.crosstab``; ``margin`` is forwarded as its
    ``margins`` argument. Returns the resulting frequency table.
    """
    return pd.crosstab(index=grpField, columns=fieldList, margins=margin)

def getSingleCellInfo( datasetName, crossTab, grpFieldList,  fieldListString):
    """Build an HTML report of the single-cell statistics of a cross-tab.

    Parameters
    ----------
    datasetName : str
        Dataset label shown in the heading and in each headline statistic.
    crossTab : pandas.DataFrame
        Cross-tabulation to summarise; passed to ``findSingleCells``.
    grpFieldList : str
        Label of the grouping field (it is concatenated into the text).
    fieldListString : str
        Label of the cross-tabulated fields.

    Returns
    -------
    IPython.core.display.HTML
        Report with the total cell count, the number and percentage of cells
        equal to 1, and the min / max / mean / median cell value.
    """
    single, kplus, totalcells, percentsingle, minCell, maxCell, meanCell, medianCell = findSingleCells(crossTab)

    # Label shared by the three headline statistics.
    tabLabel = datasetName + "- " + fieldListString + " By " + grpFieldList

    # "<br/>" replaces "</br>", which is not a valid HTML tag; the closing
    # "</div>" balances the opening tag that was previously left open.
    reportParts = [
        "<div> *********************** <br/>",
        "<h3>" + datasetName + ":<br/>" + grpFieldList + " By " + fieldListString + "</h3>",
        "<p><b> Total number of cells in " + tabLabel + ": " + str(totalcells) + "<br/>",
        "Number of Single Cells in " + tabLabel + ": " + str(single) + "<br/>",
        "Percentage of Single cells in " + tabLabel + ": " + str(percentsingle) + "%" + "</b><br/></p>",
        "<br> Min cell value in crossTabs: " + str(minCell) + "<br/>",
        "Max cell value in crossTabs: " + str(maxCell) + "<br/>",
        "Mean cell value in crossTabs: " + str(meanCell) + "<br/>",
        "Median cell value in crossTabs: " + str(medianCell) + "<br/>",
        "</div>",
    ]
    return HTML("".join(reportParts))
    
    
def makeOutput(datasetName, src_dir, fnName):
    """Load a dataset and build an HTML title block listing its columns.

    Parameters
    ----------
    datasetName : str
        Label shown in the title block.
    src_dir : str
        Directory containing the CSV file; forwarded to ``loadFiles``.
    fnName : str
        CSV file name; forwarded to ``loadFiles``.

    Returns
    -------
    tuple
        ``(df, titleStuff)``: the loaded DataFrame and an IPython ``HTML``
        object with the dataset name and a comma-separated column list.
    """
    df = loadFiles(src_dir, fnName)
    df_column_names = getDfColumns(df)

    # "<br/>" replaces the invalid "</br>" tag, and the label now renders as
    # "Columns:" instead of "Columns: :".
    titleStuff = "<H1>**************************************<br/>" + "Dataset: " + datasetName + "<br/>"
    titleStuff = titleStuff + "<p> Columns:</p>"
    # str() lets non-string column labels (e.g. integers) be joined too.
    titleStuff = titleStuff + "<p>" + ", ".join(str(name) for name in df_column_names) + "</p></H1>"
    titleStuff = HTML(titleStuff)
    return df, titleStuff

def makeCrossTabInfo( grpField, grpFieldList, fieldList, fieldListString,  datasetName):
    """Build a cross-tabulation and its single-cell HTML report in one call.

    ``grpField`` and ``fieldList`` are the data passed to ``getCrossTab``
    (without margins); ``grpFieldList``, ``fieldListString`` and
    ``datasetName`` are the text labels passed to ``getSingleCellInfo``.

    Returns ``(crossTab, single_cell_info)``.
    """
    table = getCrossTab(grpField, fieldList)
    report = getSingleCellInfo(datasetName, table, grpFieldList, fieldListString)
    return table, report



In [2]:

# Load the Juvenile_Probation_Petitions_Sustained CSV and render its title block.
datasetName = "Juvenile_Probation_Petitions_Sustained"
# NOTE(review): absolute, machine-specific directory -- adjust when running elsewhere.
src_dir = '/home/ubuntu/workspace/source_data/'
fnName = 'Juvenile_Probation_Petitions_Sustained.csv'
        
# Text label of the grouping column; the cross-tab cells for this dataset
# pass it on to makeCrossTabInfo.
grpFieldList = 'Year Petition Sustained'
df, titleStuff = makeOutput(datasetName, src_dir , fnName )
# Bare last expression so the notebook renders the HTML title block.
titleStuff

In [3]:
# Cross-tabulate petition year (rows) against Ethnicity x Gender (columns).
grpField = [ df['Year Petition Sustained']]
# Text label for the column fields, used only in the report wording.
fieldListString = 'Ethnicity,Gender'
fieldList = [ df['Ethnicity'],df['Gender']]
crossBy, single_cell_info = makeCrossTabInfo( grpField, grpFieldList, fieldList, fieldListString,  datasetName)
# Bare last expression so the notebook renders the HTML report.
single_cell_info

In [4]:
# Display the cross-tabulation produced by the preceding makeCrossTabInfo call.
crossBy

Ethnicity,african american,african american,american indian,asian,asian,hispanic,hispanic,other,other,white,white
Gender,female,male,male,female,male,female,male,female,male,female,male
Year Petition Sustained,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
2011,74,301,5,13,59,42,153,6,7,7,18
2012,86,226,0,6,53,18,117,0,9,5,18
2013,75,245,1,7,54,23,104,3,5,4,31
2014,58,212,0,7,33,21,97,0,4,3,12
2015,56,183,0,6,34,12,71,0,4,3,13
2016,1,10,0,0,1,3,9,0,0,0,0


In [5]:
# Cross-tabulate petition year (rows) against
# Ethnicity x Gender x Petition Sustained Offense Degree Level (columns).
grpField = [ df['Year Petition Sustained']]
# Text label for the column fields, used only in the report wording.
fieldListString = 'Ethnicity,Gender,Petition Sustained Offense Degree Level'
fieldList = [ df['Ethnicity'],df['Gender'],df['Petition Sustained Offense Degree Level']]
crossBy, single_cell_info = makeCrossTabInfo( grpField, grpFieldList, fieldList, fieldListString,  datasetName)
# Bare last expression so the notebook renders the HTML report.
single_cell_info

In [6]:
# Display the cross-tabulation produced by the preceding makeCrossTabInfo call.
crossBy

Ethnicity,african american,african american,african american,african american,african american,african american,african american,american indian,american indian,american indian,...,other,other,other,other,other,white,white,white,white,white
Gender,female,female,female,male,male,male,male,male,male,male,...,female,female,male,male,male,female,female,male,male,male
Petition Sustained Offense Degree Level,felony,misdemeanor,non-contact,felony,infraction,misdemeanor,non-contact,felony,misdemeanor,non-contact,...,felony,misdemeanor,felony,misdemeanor,non-contact,felony,misdemeanor,felony,misdemeanor,non-contact
Year Petition Sustained,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2011,23,42,9,167,0,105,29,2,2,1,...,1,5,2,5,0,1,6,7,9,2
2012,42,36,8,133,0,61,32,0,0,0,...,0,0,5,3,1,2,3,6,10,2
2013,40,30,5,145,1,70,29,0,1,0,...,1,2,5,0,0,0,4,17,11,3
2014,22,28,8,123,0,62,27,0,0,0,...,0,0,1,2,1,1,2,4,6,2
2015,17,31,8,88,0,66,29,0,0,0,...,0,0,0,3,1,0,3,7,5,1
2016,0,1,0,8,0,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:

# Load the Juvenile_Probation_Selected_Court_Dispositions CSV and render its
# title block. This rebinds the notebook-global df, datasetName and grpFieldList.
# NOTE(review): this label includes the ".csv" extension, unlike the label used
# for the first dataset -- confirm whether that is intended.
datasetName = "Juvenile_Probation_Selected_Court_Dispositions.csv"
# NOTE(review): absolute, machine-specific directory -- adjust when running elsewhere.
src_dir = '/home/ubuntu/workspace/source_data/'
fnName = 'Juvenile_Probation_Selected_Court_Dispositions.csv'
        
# Text label of the grouping column; the cross-tab cells for this dataset
# pass it on to makeCrossTabInfo.
grpFieldList = 'Disposition Year'
df, titleStuff = makeOutput(datasetName, src_dir , fnName )
# Bare last expression so the notebook renders the HTML title block.
titleStuff

In [8]:
# Cross-tabulate disposition year (rows) against Ethnicity x Gender (columns).
grpField = [ df['Disposition Year']]
# Text label for the column fields, used only in the report wording.
fieldListString = 'Ethnicity,Gender'
fieldList = [ df['Ethnicity'],df['Gender']]
crossBy, single_cell_info = makeCrossTabInfo( grpField, grpFieldList, fieldList, fieldListString,  datasetName)
# Bare last expression so the notebook renders the HTML report.
single_cell_info

In [9]:
# Display the cross-tabulation produced by the preceding makeCrossTabInfo call.
crossBy

Ethnicity,african american,african american,american indian,asian,asian,hispanic,hispanic,korean,korean,other,other,unknown,white,white
Gender,female,male,male,female,male,female,male,female,male,female,male,male,female,male
Disposition Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2011,114,412,4,17,102,56,251,0,0,4,21,1,17,22
2012,112,330,1,9,78,21,161,1,0,1,14,0,9,30
2013,92,300,1,12,71,28,154,0,1,5,8,0,5,44
2014,80,238,0,7,52,32,141,0,0,0,5,0,5,19
2015,73,230,0,9,39,22,97,0,0,0,3,1,4,15


In [10]:
# Cross-tabulate disposition year (rows) against
# Court Disposition Summary x Age At Court Disposition (columns).
grpField = [ df['Disposition Year']]
# Text label for the column fields, used only in the report wording.
# NOTE(review): unlike the other labels this one has a space after the comma
# and a trailing space -- confirm whether that is intended.
fieldListString = 'Court Disposition Summary, Age At Court Disposition '
fieldList = [ df['Court Disposition Summary'],df['Age At Court Disposition']]
crossBy, single_cell_info = makeCrossTabInfo( grpField, grpFieldList, fieldList, fieldListString,  datasetName)
# Bare last expression so the notebook renders the HTML report.
single_cell_info

In [11]:
# Display the cross-tabulation produced by the preceding makeCrossTabInfo call.
crossBy

Court Disposition Summary,725a probation,725a probation,725a probation,commit to djj,commit to djj,commit to juvenile hall,commit to juvenile hall,commit to lcrs,commit to lcrs,commit to out of home placement,...,petition suspended in favor of 654wi,petition suspended in favor of 654wi,petition suspended in favor of 654wi,recommit to lcrs,recommit to lcrs,remand to adult court,remand to adult court,transfer out to another county,transfer out to another county,transfer out to another county
Age At Court Disposition,13-17,18+,<=12,13-17,18+,13-17,18+,13-17,18+,13-17,...,13-17,18+,<=12,13-17,18+,13-17,18+,13-17,18+,<=12
Disposition Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2011,25,1,0,0,2,49,8,20,0,126,...,99,7,9,3,0,2,2,104,4,0
2012,26,2,0,4,0,32,12,31,1,92,...,86,5,3,0,0,0,0,97,12,1
2013,31,4,2,1,2,15,6,22,0,83,...,58,4,2,0,0,0,0,112,14,0
2014,18,2,0,3,1,8,4,11,2,104,...,46,2,2,0,0,0,0,86,12,0
2015,9,1,1,1,0,0,7,17,9,70,...,28,2,3,0,1,0,0,60,10,3
