In [21]:
#File:getdata.ipynb
#Author: Rafer Cooley
#Desc:notebook for functions associated with loading raw data into pandas dataframes
from IPython.display import HTML
import pandas as pd
import os, sys
import base64 #for createdownloadlink

##Files
# Every path below is relative to the notebook's working directory.
data_folder = '../new-data/'

# DCI (crime) data: combined folder plus its index-offenses sub-folder
dci_data_folder = data_folder + 'DCI/all-combined/'
dci_index_crimes = dci_data_folder + 'index-offenses/'

# DFS placement data: three CSV files that share one sub-folder
_dfs_folder = data_folder + 'DFS/'
dfs_afcars_data_file = _dfs_folder + 'DFS_12-16-AFCARS-CLEAN.csv'
dfs_county_data_file = _dfs_folder + 'DFS_12-16-Placements-ByCounty-CLEAN.csv'
dfs_plc_data_file = _dfs_folder + 'DFS_12-16-Placements-ByPLC-CLEAN.csv'

# School discipline and juvenile-arrest (ORI) CSV files
school_discipline_data_file = data_folder + 'school-discipline/SchoolDiscipline_2007-17_Combined-CSV2.csv'
ori_data_file = data_folder + 'juvenile-arrests/ori_juvenile_arrest_2010-2016_CLEAN.csv'
##End Files

class DataFunctions:
    """Loaders that read the project's raw CSV files into pandas DataFrames.

    File locations are taken from the module-level path constants
    (``dci_data_folder``, ``dfs_*_data_file``, ``school_discipline_data_file``,
    ``dci_index_crimes``, ``ori_data_file``). The class keeps no instance state.
    """

    #found:https://stackoverflow.com/questions/31893930/download-csv-from-an-ipython-notebook
    def create_download_link(self,df, title = "Download CSV file", filename = "data.csv"):
        """Return an ``IPython.display.HTML`` link that downloads ``df`` as CSV.

        The frame is serialised with ``df.to_csv()`` and embedded in the link's
        ``href`` as a base64 ``data:text/csv`` URI.

        Args:
            df: object exposing ``to_csv()`` (a DataFrame).
            title: link text.
            filename: value of the anchor's ``download`` attribute.

        Note:
            ``title`` and ``filename`` are inserted into the markup as given;
            they are not HTML-escaped.
        """
        csv1 = df.to_csv()
        b64 = base64.b64encode(csv1.encode())
        payload = b64.decode()
        html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
        html = html.format(payload=payload,title=title,filename=filename)
        return HTML(html)
    #end found

    ##Get Data Functs
    def getOverview(self):#DONE
        """Load every file in ``<dci_data_folder>overview/`` into one DataFrame.

        Each file is read with its first column as the index. The text before
        the first ``-`` in the file name is taken as that file's year; it is
        added as a string ``Year`` column unless the frame already has a
        ``Year`` column or index level. Files that fail to load are reported
        on stdout and skipped.

        Returns:
            The per-file frames concatenated in sorted file-name order.

        Raises:
            ValueError: if no file could be loaded.
        """
        gather_overview_frames = []
        current_path = dci_data_folder+'overview/'
        # [2] of the first os.walk() entry is the list of plain files directly
        # inside the folder; sorting makes the row order of the result
        # independent of the directory listing order.
        year_files = sorted(next(os.walk(current_path))[2])
        for f in year_files:#process each file for this year
            file_path = os.path.join(current_path, f)
            year = f.split('-')[0]
            try:
                df = pd.read_csv(file_path,sep=',',header='infer',index_col=0)
            except Exception as e:
                # best effort: report the unreadable file and keep going
                print('cant load file: '+file_path)
                print('['+str(e)+']')
                print('*****************************************************************************************')
                continue
            # DataFrame.assign returns a new frame, so the result must be kept.
            # Skip it when the file already carries its own Year so the two
            # do not collide.
            if 'Year' not in df.columns and 'Year' not in df.index.names:
                df = df.assign(Year=year)
            gather_overview_frames.append(df)
        if not gather_overview_frames:
            raise ValueError('no overview files could be loaded from '+current_path)
        return pd.concat(gather_overview_frames)

    def getDFS(self):
        """Load the three DFS CSV files.

        Returns:
            Tuple ``(afcars, county, plc)``. ``afcars`` and ``county`` are
            indexed by their first two columns, ``plc`` by its first column.
        """
        afcars = pd.read_csv(dfs_afcars_data_file,sep=',',header='infer',index_col=[0,1])
        county = pd.read_csv(dfs_county_data_file,sep=',',header='infer',index_col=[0,1])
        plc = pd.read_csv(dfs_plc_data_file,sep=',',header='infer',index_col=0)
        return afcars, county, plc

    def getSchool(self):
        """Load the school discipline CSV indexed by county and school year.

        A ``County`` column is derived from ``DISTRICT_NAME`` by keeping the
        text before the first ``#``. That text is kept verbatim, so any
        whitespace immediately before the ``#`` stays part of the county value.

        Returns:
            DataFrame indexed by ``(County, Beginning Year, End Year)``.
        """
        df = pd.read_csv(school_discipline_data_file,sep=',',header='infer')
        df['County'] = df['DISTRICT_NAME'].str.split('#').str[0]
        return df.set_index(['County','Beginning Year','End Year'])

    #replaced with ori data DO NOT USE!
    def getIndexCrimes(self, years=('2016','2015','2014')):
        """Load the per-year index-offense files and stack them.

        For each entry of ``years`` the file
        ``<dci_index_crimes><year>-index-cp-after.csv`` is read and tagged
        with a string ``year`` column.

        Args:
            years: years to load, in output order. The default reproduces the
                three files this method originally hard-coded.

        Returns:
            The yearly frames concatenated in the order given.
        """
        yearly_frames = []
        for year in years:
            year = str(year)  # keeps the 'year' column as text even if ints are passed
            frame = pd.read_csv(dci_index_crimes+year+'-index-cp-after.csv',sep=',',header='infer')
            frame['year'] = year
            yearly_frames.append(frame)
        return pd.concat(yearly_frames)

    def getORIData(self):
        """Load the ORI juvenile-arrest CSV.

        Returns:
            DataFrame indexed by the file's second column, then its first.
        """
        return pd.read_csv(ori_data_file,sep=',',header='infer',index_col=[1,0])


In [22]:
# Load every dataset once; later cells reuse these frames.
dfunct = DataFunctions()
overview = dfunct.getOverview()
dfs = dfunct.getDFS()#afcars, county, plc
school = dfunct.getSchool()
index_crimes = dfunct.getIndexCrimes()#DO NOT USE THIS! replaced by ori data
juvenile_arrests = dfunct.getORIData()

# (label, frame) pairs to preview, in display order.
# index_crimes is loaded above but intentionally not previewed.
_previews = [
    ('overview', overview),
    ('dfs[0]', dfs[0]),
    ('dfs[1]', dfs[1]),
    ('dfs[2]', dfs[2]),
    ('school', school),
    ('juvy arrests', juvenile_arrests),
]
for _position, (_label, _frame) in enumerate(_previews):
    if _position:
        # separator goes between previews only, not before the first one
        print('**********************************')
    print(_label)
    print(_frame.head(3))

overview
     Index Crimes  Actual Offenses  Offenses Cleared % Cleared  \
Year                                                             
2015       Murder               11                 7    63.60%   
2015         Rape              158                38    24.10%   
2015      Robbery               54                26    48.10%   

     Stolen Property % Change  Crime Index  
Year                                        
2015               0   -8.30%          0.2  
2015             852   -1.90%          2.7  
2015           26000    1.90%          0.9  
**********************************
dfs[0]
             CHINS  DE
COUNTY YEAR           
Albany 2012      0  10
       2013      0   9
       2014      1  14
**********************************
dfs[1]
             CHINS  DE
COUNTY YEAR           
Albany 2012    0.0  34
       2013    0.0  37
       2014    1.0  45
**********************************
dfs[2]
             YEAR  CHINS   DE
PLC Type                     
Boys School  2012  

In [30]:
# Distinct values of the school frame's first index level (County),
# with leading/trailing whitespace stripped from each name.
set(school.index.levels[0].str.strip())

{'Albany',
 'Big Horn',
 'Campbell',
 'Carbon',
 'Converse',
 'Crook',
 'Fremont',
 'Goshen',
 'Hot Springs',
 'Johnson',
 'Laramie',
 'Lincoln',
 'Natrona',
 'Niobrara',
 'Park',
 'Platte',
 'Sheridan',
 'Sublette',
 'Sweetwater',
 'Teton',
 'Uinta',
 'Washakie',
 'Weston'}