In [None]:
import pandas as pd
import requests
import zipfile
import arcpy
import sqlite3
import io
import os
# Notebook-wide settings: cap how many DataFrame rows are displayed and let
# arcpy tools overwrite outputs that already exist.
pd.options.display.max_rows = 20
arcpy.env.overwriteOutput = True

# Working directory; every relative 'Data/...' path in this notebook resolves against it.
# Written as a raw string: the previous literal mixed '\\' with unescaped
# backslashes ('\J', '\D', '\M', '\E'), which are invalid escape sequences.
# The resulting path is exactly the same.
PROJECT_DIR = r'C:\Users\John\Dropbox\MapDev\EighthGradeCohortRegional'
os.chdir(PROJECT_DIR)
os.getcwd()

### Start by downloading the 2006 cohort workbook from the THECB website

The cohort workbooks are available at: http://www.txhighereddata.org/index.cfm?objectId=F2CBE4A0-C90B-11E5-8D610050560100A9

Save the workbook as 'CohortWorkbook2006.xlsx'

In [None]:
# Read the 'Region Cty Eco' sheet without a header row, skipping the first
# five rows; '.' cells are treated as missing. `sheet_name` is the current
# spelling of the keyword (the older `sheetname` is rejected by newer pandas).
eco_sheet = pd.read_excel('CohortWorkbook2006.xlsx', sheet_name='Region Cty Eco', header=None,
                          na_values='.', index_col=None, skiprows=5)

# Positions of the columns to keep
eco_keep = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23]

# Keep only the first 511 rows. The explicit copy makes CountyEco an independent
# frame, so renaming its columns does not operate on a slice of the raw sheet.
CountyEco = eco_sheet[eco_keep].iloc[:511].copy()
CountyEco.columns = ['TEAReg', 'RegName', 'CtyNum', 'CtyName', 'EcoStatus', 'CohoN',
                     'nHSGrad', 'pHSGrad', 'nEnr4yr', 'pEnr4yr',
                     'nEnr2yr', 'pEnr2yr', 'nEnr', 'pEnr', 'nCompTX', 'pCompTX']

In [None]:
# Read the 'Region Cty Ethnicity' sheet without a header row, skipping the
# first five rows; '.' cells are treated as missing. `sheet_name` is the current
# spelling of the keyword (the older `sheetname` is rejected by newer pandas).
eth_sheet = pd.read_excel('CohortWorkbook2006.xlsx', sheet_name='Region Cty Ethnicity', header=None,
                          na_values='.', index_col=None, skiprows=5)

# Positions of the columns to keep
eth_keep = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23]

# Keep only the first 672 rows. The explicit copy makes CountyEth an independent
# frame, so renaming its columns does not operate on a slice of the raw sheet.
CountyEth = eth_sheet[eth_keep].iloc[:672].copy()
CountyEth.columns = ['TEAReg', 'RegName', 'CtyNum', 'CtyName', 'Eth', 'CohoN',
                     'nHSGrad', 'pHSGrad', 'nEnr4yr', 'pEnr4yr',
                     'nEnr2yr', 'pEnr2yr', 'nEnr', 'pEnr', 'nCompTX', 'pCompTX']

In [None]:
# Read the 'Region Cty Gender' sheet without a header row, skipping the first
# five rows; '.' cells are treated as missing. `sheet_name` is the current
# spelling of the keyword (the older `sheetname` is rejected by newer pandas).
# NOTE(review): the converters are keyed by column *names*, but with header=None
# the sheet's columns are labelled by position, so they presumably never match
# anything. Left unchanged to preserve the current output; confirm and then
# either key them by position or remove them.
gen_sheet = pd.read_excel('CohortWorkbook2006.xlsx', sheet_name='Region Cty Gender', na_values='.',
                          header=None, index_col=None,
                          converters={'TEAReg':str,'RegName':str,'CtyName':str,'Gender':str},
                          skiprows=5)

# Positions of the columns to keep
gen_keep = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23]

# Keep only the first 512 rows. The explicit copy makes CountyGen an independent
# frame, so renaming its columns does not operate on a slice of the raw sheet.
CountyGen = gen_sheet[gen_keep].iloc[:512].copy()
CountyGen.columns = ['TEAReg', 'RegName', 'CtyNum', 'CtyName', 'Gender', 'CohoN',
                     'nHSGrad', 'pHSGrad', 'nEnr4yr', 'pEnr4yr',
                     'nEnr2yr', 'pEnr2yr', 'nEnr', 'pEnr', 'nCompTX', 'pCompTX']

In [None]:
# Discard every row that has a missing value in any column.
GenCty = CountyGen.dropna()
EthCty = CountyEth.dropna()
EcoCty = CountyEco.dropna()

# Write each cleaned table to its temporary CSV, without the index column.
temp_outputs = [
    ('Data/TempGen.csv', GenCty),
    ('Data/TempEth.csv', EthCty),
    ('Data/TempEco.csv', EcoCty),
]
for csv_path, cleaned in temp_outputs:
    cleaned.to_csv(csv_path, index=False)


### Download the Texas County Code to FIPS crosswalk from the [Department of State Health Services](https://www.dshs.texas.gov/chs/info/info_txco.shtm)

In [None]:
# Read the raw crosswalk workbook; the first row holds the column headers and
# '.' cells are treated as missing. `sheet_name` is the current spelling of the
# keyword (the older `sheetname` is rejected by newer pandas).
crosswalk_sheet = pd.read_excel('Data/rawCountyCrosswalk.xlsx', sheet_name='Sheet1', header=0,
                                na_values='.', index_col=None)

# Keep the two code columns and the first 254 rows. The explicit copy makes the
# result an independent frame, so renaming its columns does not operate on a
# selection of the raw sheet.
CountyCrosswalk = crosswalk_sheet[['FIPS #', 'County #']].iloc[:254].copy()
CountyCrosswalk.columns = ['FIPS', 'CtyNum']

CountyCrosswalk.to_csv('Data/CountyCrosswalk.csv', index=False)

### Load and run the R script Dataprep.R

Then load output csv files into GDB


In [None]:
# Create the file geodatabase that will hold the cohort tables.
arcpy.CreateFileGDB_management('Data',"CohortCounty.gdb")

# Import the three county CSV files into the geodatabase as tables Eth, Eco and Gen.
arcpy.TableToTable_conversion ('Data/EthCounties.csv', 'Data/CohortCounty.gdb', 'Eth')
arcpy.TableToTable_conversion ("Data/EcoCounties.csv", 'Data/CohortCounty.gdb', 'Eco')
arcpy.TableToTable_conversion ("Data/GenCounties.csv", 'Data/CohortCounty.gdb', 'Gen')

# Output folder used later for the final shapefiles. Only create it when it is
# missing: an unconditional os.makedirs raises if the folder already exists,
# which made this cell fail on every re-run.
if not os.path.isdir('Data/FinalShapefiles'):
    os.makedirs('Data/FinalShapefiles')

In [None]:
# Get the Census counties file (listed at
# https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html) and unzip it.
URL=requests.get('http://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_county_20m.zip')

# Stop here on an HTTP error status; otherwise an error page would be handed to
# ZipFile and fail with a misleading "not a zip file" message.
URL.raise_for_status()

# Unpack the archive straight from memory into the shapefile folder.
zipped=zipfile.ZipFile(io.BytesIO(URL.content))
zipped.extractall('Data/CensusShapefiles')


In [None]:
# Print the name, type and length of every field in the county shapefile.
fields = arcpy.ListFields('Data/CensusShapefiles/cb_2016_us_county_20m.shp')
field_summary = "{0} is a type of {1} with a length of {2}"

for field in fields:
    print(field_summary.format(field.name, field.type, field.length))


In [None]:
# Wrap the national county shapefile in a temporary layer named "CompleteCounties".
county_shapefile = "Data/CensusShapefiles/cb_2016_us_county_20m.shp"
arcpy.MakeFeatureLayer_management(county_shapefile, "CompleteCounties")

# Restrict the layer to features whose state FIPS code is 48.
state_filter = "STATEFP = '48'"
arcpy.SelectLayerByAttribute_management("CompleteCounties","NEW_SELECTION", state_filter)

# Remove the attribute fields that are not needed downstream.
# NOTE(review): DeleteField on a layer presumably alters the underlying
# shapefile as well, and STATEFP (used by the selection above) is among the
# deleted fields, so re-running this cell without re-extracting the download
# would likely fail. Confirm.
unneeded_fields = ["STATEFP", "COUNTYNS", "AFFGEOID", 'GEOID', 'NAME', 'LSAD', 'ALAND', 'AWATER']
arcpy.DeleteField_management("CompleteCounties", unneeded_fields)

# Copy the layer into the cohort geodatabase.
arcpy.FeatureClassToGeodatabase_conversion('CompleteCounties', 'Data/CohortCounty.gdb')


### The dataset combines the counties of El Paso and Hudspeth into a single county with the FIPS number 999.

I changed the "COUNTYFP" field for El Paso (141) and Hudspeth (229) to 999 and then dissolved the county polygons on "COUNTYFP". This was easiest to do manually in ArcGIS Pro.

Saved in 'Data/DissolvedCounties/DissolvedCounties.shp'

In [None]:
def MakeShapefiles(dim):
    """Join one cohort table to the dissolved county polygons and save a shapefile.

    A temporary feature layer is made from the dissolved-counties shapefile,
    joined to the geodatabase table named ``dim`` (county ``COUNTYFP`` against
    table ``FIPS``, keeping only matching records), and copied to
    ``Data/FinalShapefiles/<dim>Counties.shp``.

    Parameters
    ----------
    dim : str
        Name of a table in ``Data/CohortCounty.gdb``; also used as the prefix
        of the temporary layer name and of the output shapefile name.

    Returns
    -------
    None
    """
    # Plain path/name strings. The earlier version wrapped each value in literal
    # single quotes (e.g. "'Data/CohortCounty.gdb/Gen'"), so the quote characters
    # became part of the path and layer name handed to arcpy.
    table = 'Data/CohortCounty.gdb/' + dim
    FinalShp = 'Data/FinalShapefiles/' + dim + 'Counties.shp'
    outlayer = dim + 'layer'

    arcpy.MakeFeatureLayer_management('Data/DissolvedCounties/DissolvedCounties.shp', outlayer)
    # KEEP_COMMON: only counties that have a matching row in the table are kept.
    arcpy.AddJoin_management(outlayer, 'COUNTYFP', table, 'FIPS', 'KEEP_COMMON')
    arcpy.CopyFeatures_management(outlayer, FinalShp)

    
# Table names to process, in order; one shapefile is built per name.
dims = [
    "Gen",
    "Eth",
    "Eco",
]
for dim in dims:
    MakeShapefiles(dim)

In [None]:

# Join the Eth table to the county polygons on CNTY_NM == CTYNum, keeping only
# the counties that have a match (KEEP_COMMON), and save the joined features.
#
# This cell previously could not run at all: its MakeFeatureLayer call was an
# unfinished template (no `arcpy.` prefix, placeholder arguments, unclosed
# parenthesis). The layer is now created from the geodatabase feature class and
# the join is applied to that layer, so the copy below includes the joined fields.
#
# NOTE(review): this writes the same file that MakeShapefiles("Eth") writes
# ('Data/FinalShapefiles/EthCounties.shp'); confirm this cell is still wanted.
# NOTE(review): assumes CompleteCounties has a CNTY_NM field; confirm before running.
arcpy.MakeFeatureLayer_management('Data/CohortCounty.gdb/CompleteCounties', 'CompleteCounties')
arcpy.AddJoin_management('CompleteCounties', 'CNTY_NM','Data/CohortCounty.gdb/Eth', 'CTYNum', 'KEEP_COMMON')
#arcpy.FeatureClassToFeatureClass_conversion(EthLayer, 'Data/FinalShapefiles', 'Eth.shp')
arcpy.CopyFeatures_management('CompleteCounties', 'Data/FinalShapefiles/EthCounties.shp')

# arcpy.JoinField_management('Data/CohortCounty.gdb/CompleteCounties', 'CNTY_NM','Data/CohortCounty.gdb/Eco', 'CTYNum', 'KEEP_COMMON')
# arcpy.CopyFeatures_management('CompleteCounties', 'Data/FinalShapefiles/EcoCounties.shp')

# arcpy.JoinField_management('Data/CohortCounty.gdb/CompleteCounties', 'CNTY_NM','Data/CohortCounty.gdb/Gen', 'CTYNum', 'KEEP_COMMON')
# arcpy.CopyFeatures_management('CompleteCounties', 'Data/FinalShapefiles/GenCounties.shp')