In [1]:
import pandas as pd

import df_manip as mp

In [2]:
state_data = pd.read_pickle("proc_data/Simplified_State_Data_1860.pkl")
state_data.head()

Unnamed: 0,GISJOIN,STATE,TotalPop,UrbPop2500,UrbPop25k,White,FreeCol,Slave,Native,Mixed,...,ManLaCost,ManProdVal,Fam,free_nat,free_for,slaves1to9,slaves10to99,slaves100,Support,Slavery
0,G010,Alabama,964201,48901,29258,526271,2690,435080,160.0,0.0,...,2132940.0,10588566.0,96603,516769,12352,21793.0,11591.0,346.0,CSA,Slave
1,G050,Arkansas,435450,3727,0,324143,144,111115,48.0,0.0,...,554240.0,2880578.0,57244,320735,3600,8341.0,3074.0,66.0,CSA,Slave
2,G060,California,379994,78651,56802,323177,4086,0,17798.0,0.0,...,28402288.0,68253232.0,98767,233466,146528,0.0,0.0,0.0,USA,Free
3,G090,Connecticut,460147,122121,66184,451504,8627,0,16.0,0.0,...,19026196.0,81924552.0,94831,379451,80696,0.0,0.0,0.0,USA,Free
4,G100,Delaware,112216,21258,0,90589,19829,1798,0.0,0.0,...,1905754.0,9892902.0,18966,101253,9165,562.0,25.0,0.0,USA,Slave


In [3]:
print(state_data.columns.to_list())

['GISJOIN', 'STATE', 'TotalPop', 'UrbPop2500', 'UrbPop25k', 'White', 'FreeCol', 'Slave', 'Native', 'Mixed', 'Asian', 'ManEst', 'ManCap', 'ManRaw', 'ManLabMal', 'ManLabFe', 'ManLaCost', 'ManProdVal', 'Fam', 'free_nat', 'free_for', 'slaves1to9', 'slaves10to99', 'slaves100', 'Support', 'Slavery']


In [4]:
# Creating a new dataframe for analysis - key information mostly as fraction of state population
# Note that the new dataframe is explicityly marked .copy() to avoid warnings below

state_anal = state_data[["STATE", "Slavery", "Support", "TotalPop"]].copy()

state_anal.head()

Unnamed: 0,STATE,Slavery,Support,TotalPop
0,Alabama,Slave,CSA,964201
1,Arkansas,Slave,CSA,435450
2,California,Free,USA,379994
3,Connecticut,Free,USA,460147
4,Delaware,Slave,USA,112216


In [5]:
# Re-caclulating most of the data for later analysis - allocation with .loc to establish best coding practice
 
# Fraction of Slaves to population
state_anal.loc[:, 'slaves_frac'] = state_data['Slave'] / state_data['TotalPop']

slaveholders = ['slaves1to9', 'slaves10to99', 'slaves100']
# Fraction of Slave Holders to free population
state_anal.loc[:, 'slave_hold_frac'] = mp.column_frac(state_data, slaveholders, ['White', 'FreeCol', 'Native'])

# Fraction of Slaveholders 10 or more slaves to all slaveholders
state_anal.loc[:, 'slave_hold_lg_vs_sm'] = mp.column_frac(state_data, ['slaves10to99', 'slaves100'], slaveholders)

# Fraction Foreign Born
state_anal.loc[:, 'for_born_frac'] = mp.column_frac(state_data, ['free_for'], ['TotalPop'])

# Fraction of people employed in Manufacturing
state_anal.loc[:, 'man_em_frac'] = mp.column_frac(state_data, ['ManLabMal', 'ManLabFe'], ['TotalPop'])

# Fraction of people living in centres > 2500 (>25k gets too many 'zeroes')
state_anal.loc[:, 'urb_frac'] = mp.column_frac(state_data, ['UrbPop25k', 'UrbPop2500'], ['TotalPop'])

# Manufacturing Capital+Manufacturing Raw Material - $per Pop
state_anal.loc[:, 'man_dollar_per_pop'] = mp.column_frac(state_data, ["ManCap", "ManRaw"], ["TotalPop"])

state_anal.head()

Unnamed: 0,STATE,Slavery,Support,TotalPop,slaves_frac,slave_hold_frac,slave_hold_lg_vs_sm,for_born_frac,man_em_frac,urb_frac,man_dollar_per_pop
0,Alabama,Slave,CSA,964201,0.451234,0.063747,0.353899,0.012811,0.008182,0.081061,15.129776
1,Arkansas,Slave,CSA,435450,0.255173,0.035399,0.273495,0.008267,0.00431,0.008559,5.964205
2,California,Free,USA,379994,0.0,0.0,0.0,0.385606,0.129544,0.356461,129.198803
3,Connecticut,Free,USA,460147,0.0,0.0,0.0,0.17537,0.140105,0.409228,187.982362
4,Delaware,Slave,USA,112216,0.016023,0.005316,0.042589,0.081673,0.05722,0.189438,102.318787


In [6]:
#Calculating the fraction of women employed in Manufacturing (just for fun)

sums = state_data.sum(axis=0)
women_employed = sums['ManLabFe']/(sums['ManLabFe'] + sums['ManLabMal'])

print(women_employed)


0.207350804713376


In [7]:
state_anal.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   STATE                33 non-null     object 
 1   Slavery              33 non-null     object 
 2   Support              33 non-null     object 
 3   TotalPop             33 non-null     int64  
 4   slaves_frac          33 non-null     float64
 5   slave_hold_frac      33 non-null     float64
 6   slave_hold_lg_vs_sm  33 non-null     float64
 7   for_born_frac        33 non-null     float64
 8   man_em_frac          33 non-null     float64
 9   urb_frac             33 non-null     float64
 10  man_dollar_per_pop   33 non-null     float64
dtypes: float64(7), int64(1), object(3)
memory usage: 3.0+ KB


In [8]:
#Saving as CSV and Pickle

state_anal.to_excel("proc_data/Analyzed_State_Data_1860.xlsx")
state_anal.to_pickle("proc_data/Analyzed_State_Data_1860.pkl")