In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Workflow of Project
This project utilizes the Data Science Project Life Cycle, which has the following steps:

1. Business Understanding
2. Data Collection
3. Data Preparation
4. Exploratory Data Analysis
5. Modelling
6. Model Evaluation
7. Model Deployment

# Part 1: Business Understanding

### Introduction to concepts of Economic Growth and Productive Employment

* Gross Domestic Product(GDP) as a measure of Economic Growth
> * GDP = value of goods and services produced by the nations economy - value of goods and services used up in production
> * Two measures of GDP will be used in this study, the Contribution_by_GDP and Growth_by_GDP

* Productive Employment
> * Productive Employment is employment yielding sufficient returns to labour, to permit a worker and his/her dependents a level of consumption above the poverty line
> * The International Labour Organization (ILO) has set the International Poverty Line to USD 2 (USD 1.90) a day; hence any person earning below USD 2 a day is considered poor, a group the ILO refers to as the 'Working Poor'
> * The working poor in this case will be the people earning below KSh 10,000 a month (Wage_bracket_0_to_9999)

### --------------------------------------------------- End of Business Understanding Section (Part 1)----------------------------------------

# Part 2: Data Collection

---



---



---


### Perform Data Extraction from PDF Files using camelot-py Module

### Prerequisites


* Installing dependecies for camelot-py, which include GhostScript and TKinter, on local machine
* Installing camelot-py module
* Testing camelot-py module
* Downloading yearly Statistical Abstract files from Kenya National Bureau of Statitics (KNBS)

#### Sources of data (PDF files, courtesy of KNBS)


* Statistical Abstract 2013
* Statistical Abstract 2014
* Statistical Abstract 2015
* Statistical Abstract 2017
* Statistical Abstract 2019

To extract data from the pdf files, I connect to local runtime, which has camelot-py module installed; to use the module for extraction

### Workflow
1. Set up notebook server to allow connection to local runtime; using the command provided in the next section
2. Look up for desired tables in pdf files, noting page numbers
3. Extract tables from the pdf files using the page numbers
4. Export tables to csv files, which will then be used in the data preparation section

#### Command for starting notebook server

jupyter notebook in command prompt or in terminal

import camelot module

In [3]:
import camelot

### Extracting tables from the given datasets

Statistical Abstract 2013

In [4]:
# Statistical Abstract 2013
pathPdf = 'D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/PDFs/PDFs/STATISTICAL ABSTRACT 2013.pdf'
pathDataset = "D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/"
tables = camelot.read_pdf(pathPdf, pages='285', flavor='stream', strip_text='*+')

numOfTables=tables.n
print('Number of tables: ' + str(numOfTables))

#Parsing Report
print('PARSING REPORT')
print(tables[0].parsing_report)

Number of tables: 1
PARSING REPORT
{'accuracy': 95.47, 'whitespace': 11.79, 'order': 1, 'page': 285}


In [5]:
#saving table from pdf to csv
tables[0].to_csv(pathDataset + '/Wage Employment 2011.csv')

Statistical Abstract 2014

In [6]:
pathpdf = "D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/PDFs/PDFs/STATISTICAL ABSTRACT 2014.pdf"
tables = camelot.read_pdf(pathpdf,pages = "74,77,265,266,267,268", flavor="stream", strip_text ="*+\n" )

nOfTables = tables.n
print("Number of tables: " + str(nOfTables))

#parsing Report
print("PARSING REPORT")

for i in range(nOfTables):
    print(tables[i].parsing_report)

Number of tables: 6
PARSING REPORT
{'accuracy': 99.36, 'whitespace': 8.12, 'order': 1, 'page': 74}
{'accuracy': 99.63, 'whitespace': 10.68, 'order': 1, 'page': 77}
{'accuracy': 91.57, 'whitespace': 31.06, 'order': 1, 'page': 265}
{'accuracy': 97.76, 'whitespace': 14.48, 'order': 1, 'page': 266}
{'accuracy': 99.17, 'whitespace': 14.48, 'order': 1, 'page': 267}
{'accuracy': 99.65, 'whitespace': 18.93, 'order': 1, 'page': 268}


In [7]:
#Exporting Tables to CSV Files
tables[0].to_csv(pathDataset + '/Contribution to GDP by Percent 2009-2013.csv')
tables[1].to_csv(pathDataset + '/Growth  of GDP by Activity 2009-2013.csv')
tables[2].to_csv(pathDataset + '/Wage Employment 2010-2013.csv')
tables[3].to_csv(pathDataset + '/Wage Employment 2012.csv')
tables[4].to_csv(pathDataset + '/Wage Employment 2013.csv')
tables[5].to_csv(pathDataset + '/Wage Employment by Sex and Income 2010-2013.csv')

Statistical Abstract 2017

In [8]:
# Statistical Abstract 2017
pathPdf = 'D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/PDFs/PDFs/STATISTICAL ABSTRACT 2017.pdf'
tables = camelot.read_pdf(pathPdf, pages='101,102', flavor='stream', strip_text='*+\n')

numOfTables=tables.n
print('Number of tables: ' + str(numOfTables))

#Parsing Report
print('PARSING REPORT')
i=0
while i < numOfTables:
    print(tables[i].parsing_report)
    i+=1

Number of tables: 4
PARSING REPORT
{'accuracy': 92.98, 'whitespace': 24.84, 'order': 1, 'page': 101}
{'accuracy': 99.79, 'whitespace': 25.0, 'order': 2, 'page': 101}
{'accuracy': 92.98, 'whitespace': 24.84, 'order': 1, 'page': 102}
{'accuracy': 99.79, 'whitespace': 25.0, 'order': 2, 'page': 102}


In [9]:
tables[1].to_csv(pathDataset + '/Wage Employment 2015.csv')
tables[3].to_csv(pathDataset + '/Wage Employment 2016.csv')

Statistical Abstract 2019

In [10]:
# Statistical Abstract 2019
pathPdf = 'D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/PDFs/PDFs/Statistical Abstract 2019.pdf'
tables = camelot.read_pdf(pathPdf, pages='30,32,63,64,65,66', flavor='stream', strip_text='*+\n')

numOfTables=tables.n
print('Number of tables: ' + str(numOfTables))

#Parsing Report
print('PARSING REPORT')
i=0
for i in range(numOfTables):
    print(tables[i].parsing_report)

Number of tables: 6
PARSING REPORT
{'accuracy': 99.55, 'whitespace': 9.21, 'order': 1, 'page': 30}
{'accuracy': 99.62, 'whitespace': 8.97, 'order': 1, 'page': 32}
{'accuracy': 88.77, 'whitespace': 29.29, 'order': 1, 'page': 63}
{'accuracy': 99.94, 'whitespace': 22.76, 'order': 1, 'page': 64}
{'accuracy': 99.95, 'whitespace': 22.67, 'order': 1, 'page': 65}
{'accuracy': 99.44, 'whitespace': 19.71, 'order': 1, 'page': 66}


In [11]:
#Exporting Tables to CSV Files
tables[0].to_csv(pathDataset + '/Contribution to GDP by Percent 2012-2018.csv')
tables[1].to_csv(pathDataset + '/Growth  of GDP by Activity 2012-2018.csv')
tables[2].to_csv(pathDataset + '/Wage Employment 2014-2018.csv')
tables[3].to_csv(pathDataset + '/Wage Employment 2017.csv')
tables[4].to_csv(pathDataset + '/Wage Employment 2018.csv')
tables[5].to_csv(pathDataset + '/Wage Employment by Sex and Income 2014-2018.csv')

After performing data collection, we can now disconnect local runtime and switch to hosted runtime for the next sections 

### --------------------------------------------------- End of Data Collection Section (Part 2)--------------------------------------------

# Part 3: Data Preparation

---


### Prerequisites
1. Preparing datasets using Microsoft Excel
2. Connection to hosted runtime
3. Migrating prepared datasets from local disk to Google Drive
4. Mounting Google Drive

### Workflow

1.   Joining yearly datasets to a single dataset spanning all the years (2011 -2018)
2.   Data Pre-processing

### Mounting Google Drive

In [12]:
import gdown

# Replace 'your_file_id' with the actual file ID from your Google Drive shareable link
file_id = 'your_file_id'
url = f'https://drive.google.com/uc?id={file_id}'

# Replace 'your_destination_path' with the path where you want to save the file
output = 'your_destination_path'

gdown.download(url, output, quiet=False)


Access denied with the following error:



 	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses. 

You may still be able to access the file from the browser:

	 https://drive.google.com/uc?id=your_file_id 



### Importing all packages needed

In [13]:
#import libraries
import sys 
import numpy as np # linear algebra
from scipy.stats import randint
import pandas as pd # data processing, CSV file I/O 
import matplotlib.pyplot as plt # this is used for the plot the graph 
import seaborn as sns # used for plot interactive graph. 

#### 3.1 Joining yearly datasets to a single dataset spanning all the years

In [14]:
df2011 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2011.csv")
df2012 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2012.csv")
df2013 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2013.csv")
df2014 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2014.csv")
df2015 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2015.csv")
df2016 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2016.csv")
df2017 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2017.csv")
df2018 = pd.read_csv(r"D:\Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/PROJECT 1 FILES/Wage_Employment_and_GDP_2018.csv")

Checking shape of Yearly dataframes

In [15]:
list_df = [df2011,df2012,df2013,df2014,df2015,df2016,df2017,df2018]

for i in list_df:
    print(i.shape)

(21, 13)
(21, 13)
(21, 13)
(21, 13)
(21, 13)
(21, 13)
(21, 13)
(21, 13)


In [16]:
df_list = [df2011, df2012, df2013, df2014, df2015, df2016, df2017, df2018]
df = pd.concat(df_list,ignore_index=True,sort=False)
df.to_csv(pathDataset + 'Wage_Employment_and_GDP_2011_to_2018.csv')

### 3.2 Data Preprocessing

In [17]:
df = pd.read_csv(r"D:\Hackathon\Hackathon (16) - Python Project\Hackathon (16) - Python Project\CSV FILES\Wage_Employment_and_GDP_2011_to_2018.csv")
df.head(10)

Unnamed: 0.1,Unnamed: 0,Industry,Year,Contribution_by_Gdp,Growth_of_GDP,"0 - 9,999","10,000 - 14999","15,000 - 19999","20,000 - 24999","25,000 - 29999","30,000 - 49999","50,000 - 99999",100000+,TOTAL
0,0,"Agriculture, Forestry And Fishing",2011,23.8,1.5,12141,36238,111549,100240,37712,23976,12935,6631,341422
1,1,Mining And Quarrying,2011,0.7,7.1,59,101,944,1586,1965,2312,1752,13,8732
2,2,Manufacturing,2011,9.6,3.4,632,1723,17205,50949,54427,76329,70040,5580,276885
3,3,"Electricity, Gas, Steam And Air Conditioning S...",2011,0.4,-4.4,-,101,45,3229,690,1413,6830,30,12338
4,4,"Water Supply; Sewerage, Waste Management And R...",2011,0.7,3.0,-,-,-,3036,1983,1970,818,83,7890
5,5,Construction,2011,4.1,4.3,-,714,2465,17669,16531,35028,32245,2593,107245
6,6,Wholesale And Retail Trade; Repair Of Motor Ve...,2011,10.5,7.3,56,2179,4363,20215,25842,52176,57676,8174,170681
7,7,Transportation And Storage,2011,7.8,5.4,821,386,841,11032,16544,22798,22740,1296,76458
8,8,Accommodation And Food Service Activities,2011,1.7,4.9,318,269,1245,9485,12600,27588,17001,2303,70809
9,9,Information And Communication,2011,2.2,4.3,48,49,105,2684,8438,41299,24220,1389,78232


In [18]:
df.columns

Index(['Unnamed: 0', 'Industry', 'Year', 'Contribution_by_Gdp',
       'Growth_of_GDP', '0 - 9,999', '10,000 - 14999', '15,000 - 19999',
       '20,000 - 24999', '25,000 - 29999', '30,000 - 49999', '50,000 - 99999',
       '100000+', 'TOTAL'],
      dtype='object')

In [19]:
df.drop('Unnamed: 0', axis=1, inplace=True)

In [20]:
df.head()

Unnamed: 0,Industry,Year,Contribution_by_Gdp,Growth_of_GDP,"0 - 9,999","10,000 - 14999","15,000 - 19999","20,000 - 24999","25,000 - 29999","30,000 - 49999","50,000 - 99999",100000+,TOTAL
0,"Agriculture, Forestry And Fishing",2011,23.8,1.5,12141,36238,111549,100240,37712,23976,12935,6631,341422
1,Mining And Quarrying,2011,0.7,7.1,59,101,944,1586,1965,2312,1752,13,8732
2,Manufacturing,2011,9.6,3.4,632,1723,17205,50949,54427,76329,70040,5580,276885
3,"Electricity, Gas, Steam And Air Conditioning S...",2011,0.4,-4.4,-,101,45,3229,690,1413,6830,30,12338
4,"Water Supply; Sewerage, Waste Management And R...",2011,0.7,3.0,-,-,-,3036,1983,1970,818,83,7890


### Renaming Columns

In [21]:
df.columns = ['Industry', 'Year', 'Contribution_to_Gdp',
       'Growth_of_GDP', 'Wage_bracket_0_to_9999', 'Wage_bracket_10000_to_14999', 'Wage_bracket_15000_to_19999',
       'Wage_bracket_20000_to_24999', 'Wage_bracket_25000_to_29999', 'Wage_bracket_30000_to_49999', 'Wage_bracket_50000_to_99999',
       'Wage_bracket_100000_plus', 'TOTAL']

In [22]:
df.head()

Unnamed: 0,Industry,Year,Contribution_to_Gdp,Growth_of_GDP,Wage_bracket_0_to_9999,Wage_bracket_10000_to_14999,Wage_bracket_15000_to_19999,Wage_bracket_20000_to_24999,Wage_bracket_25000_to_29999,Wage_bracket_30000_to_49999,Wage_bracket_50000_to_99999,Wage_bracket_100000_plus,TOTAL
0,"Agriculture, Forestry And Fishing",2011,23.8,1.5,12141,36238,111549,100240,37712,23976,12935,6631,341422
1,Mining And Quarrying,2011,0.7,7.1,59,101,944,1586,1965,2312,1752,13,8732
2,Manufacturing,2011,9.6,3.4,632,1723,17205,50949,54427,76329,70040,5580,276885
3,"Electricity, Gas, Steam And Air Conditioning S...",2011,0.4,-4.4,-,101,45,3229,690,1413,6830,30,12338
4,"Water Supply; Sewerage, Waste Management And R...",2011,0.7,3.0,-,-,-,3036,1983,1970,818,83,7890


In [23]:
#no of unique industry columns
df.Industry.nunique()

21

There are 21 unique columns in the compiled dataset as shown by the result above, which is as expected

#### Removing special characters from Wage_bracket columns

In [30]:
cols = ['Wage_bracket_0_to_9999', 'Wage_bracket_10000_to_14999', 'Wage_bracket_15000_to_19999',
       'Wage_bracket_20000_to_24999', 'Wage_bracket_25000_to_29999', 'Wage_bracket_30000_to_49999', 'Wage_bracket_50000_to_99999',
       'Wage_bracket_100000_plus', 'TOTAL']


#cast to string
df[cols] = df[cols].astype(str)

#Removing special characters
df[cols] = df[cols].replace({'\\$': '', ',': '', '-': ''}, regex=True)

#path to dataset
path = "D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/"
df.to_csv(path + 'Wage_Employment_and_GDP_2011_to_2018_Final.csv')

In [31]:
df.head()

Unnamed: 0,Industry,Year,Contribution_to_Gdp,Growth_of_GDP,Wage_bracket_0_to_9999,Wage_bracket_10000_to_14999,Wage_bracket_15000_to_19999,Wage_bracket_20000_to_24999,Wage_bracket_25000_to_29999,Wage_bracket_30000_to_49999,Wage_bracket_50000_to_99999,Wage_bracket_100000_plus,TOTAL
0,"Agriculture, Forestry And Fishing",2011,23.8,1.5,12141.0,36238.0,111549.0,100240,37712,23976,12935,6631,341422
1,Mining And Quarrying,2011,0.7,7.1,59.0,101.0,944.0,1586,1965,2312,1752,13,8732
2,Manufacturing,2011,9.6,3.4,632.0,1723.0,17205.0,50949,54427,76329,70040,5580,276885
3,"Electricity, Gas, Steam And Air Conditioning S...",2011,0.4,-4.4,,101.0,45.0,3229,690,1413,6830,30,12338
4,"Water Supply; Sewerage, Waste Management And R...",2011,0.7,3.0,,,,3036,1983,1970,818,83,7890


In [35]:
pathDataset = "D:/Hackathon/Hackathon (16) - Python Project/Hackathon (16) - Python Project/CSV FILES/Wage_Employment_and_GDP_2011_to_2018_Final.csv"
df = pd.read_csv(pathDataset,parse_dates=["Year"],index_col=["Year"],na_values=["nan","?","-"])
df

Unnamed: 0_level_0,Unnamed: 0,Industry,Contribution_to_Gdp,Growth_of_GDP,Wage_bracket_0_to_9999,Wage_bracket_10000_to_14999,Wage_bracket_15000_to_19999,Wage_bracket_20000_to_24999,Wage_bracket_25000_to_29999,Wage_bracket_30000_to_49999,Wage_bracket_50000_to_99999,Wage_bracket_100000_plus,TOTAL
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2011-01-01,0,"Agriculture, Forestry And Fishing",23.8,1.5,12141.0,36238.0,111549.0,100240.0,37712.0,23976.0,12935.0,6631.0,341422.0
2011-01-01,1,Mining And Quarrying,0.7,7.1,59.0,101.0,944.0,1586.0,1965.0,2312.0,1752.0,13.0,8732.0
2011-01-01,2,Manufacturing,9.6,3.4,632.0,1723.0,17205.0,50949.0,54427.0,76329.0,70040.0,5580.0,276885.0
2011-01-01,3,"Electricity, Gas, Steam And Air Conditioning S...",0.4,-4.4,,101.0,45.0,3229.0,690.0,1413.0,6830.0,30.0,12338.0
2011-01-01,4,"Water Supply; Sewerage, Waste Management And R...",0.7,3.0,,,,3036.0,1983.0,1970.0,818.0,83.0,7890.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-01,163,Human Health And Social Work Activities,1.5,4.5,,960.0,6108.0,17810.0,36788.0,65229.0,17572.0,4287.0,148755.0
2018-01-01,164,"Arts, Entertainment And Recreation",0.1,6.4,,46.0,857.0,1446.0,2277.0,1188.0,878.0,550.0,7243.0
2018-01-01,165,Other Service Activities,0.6,6.7,,552.0,1735.0,4490.0,11270.0,11251.0,6312.0,722.0,36332.0
2018-01-01,166,Activities Of Households As Employers; Undiffe...,0.4,1.6,,316.0,911.0,17245.0,15082.0,53200.0,29083.0,,


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 168 entries, 2011-01-01 to 2018-01-01
Data columns (total 13 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Unnamed: 0                   168 non-null    int64  
 1   Industry                     168 non-null    object 
 2   Contribution_to_Gdp          166 non-null    float64
 3   Growth_of_GDP                161 non-null    float64
 4   Wage_bracket_0_to_9999       96 non-null     float64
 5   Wage_bracket_10000_to_14999  144 non-null    float64
 6   Wage_bracket_15000_to_19999  152 non-null    float64
 7   Wage_bracket_20000_to_24999  167 non-null    float64
 8   Wage_bracket_25000_to_29999  167 non-null    float64
 9   Wage_bracket_30000_to_49999  167 non-null    float64
 10  Wage_bracket_50000_to_99999  167 non-null    float64
 11  Wage_bracket_100000_plus     154 non-null    float64
 12  TOTAL                        167 non-null    float64
dtypes

In [38]:
df.dtypes

Unnamed: 0                       int64
Industry                        object
Contribution_to_Gdp            float64
Growth_of_GDP                  float64
Wage_bracket_0_to_9999         float64
Wage_bracket_10000_to_14999    float64
Wage_bracket_15000_to_19999    float64
Wage_bracket_20000_to_24999    float64
Wage_bracket_25000_to_29999    float64
Wage_bracket_30000_to_49999    float64
Wage_bracket_50000_to_99999    float64
Wage_bracket_100000_plus       float64
TOTAL                          float64
dtype: object

In [39]:
df.shape

(168, 13)

In [40]:
df.columns

Index(['Unnamed: 0', 'Industry', 'Contribution_to_Gdp', 'Growth_of_GDP',
       'Wage_bracket_0_to_9999', 'Wage_bracket_10000_to_14999',
       'Wage_bracket_15000_to_19999', 'Wage_bracket_20000_to_24999',
       'Wage_bracket_25000_to_29999', 'Wage_bracket_30000_to_49999',
       'Wage_bracket_50000_to_99999', 'Wage_bracket_100000_plus', 'TOTAL'],
      dtype='object')

### Dealing with nan values -- filling nan with mean in the columns

In [52]:
#finding all columns that have nan values
nan_list = []
for j in range(2,13):
    if not df.iloc[:,j].notnull().all():
        nan_list.append(j)
nan_list

[]

In [50]:
cols = ['Contribution_to_Gdp', 'Growth_of_GDP',
       'Wage_bracket_0_to_9999', 'Wage_bracket_10000_to_14999',
       'Wage_bracket_15000_to_19999', 'Wage_bracket_20000_to_24999',
       'Wage_bracket_25000_to_29999', 'Wage_bracket_30000_to_49999',
       'Wage_bracket_50000_to_99999', 'Wage_bracket_100000_plus', 'TOTAL']
    
for i in cols:
    df[i] = df[i].fillna(df[i].mean())



In [53]:
df.isnull().sum()

Unnamed: 0                     0
Industry                       0
Contribution_to_Gdp            0
Growth_of_GDP                  0
Wage_bracket_0_to_9999         0
Wage_bracket_10000_to_14999    0
Wage_bracket_15000_to_19999    0
Wage_bracket_20000_to_24999    0
Wage_bracket_25000_to_29999    0
Wage_bracket_30000_to_49999    0
Wage_bracket_50000_to_99999    0
Wage_bracket_100000_plus       0
TOTAL                          0
dtype: int64