# Importing necessary dataset

In [1]:
import pandas as pd
import numpy as np
import glob

## Importing all 15 csv files

In [2]:
file_path = r'G:\DataScience\Data Insight\NAICS\csv_files'
all_csv_files = glob.glob(file_path + '/*.csv')

csv_list = []

for csv_file in all_csv_files:
    combined_dataframe = pd.read_csv(csv_file, index_col = None, header = 0)
    csv_list.append(combined_dataframe)
    
data = pd.concat(csv_list, axis = 0, ignore_index = True)
data

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,2000,1,Accommodation and food services [72],148000
1,2000,1,"Administrative and support, waste management a...",59250
2,2000,1,"Agriculture, forestry, fishing and hunting [11]",61750
3,2000,1,"Arts, entertainment and recreation [71]",39500
4,2000,1,Construction [23],106250
...,...,...,...,...
119179,1999,12,9111,2250
119180,1999,12,9120,28500
119181,1999,12,9130,30250
119182,1999,12,9141,500


In [3]:
data.NAICS.value_counts()

Wholesale trade  [41]                         276
Food manufacturing[311]                       276
Publishing industries[511]                    276
Support activities for transportation[488]    276
Fishing, hunting and trapping[114]            276
                                             ... 
1131                                          180
3161                                          168
5612                                          108
5211                                           36
Monetary authorities - central bank[521]       36
Name: NAICS, Length: 437, dtype: int64

In [4]:
#data[['SYEAR', 'SMTH']] = data[['SYEAR', 'SMTH']].astype(object) 
#print(data.dtypes) 

## Getting *Postal service, couriers and messengers* from Postal service[491] & Couriers and messengers[492]

In [5]:
Couriers_and_messengers = data.loc[data['NAICS'].str.contains('492', na = False)]
Couriers_and_messengers

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5489,2000,1,Couriers and messengers[492],10000
5591,2000,2,Couriers and messengers[492],9250
5693,2000,3,Couriers and messengers[492],9750
5795,2000,4,Couriers and messengers[492],7250
5897,2000,5,Couriers and messengers[492],7250
...,...,...,...,...
33114,1999,8,Couriers and messengers[492],9500
33217,1999,9,Couriers and messengers[492],11000
33320,1999,10,Couriers and messengers[492],12750
33423,1999,11,Couriers and messengers[492],10000


In [6]:
Postal_service = data.loc[data['NAICS'].str.contains('491', na = False)]
Postal_service

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5543,2000,1,Postal service[491],9750
5645,2000,2,Postal service[491],8750
5747,2000,3,Postal service[491],8250
5849,2000,4,Postal service[491],7500
5951,2000,5,Postal service[491],9750
...,...,...,...,...
33169,1999,8,Postal service[491],6750
33272,1999,9,Postal service[491],7500
33375,1999,10,Postal service[491],8000
33478,1999,11,Postal service[491],6750


In [7]:
Postal_service_couriers_and_messengers = pd.concat([Postal_service, Couriers_and_messengers])
Postal_service_couriers_and_messengers

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5543,2000,1,Postal service[491],9750
5645,2000,2,Postal service[491],8750
5747,2000,3,Postal service[491],8250
5849,2000,4,Postal service[491],7500
5951,2000,5,Postal service[491],9750
...,...,...,...,...
33114,1999,8,Couriers and messengers[492],9500
33217,1999,9,Couriers and messengers[492],11000
33320,1999,10,Couriers and messengers[492],12750
33423,1999,11,Couriers and messengers[492],10000


### Changing all names of the NACIS column to *Postal service, couriers and messengers*

After changing all column names to *Postal service, couriers and messengers*, the dataframe was sorted by the **SYEAR and SMTH** columns. Then, the *Employment* column was segregated to add up consecutives even rows to get the sum of empoyments from both [491] and [492].

After that, the dataframe was merged back and the columns names were changed to reflect *Data Output Template*.

In [8]:
Postal_service_couriers_and_messengers = Postal_service_couriers_and_messengers.replace(
    'Postal service[491]', 'Postal service, couriers and messengers')
Postal_service_couriers_and_messengers = Postal_service_couriers_and_messengers.replace(
    'Couriers and messengers[492]', 'Postal service, couriers and messengers')
Postal_service_couriers_and_messengers

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5543,2000,1,"Postal service, couriers and messengers",9750
5645,2000,2,"Postal service, couriers and messengers",8750
5747,2000,3,"Postal service, couriers and messengers",8250
5849,2000,4,"Postal service, couriers and messengers",7500
5951,2000,5,"Postal service, couriers and messengers",9750
...,...,...,...,...
33114,1999,8,"Postal service, couriers and messengers",9500
33217,1999,9,"Postal service, couriers and messengers",11000
33320,1999,10,"Postal service, couriers and messengers",12750
33423,1999,11,"Postal service, couriers and messengers",10000


In [9]:
df1 = Postal_service_couriers_and_messengers.sort_values(['SYEAR', 'SMTH'])
df1.head(50)

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
29976,1997,1,"Postal service, couriers and messengers",8000
29921,1997,1,"Postal service, couriers and messengers",8250
30079,1997,2,"Postal service, couriers and messengers",8000
30024,1997,2,"Postal service, couriers and messengers",9000
30182,1997,3,"Postal service, couriers and messengers",7750
30127,1997,3,"Postal service, couriers and messengers",7500
30285,1997,4,"Postal service, couriers and messengers",8500
30230,1997,4,"Postal service, couriers and messengers",6000
30388,1997,5,"Postal service, couriers and messengers",7500
30333,1997,5,"Postal service, couriers and messengers",7500


In [10]:
df = df1.drop(['SYEAR', 'SMTH', 'NAICS'], axis = 1)
df

Unnamed: 0,_EMPLOYMENT_
29976,8000
29921,8250
30079,8000
30024,9000
30182,7750
...,...
29618,0
29772,0
29719,0
29873,0


In [11]:
df.index = np.arange(1,len(df)+1)
df = df.reset_index()
df

Unnamed: 0,index,_EMPLOYMENT_
0,1,8000
1,2,8250
2,3,8000
3,4,9000
4,5,7750
...,...,...
547,548,0
548,549,0
549,550,0
550,551,0


In [12]:
df = df.set_index('index')
df_odd = df.loc[df.index.values % 2 == 1]
df_even = df.loc[df.index.values % 2 == 0]
df_even = df_even.set_index(df_even.index.values - 1)
new = df_odd.add(df_even, fill_value = 0)
new = new.reset_index().reset_index()
new

Unnamed: 0,level_0,index,_EMPLOYMENT_
0,0,1,16250
1,1,3,17000
2,2,5,15250
3,3,7,14500
4,4,9,15000
...,...,...,...
271,271,543,21250
272,272,545,19000
273,273,547,0
274,274,549,0


In [13]:
df2 = df1[np.arange(len(df)) % 2 == 0]
df2 = df2.reset_index().reset_index()
df2

Unnamed: 0,level_0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,0,29976,1997,1,"Postal service, couriers and messengers",8000
1,1,30079,1997,2,"Postal service, couriers and messengers",8000
2,2,30182,1997,3,"Postal service, couriers and messengers",7750
3,3,30285,1997,4,"Postal service, couriers and messengers",8500
4,4,30388,1997,5,"Postal service, couriers and messengers",7500
...,...,...,...,...,...,...
271,271,29469,2019,8,"Postal service, couriers and messengers",9750
272,272,29570,2019,9,"Postal service, couriers and messengers",9750
273,273,29671,2019,10,"Postal service, couriers and messengers",0
274,274,29772,2019,11,"Postal service, couriers and messengers",0


In [14]:
df3 = pd.merge(df2, new, how = 'inner', on = 'level_0')
df3 = df3.drop(['level_0', 'index_x', '_EMPLOYMENT__x', 'index_y'], axis = 1)
df3.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Postal_service_couriers_and_messengers = df3
Postal_service_couriers_and_messengers.tail(30)

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
246,2017,7,"Postal service, couriers and messengers",16250
247,2017,8,"Postal service, couriers and messengers",15250
248,2017,9,"Postal service, couriers and messengers",10000
249,2017,10,"Postal service, couriers and messengers",13500
250,2017,11,"Postal service, couriers and messengers",12250
251,2017,12,"Postal service, couriers and messengers",16750
252,2018,1,"Postal service, couriers and messengers",11750
253,2018,2,"Postal service, couriers and messengers",13750
254,2018,3,"Postal service, couriers and messengers",14000
255,2018,4,"Postal service, couriers and messengers",15500


## Getting *Farms* from Crop production[111] & Animal production and aquaculture[112]

In [15]:
Crop_Production = data.loc[data['NAICS'].str.contains('111', na = False)]
Crop_Production

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5491,2000,1,Crop production[111],11250
5593,2000,2,Crop production[111],10250
5695,2000,3,Crop production[111],13500
5797,2000,4,Crop production[111],17000
5899,2000,5,Crop production[111],21000
...,...,...,...,...
33116,1999,8,Crop production[111],20250
33219,1999,9,Crop production[111],19500
33322,1999,10,Crop production[111],17000
33425,1999,11,Crop production[111],13000


In [16]:
Animal_production_and_aquaculture = data.loc[data['NAICS'].str.contains('112', na = False)]
Animal_production_and_aquaculture

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5478,2000,1,Animal production and aquaculture[112],12250
5580,2000,2,Animal production and aquaculture[112],12000
5682,2000,3,Animal production and aquaculture[112],10500
5784,2000,4,Animal production and aquaculture[112],7500
5886,2000,5,Animal production and aquaculture[112],8500
...,...,...,...,...
33103,1999,8,Animal production and aquaculture[112],11500
33206,1999,9,Animal production and aquaculture[112],11500
33309,1999,10,Animal production and aquaculture[112],11500
33412,1999,11,Animal production and aquaculture[112],14000


In [17]:
Farms = pd.concat([Crop_Production, Animal_production_and_aquaculture])
Farms

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5491,2000,1,Crop production[111],11250
5593,2000,2,Crop production[111],10250
5695,2000,3,Crop production[111],13500
5797,2000,4,Crop production[111],17000
5899,2000,5,Crop production[111],21000
...,...,...,...,...
33103,1999,8,Animal production and aquaculture[112],11500
33206,1999,9,Animal production and aquaculture[112],11500
33309,1999,10,Animal production and aquaculture[112],11500
33412,1999,11,Animal production and aquaculture[112],14000


### Changing all names of the NACIS column to *Farms*

After changing all column names to *Farms*, the dataframe was sorted by the **SYEAR and SMTH** columns. Then, the *Employment* column was segregated to add up consecutives even rows to get the sum of empoyments from both [111] and [112].

After that, the dataframe was merged back and the columns names were changed to reflect *Data Output Template*.

In [18]:
Farms = Farms.replace('Crop production[111]', 'Farms')
Farms = Farms.replace('Animal production and aquaculture[112]', 'Farms')
Farms

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5491,2000,1,Farms,11250
5593,2000,2,Farms,10250
5695,2000,3,Farms,13500
5797,2000,4,Farms,17000
5899,2000,5,Farms,21000
...,...,...,...,...
33103,1999,8,Farms,11500
33206,1999,9,Farms,11500
33309,1999,10,Farms,11500
33412,1999,11,Farms,14000


In [19]:
df1 = Farms.sort_values(['SYEAR', 'SMTH'])
df1.head(50)

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
29923,1997,1,Farms,8750
29910,1997,1,Farms,13500
30026,1997,2,Farms,9750
30013,1997,2,Farms,14000
30129,1997,3,Farms,10500
30116,1997,3,Farms,13250
30232,1997,4,Farms,10500
30219,1997,4,Farms,15000
30335,1997,5,Farms,12000
30322,1997,5,Farms,15500


### Executing all aforementioned steps in one cell 

In [20]:
df = df1.drop(['SYEAR', 'SMTH', 'NAICS'], axis = 1)
df

df.index = np.arange(1,len(df)+1)
df = df.reset_index()
df

df = df.set_index('index')
df_odd = df.loc[df.index.values % 2 == 1]
df_even = df.loc[df.index.values % 2 == 0]
df_even = df_even.set_index(df_even.index.values - 1)
new = df_odd.add(df_even, fill_value = 0)
new = new.reset_index().reset_index()
new

df2 = df1[np.arange(len(df)) % 2 == 0]
df2 = df2.reset_index().reset_index()
df2

df3 = pd.merge(df2, new, how = 'inner', on = 'level_0')
df3 = df3.drop(['level_0', 'index_x', '_EMPLOYMENT__x', 'index_y'], axis = 1)
df3.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Farms = df3
Farms

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,1997,1,Farms,22250
1,1997,2,Farms,23750
2,1997,3,Farms,23750
3,1997,4,Farms,25500
4,1997,5,Farms,27500
...,...,...,...,...
271,2019,8,Farms,27500
272,2019,9,Farms,27750
273,2019,10,Farms,0
274,2019,11,Farms,0


## Getting *Food, beverage and tobacco manufacturing* from Food manufacturing[311] & Beverage and tobacco product manufacturing[312]

In [21]:
Food_manufacturing = data.loc[data['NAICS'].str.contains('311', na = False)]
Food_manufacturing

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5501,2000,1,Food manufacturing[311],19500
5603,2000,2,Food manufacturing[311],16000
5705,2000,3,Food manufacturing[311],14750
5807,2000,4,Food manufacturing[311],18500
5909,2000,5,Food manufacturing[311],19750
...,...,...,...,...
33126,1999,8,Food manufacturing[311],19000
33229,1999,9,Food manufacturing[311],18250
33332,1999,10,Food manufacturing[311],20000
33435,1999,11,Food manufacturing[311],17500


In [22]:
Beverage_and_tobacco_product_manufacturing = data.loc[data['NAICS'].str.contains('312', na = False)]
Beverage_and_tobacco_product_manufacturing

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5479,2000,1,Beverage and tobacco product manufacturing[312],3250
5581,2000,2,Beverage and tobacco product manufacturing[312],4250
5683,2000,3,Beverage and tobacco product manufacturing[312],3750
5785,2000,4,Beverage and tobacco product manufacturing[312],4750
5887,2000,5,Beverage and tobacco product manufacturing[312],5000
...,...,...,...,...
33104,1999,8,Beverage and tobacco product manufacturing[312],3250
33207,1999,9,Beverage and tobacco product manufacturing[312],2250
33310,1999,10,Beverage and tobacco product manufacturing[312],2500
33413,1999,11,Beverage and tobacco product manufacturing[312],2000


In [23]:
Food_beverage_and_tobacco_manufacturing = pd.concat([Food_manufacturing, Beverage_and_tobacco_product_manufacturing])
Food_beverage_and_tobacco_manufacturing

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5501,2000,1,Food manufacturing[311],19500
5603,2000,2,Food manufacturing[311],16000
5705,2000,3,Food manufacturing[311],14750
5807,2000,4,Food manufacturing[311],18500
5909,2000,5,Food manufacturing[311],19750
...,...,...,...,...
33104,1999,8,Beverage and tobacco product manufacturing[312],3250
33207,1999,9,Beverage and tobacco product manufacturing[312],2250
33310,1999,10,Beverage and tobacco product manufacturing[312],2500
33413,1999,11,Beverage and tobacco product manufacturing[312],2000


### Changing all names of the NACIS column to *Food, beverage and tobacco manufacturing*

After changing all column names to *Food, beverage and tobacco manufacturing*, the dataframe was sorted by the **SYEAR and SMTH** columns. Then, the *Employment* column was segregated to add up consecutives even rows to get the sum of empoyments from both [311] and [312].

After that, the dataframe was merged back and the columns names were changed to reflect *Data Output Template*.

In [24]:
Food_beverage_and_tobacco_manufacturing = Food_beverage_and_tobacco_manufacturing.replace(
    'Food manufacturing[311]', 'Food, beverage and tobacco manufacturing')
Food_beverage_and_tobacco_manufacturing = Food_beverage_and_tobacco_manufacturing.replace(
    'Beverage and tobacco product manufacturing[312]', 'Food, beverage and tobacco manufacturing')
Food_beverage_and_tobacco_manufacturing

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5501,2000,1,"Food, beverage and tobacco manufacturing",19500
5603,2000,2,"Food, beverage and tobacco manufacturing",16000
5705,2000,3,"Food, beverage and tobacco manufacturing",14750
5807,2000,4,"Food, beverage and tobacco manufacturing",18500
5909,2000,5,"Food, beverage and tobacco manufacturing",19750
...,...,...,...,...
33104,1999,8,"Food, beverage and tobacco manufacturing",3250
33207,1999,9,"Food, beverage and tobacco manufacturing",2250
33310,1999,10,"Food, beverage and tobacco manufacturing",2500
33413,1999,11,"Food, beverage and tobacco manufacturing",2000


In [25]:
df1 = Food_beverage_and_tobacco_manufacturing.sort_values(['SYEAR', 'SMTH'])
df1.tail(50)

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
27408,2017,12,"Food, beverage and tobacco manufacturing",21750
27386,2017,12,"Food, beverage and tobacco manufacturing",8250
27509,2018,1,"Food, beverage and tobacco manufacturing",27000
27487,2018,1,"Food, beverage and tobacco manufacturing",8500
27610,2018,2,"Food, beverage and tobacco manufacturing",26000
27588,2018,2,"Food, beverage and tobacco manufacturing",8500
27711,2018,3,"Food, beverage and tobacco manufacturing",28250
27689,2018,3,"Food, beverage and tobacco manufacturing",6500
27812,2018,4,"Food, beverage and tobacco manufacturing",25000
27790,2018,4,"Food, beverage and tobacco manufacturing",8250


### Executing all aforementioned steps in one cell 

In [26]:
df = df1.drop(['SYEAR', 'SMTH', 'NAICS'], axis = 1)
df

df.index = np.arange(1,len(df)+1)
df = df.reset_index()
df

df = df.set_index('index')
df_odd = df.loc[df.index.values % 2 == 1]
df_even = df.loc[df.index.values % 2 == 0]
df_even = df_even.set_index(df_even.index.values - 1)
new = df_odd.add(df_even, fill_value = 0)
new = new.reset_index().reset_index()
new

df2 = df1[np.arange(len(df)) % 2 == 0]
df2 = df2.reset_index().reset_index()
df2

df3 = pd.merge(df2, new, how = 'inner', on = 'level_0')
df3 = df3.drop(['level_0', 'index_x', '_EMPLOYMENT__x', 'index_y'], axis = 1)
df3.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Food_beverage_and_tobacco_manufacturing = df3
Food_beverage_and_tobacco_manufacturing

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,1997,1,"Food, beverage and tobacco manufacturing",24000
1,1997,2,"Food, beverage and tobacco manufacturing",26500
2,1997,3,"Food, beverage and tobacco manufacturing",25250
3,1997,4,"Food, beverage and tobacco manufacturing",21000
4,1997,5,"Food, beverage and tobacco manufacturing",21750
...,...,...,...,...
271,2019,8,"Food, beverage and tobacco manufacturing",36250
272,2019,9,"Food, beverage and tobacco manufacturing",35250
273,2019,10,"Food, beverage and tobacco manufacturing",0
274,2019,11,"Food, beverage and tobacco manufacturing",0


## Getting  *Business, building and other support services*  from Management of companies and enterprises[55] & Administrative and support, waste management and remediation services[56]

The whole process was repeated to obatin the *Business, building and other support services*. After changing all column names to *Business, building and other support services*, the dataframe was sorted by the **SYEAR and SMTH** columns. Then, the *Employment* column was segregated to add up consecutives even rows to get the sum of empoyments from both [55] and [56].

After that, the dataframe was merged back and the columns names were changed to reflect *Data Output Template*.

In [27]:
MCE = data[data['NAICS'] == 'Management of companies and enterprises [55]']
MCE

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
9,2000,1,Management of companies and enterprises [55],1000
29,2000,2,Management of companies and enterprises [55],1500
49,2000,3,Management of companies and enterprises [55],1500
69,2000,4,Management of companies and enterprises [55],1000
89,2000,5,Management of companies and enterprises [55],1000
...,...,...,...,...
5381,1999,8,Management of companies and enterprises [55],500
5401,1999,9,Management of companies and enterprises [55],250
5421,1999,10,Management of companies and enterprises [55],1250
5441,1999,11,Management of companies and enterprises [55],1500


In [28]:
MCE.SYEAR.value_counts().sort_values()

2015    12
2012    12
2011    12
2010    12
2009    12
2008    12
2007    12
2014    12
2013    12
2006    12
2003    12
2002    12
2001    12
2000    12
1999    12
1998    12
2005    12
2004    12
1997    12
Name: SYEAR, dtype: int64

As we can see, **Management of companies and enterprises** collected data till 2015, as such the *2016-2019* data has to be removed from **Administrative and support, waste management and remediation services** before algorithm can correctly add consecutive even rows to get the required sum. After getting the correct sum, the *2016-2019* will be added.

In [29]:
AWR = data[data['NAICS'] == 'Administrative and support, waste management and remediation services [56]']
AWR

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
1,2000,1,"Administrative and support, waste management a...",59250
21,2000,2,"Administrative and support, waste management a...",64000
41,2000,3,"Administrative and support, waste management a...",64750
61,2000,4,"Administrative and support, waste management a...",66750
81,2000,5,"Administrative and support, waste management a...",63000
...,...,...,...,...
5373,1999,8,"Administrative and support, waste management a...",68750
5393,1999,9,"Administrative and support, waste management a...",65750
5413,1999,10,"Administrative and support, waste management a...",59000
5433,1999,11,"Administrative and support, waste management a...",62250


In [30]:
AWR_reduced = AWR[(data['SYEAR'] != 2016) & (data['SYEAR'] != 2017) & (data['SYEAR'] != 2018) & (data['SYEAR'] != 2019)]
AWR_reduced

  """Entry point for launching an IPython kernel.


Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
1,2000,1,"Administrative and support, waste management a...",59250
21,2000,2,"Administrative and support, waste management a...",64000
41,2000,3,"Administrative and support, waste management a...",64750
61,2000,4,"Administrative and support, waste management a...",66750
81,2000,5,"Administrative and support, waste management a...",63000
...,...,...,...,...
5373,1999,8,"Administrative and support, waste management a...",68750
5393,1999,9,"Administrative and support, waste management a...",65750
5413,1999,10,"Administrative and support, waste management a...",59000
5433,1999,11,"Administrative and support, waste management a...",62250


In [31]:
AWR_removed_data = AWR[(data['SYEAR'] == 2016) | (data['SYEAR'] == 2017) | (data['SYEAR'] == 2018) | (data['SYEAR'] == 2019)]
AWR_removed_data

  """Entry point for launching an IPython kernel.


Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
3841,2016,1,"Administrative and support, waste management a...",92250
3860,2016,2,"Administrative and support, waste management a...",92250
3879,2016,3,"Administrative and support, waste management a...",92750
3898,2016,4,"Administrative and support, waste management a...",100750
3917,2016,5,"Administrative and support, waste management a...",104750
3936,2016,6,"Administrative and support, waste management a...",113500
3955,2016,7,"Administrative and support, waste management a...",115750
3974,2016,8,"Administrative and support, waste management a...",122250
3993,2016,9,"Administrative and support, waste management a...",114000
4012,2016,10,"Administrative and support, waste management a...",109750


In [32]:
Business_building_and_other_support_services = pd.concat([MCE, AWR_reduced])
Business_building_and_other_support_services

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
9,2000,1,Management of companies and enterprises [55],1000
29,2000,2,Management of companies and enterprises [55],1500
49,2000,3,Management of companies and enterprises [55],1500
69,2000,4,Management of companies and enterprises [55],1000
89,2000,5,Management of companies and enterprises [55],1000
...,...,...,...,...
5373,1999,8,"Administrative and support, waste management a...",68750
5393,1999,9,"Administrative and support, waste management a...",65750
5413,1999,10,"Administrative and support, waste management a...",59000
5433,1999,11,"Administrative and support, waste management a...",62250


In [33]:
Business_building_and_other_support_services = Business_building_and_other_support_services.replace(
    'Management of companies and enterprises [55]', 'Business, building and other support services')
Business_building_and_other_support_services = Business_building_and_other_support_services.replace(
    'Administrative and support, waste management and remediation services [56]', 'Business, building and other support services')
Business_building_and_other_support_services

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
9,2000,1,"Business, building and other support services",1000
29,2000,2,"Business, building and other support services",1500
49,2000,3,"Business, building and other support services",1500
69,2000,4,"Business, building and other support services",1000
89,2000,5,"Business, building and other support services",1000
...,...,...,...,...
5373,1999,8,"Business, building and other support services",68750
5393,1999,9,"Business, building and other support services",65750
5413,1999,10,"Business, building and other support services",59000
5433,1999,11,"Business, building and other support services",62250


In [34]:
df1 = Business_building_and_other_support_services.sort_values(['SYEAR', 'SMTH'])
df1.head(50)

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
4761,1997,1,"Business, building and other support services",1750
4753,1997,1,"Business, building and other support services",58500
4781,1997,2,"Business, building and other support services",500
4773,1997,2,"Business, building and other support services",60250
4801,1997,3,"Business, building and other support services",1000
4793,1997,3,"Business, building and other support services",55500
4821,1997,4,"Business, building and other support services",500
4813,1997,4,"Business, building and other support services",61000
4841,1997,5,"Business, building and other support services",500
4833,1997,5,"Business, building and other support services",60500


### Executing all aforementioned steps in one cell 

In [35]:
df = df1.drop(['SYEAR', 'SMTH', 'NAICS'], axis = 1)
df

df.index = np.arange(1,len(df)+1)
df = df.reset_index()
df

df = df.set_index('index')
df_odd = df.loc[df.index.values % 2 == 1]
df_even = df.loc[df.index.values % 2 == 0]
df_even = df_even.set_index(df_even.index.values - 1)
new = df_odd.add(df_even, fill_value = 0)
new = new.reset_index().reset_index()
new

df2 = df1[np.arange(len(df)) % 2 == 0]
df2 = df2.reset_index().reset_index()
df2

df3 = pd.merge(df2, new, how = 'inner', on = 'level_0')
df3 = df3.drop(['level_0', 'index_x', '_EMPLOYMENT__x', 'index_y'], axis = 1)
df3.columns = ['SYEAR', 'SMTH', 'NAICS', '_EMPLOYMENT_']
Business_building_and_other_support_services = df3
Business_building_and_other_support_services.tail(30)

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
198,2013,7,"Business, building and other support services",109000
199,2013,8,"Business, building and other support services",100750
200,2013,9,"Business, building and other support services",98500
201,2013,10,"Business, building and other support services",96000
202,2013,11,"Business, building and other support services",91750
203,2013,12,"Business, building and other support services",91000
204,2014,1,"Business, building and other support services",86250
205,2014,2,"Business, building and other support services",79500
206,2014,3,"Business, building and other support services",82750
207,2014,4,"Business, building and other support services",93500


Adding removed data and renaming the coulmns now

In [36]:
Business_building_and_other_support_services = pd.concat([Business_building_and_other_support_services, AWR_removed_data])
Business_building_and_other_support_services.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Business_building_and_other_support_services

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,1997,1,"Business, building and other support services",60250
1,1997,2,"Business, building and other support services",60750
2,1997,3,"Business, building and other support services",56500
3,1997,4,"Business, building and other support services",61500
4,1997,5,"Business, building and other support services",61000
...,...,...,...,...
4658,2019,8,"Administrative and support, waste management a...",117750
4677,2019,9,"Administrative and support, waste management a...",109750
4696,2019,10,"Administrative and support, waste management a...",0
4715,2019,11,"Administrative and support, waste management a...",0


## Getting  *Local and Indigenous public administration*  from Local, municipal and regional public administration[913], Aboriginal public administration[914] & International and other extra-territorial public administration[919]

The whole process was repeated to obatin the *Local and Indigenous public administration*. After changing all column names to *Business, building and other support services*, the dataframe was sorted by the **SYEAR and SMTH** columns. Then, the *Employment* column was segregated to add up three consecutives rows to get the sum of empoyments from [913], [914] and [919].

After that, the dataframe was merged back and the columns names were changed to reflect *Data Output Template*.

In [37]:
LMRPA = data.loc[data['NAICS'].str.contains('913', na = False)]
LMRPA

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5518,2000,1,"Local, municipal and regional public administr...",28000
5620,2000,2,"Local, municipal and regional public administr...",25250
5722,2000,3,"Local, municipal and regional public administr...",30500
5824,2000,4,"Local, municipal and regional public administr...",26750
5926,2000,5,"Local, municipal and regional public administr...",30500
...,...,...,...,...
33143,1999,8,"Local, municipal and regional public administr...",32250
33246,1999,9,"Local, municipal and regional public administr...",26000
33349,1999,10,"Local, municipal and regional public administr...",27000
33452,1999,11,"Local, municipal and regional public administr...",24750


In [38]:
Aboriginal_public_administration = data.loc[data['NAICS'].str.contains('914', na = False)]
Aboriginal_public_administration

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5472,2000,1,Aboriginal public administration[914],500
5574,2000,2,Aboriginal public administration[914],1000
5676,2000,3,Aboriginal public administration[914],1750
5778,2000,4,Aboriginal public administration[914],2000
5880,2000,5,Aboriginal public administration[914],1500
...,...,...,...,...
33097,1999,8,Aboriginal public administration[914],1750
33200,1999,9,Aboriginal public administration[914],1500
33303,1999,10,Aboriginal public administration[914],750
33406,1999,11,Aboriginal public administration[914],1250


In [39]:
IETPA = data.loc[data['NAICS'].str.contains('919', na = False)]
IETPA

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5515,2000,1,International and other extra-territorial publ...,750
5617,2000,2,International and other extra-territorial publ...,500
5719,2000,3,International and other extra-territorial publ...,750
5821,2000,4,International and other extra-territorial publ...,500
5923,2000,5,International and other extra-territorial publ...,0
...,...,...,...,...
33140,1999,8,International and other extra-territorial publ...,250
33243,1999,9,International and other extra-territorial publ...,0
33346,1999,10,International and other extra-territorial publ...,0
33449,1999,11,International and other extra-territorial publ...,750


In [40]:
Local_and_Indigenous_public_administration = pd.concat([LMRPA, Aboriginal_public_administration, IETPA])
Local_and_Indigenous_public_administration

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5518,2000,1,"Local, municipal and regional public administr...",28000
5620,2000,2,"Local, municipal and regional public administr...",25250
5722,2000,3,"Local, municipal and regional public administr...",30500
5824,2000,4,"Local, municipal and regional public administr...",26750
5926,2000,5,"Local, municipal and regional public administr...",30500
...,...,...,...,...
33140,1999,8,International and other extra-territorial publ...,250
33243,1999,9,International and other extra-territorial publ...,0
33346,1999,10,International and other extra-territorial publ...,0
33449,1999,11,International and other extra-territorial publ...,750


In [41]:
Local_and_Indigenous_public_administration = Local_and_Indigenous_public_administration.replace(
    'Local, municipal and regional public administration[913]', 'Local and Indigenous public administration')
Local_and_Indigenous_public_administration = Local_and_Indigenous_public_administration.replace(
    'Aboriginal public administration[914]','Local and Indigenous public administration')
Local_and_Indigenous_public_administration = Local_and_Indigenous_public_administration.replace(
    'International and other extra-territorial public administration[919]', 'Local and Indigenous public administration')
Local_and_Indigenous_public_administration

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
5518,2000,1,Local and Indigenous public administration,28000
5620,2000,2,Local and Indigenous public administration,25250
5722,2000,3,Local and Indigenous public administration,30500
5824,2000,4,Local and Indigenous public administration,26750
5926,2000,5,Local and Indigenous public administration,30500
...,...,...,...,...
33140,1999,8,Local and Indigenous public administration,250
33243,1999,9,Local and Indigenous public administration,0
33346,1999,10,Local and Indigenous public administration,0
33449,1999,11,Local and Indigenous public administration,750


In [42]:
df1 = Local_and_Indigenous_public_administration.sort_values(['SYEAR', 'SMTH'])
df1.head(50)

Unnamed: 0,SYEAR,SMTH,NAICS,_EMPLOYMENT_
29950,1997,1,Local and Indigenous public administration,32500
29904,1997,1,Local and Indigenous public administration,500
29947,1997,1,Local and Indigenous public administration,0
30053,1997,2,Local and Indigenous public administration,32500
30007,1997,2,Local and Indigenous public administration,750
30050,1997,2,Local and Indigenous public administration,0
30156,1997,3,Local and Indigenous public administration,32500
30110,1997,3,Local and Indigenous public administration,1250
30153,1997,3,Local and Indigenous public administration,0
30259,1997,4,Local and Indigenous public administration,33750


### Executing the rest of the aforementioned steps in the subsequent cells

In [43]:
df = df1.drop(['SYEAR', 'SMTH', 'NAICS'], axis = 1)
df

df.index = np.arange(1,len(df)+1)
df = df.reset_index()

df = df.set_index('index')
df_first = df.loc[df.index.values % 3 == 1].reset_index()
df_second = df.loc[df.index.values % 3 == 2].reset_index()
df_third = df.loc[df.index.values % 3 == 0].reset_index()

In [44]:
new  = pd.merge(df_first, df_second, left_index=True, right_index=True)
new  = pd.merge(new, df_third, left_index=True, right_index=True)
new['EMPLOYMENT'] = new['_EMPLOYMENT__x'] + new['_EMPLOYMENT__y'] + new['_EMPLOYMENT_']
new = new['EMPLOYMENT']
new.to_frame()

Unnamed: 0,EMPLOYMENT
0,33000
1,33250
2,33750
3,34500
4,32750
...,...
271,42000
272,45750
273,0
274,0


In [45]:
df2 = df1[np.arange(len(df)) % 3 == 0]
df2 = df2.reset_index().reset_index()
df2

Unnamed: 0,level_0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,0,29950,1997,1,Local and Indigenous public administration,32500
1,1,30053,1997,2,Local and Indigenous public administration,32500
2,2,30156,1997,3,Local and Indigenous public administration,32500
3,3,30259,1997,4,Local and Indigenous public administration,33750
4,4,30362,1997,5,Local and Indigenous public administration,31500
...,...,...,...,...,...,...
271,271,29445,2019,8,Local and Indigenous public administration,40250
272,272,29546,2019,9,Local and Indigenous public administration,43500
273,273,29647,2019,10,Local and Indigenous public administration,0
274,274,29748,2019,11,Local and Indigenous public administration,0


In [46]:
df3 = pd.merge(df2, new, left_index=True, right_index=True)
df3 = df3.drop(['level_0', 'index', '_EMPLOYMENT_'], axis = 1)
df3.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Local_and_Indigenous_public_administration = df3
Local_and_Indigenous_public_administration.head(30)

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,1997,1,Local and Indigenous public administration,33000
1,1997,2,Local and Indigenous public administration,33250
2,1997,3,Local and Indigenous public administration,33750
3,1997,4,Local and Indigenous public administration,34500
4,1997,5,Local and Indigenous public administration,32750
5,1997,6,Local and Indigenous public administration,32750
6,1997,7,Local and Indigenous public administration,32000
7,1997,8,Local and Indigenous public administration,33500
8,1997,9,Local and Indigenous public administration,32250
9,1997,10,Local and Indigenous public administration,34500


## Getting  *Broadcasting, data processing, and information*  from 	Broadcasting (except Internet)[515], Data processing, hosting, and related services[518] & International and other Other information services[519]

A new method was applied to obatin the *Broadcasting, data processing, and information*. The three extracted dataframes were merged on their indexes. Then, all the *Employment* columns were added to get the sum of empoyments from [515], [518] and [519]. 

After that, the other columns were removed, the NAICS column name was cahnged to *Broadcasting, data processing, and information* and the columns names were changed to reflect *Data Output Template*.

In [47]:
Broadcasting_exceptInternet = data.loc[data['NAICS'].str.contains('515', na = False)].reset_index()
Broadcasting_exceptInternet

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5480,2000,1,Broadcasting (except Internet)[515],4750
1,5582,2000,2,Broadcasting (except Internet)[515],5500
2,5684,2000,3,Broadcasting (except Internet)[515],5250
3,5786,2000,4,Broadcasting (except Internet)[515],6250
4,5888,2000,5,Broadcasting (except Internet)[515],4250
...,...,...,...,...,...
271,33105,1999,8,Broadcasting (except Internet)[515],4000
272,33208,1999,9,Broadcasting (except Internet)[515],3250
273,33311,1999,10,Broadcasting (except Internet)[515],4000
274,33414,1999,11,Broadcasting (except Internet)[515],4750


In [48]:
Data_processing_hosting = data.loc[data['NAICS'].str.contains('518', na = False)].reset_index()
Data_processing_hosting

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5492,2000,1,"Data processing, hosting, and related services...",750
1,5594,2000,2,"Data processing, hosting, and related services...",750
2,5696,2000,3,"Data processing, hosting, and related services...",750
3,5798,2000,4,"Data processing, hosting, and related services...",250
4,5900,2000,5,"Data processing, hosting, and related services...",250
...,...,...,...,...,...
271,33117,1999,8,"Data processing, hosting, and related services...",3250
272,33220,1999,9,"Data processing, hosting, and related services...",2250
273,33323,1999,10,"Data processing, hosting, and related services...",2250
274,33426,1999,11,"Data processing, hosting, and related services...",1500


In [49]:
other_information_services = data.loc[data['NAICS'].str.contains('519', na = False)].reset_index()
other_information_services

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5534,2000,1,Other information services[519],2250
1,5636,2000,2,Other information services[519],2500
2,5738,2000,3,Other information services[519],3000
3,5840,2000,4,Other information services[519],2000
4,5942,2000,5,Other information services[519],2500
...,...,...,...,...,...
271,33160,1999,8,Other information services[519],3250
272,33263,1999,9,Other information services[519],3250
273,33366,1999,10,Other information services[519],3000
274,33469,1999,11,Other information services[519],4500


In [50]:
Broadcasting_data_processing_and_information = pd.merge(
    Broadcasting_exceptInternet, Data_processing_hosting, left_index=True, right_index=True)
Broadcasting_data_processing_and_information = pd.merge(
    Broadcasting_data_processing_and_information, other_information_services, left_index=True, right_index=True)
Broadcasting_data_processing_and_information

Unnamed: 0,index_x,SYEAR_x,SMTH_x,NAICS_x,_EMPLOYMENT__x,index_y,SYEAR_y,SMTH_y,NAICS_y,_EMPLOYMENT__y,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5480,2000,1,Broadcasting (except Internet)[515],4750,5492,2000,1,"Data processing, hosting, and related services...",750,5534,2000,1,Other information services[519],2250
1,5582,2000,2,Broadcasting (except Internet)[515],5500,5594,2000,2,"Data processing, hosting, and related services...",750,5636,2000,2,Other information services[519],2500
2,5684,2000,3,Broadcasting (except Internet)[515],5250,5696,2000,3,"Data processing, hosting, and related services...",750,5738,2000,3,Other information services[519],3000
3,5786,2000,4,Broadcasting (except Internet)[515],6250,5798,2000,4,"Data processing, hosting, and related services...",250,5840,2000,4,Other information services[519],2000
4,5888,2000,5,Broadcasting (except Internet)[515],4250,5900,2000,5,"Data processing, hosting, and related services...",250,5942,2000,5,Other information services[519],2500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,33105,1999,8,Broadcasting (except Internet)[515],4000,33117,1999,8,"Data processing, hosting, and related services...",3250,33160,1999,8,Other information services[519],3250
272,33208,1999,9,Broadcasting (except Internet)[515],3250,33220,1999,9,"Data processing, hosting, and related services...",2250,33263,1999,9,Other information services[519],3250
273,33311,1999,10,Broadcasting (except Internet)[515],4000,33323,1999,10,"Data processing, hosting, and related services...",2250,33366,1999,10,Other information services[519],3000
274,33414,1999,11,Broadcasting (except Internet)[515],4750,33426,1999,11,"Data processing, hosting, and related services...",1500,33469,1999,11,Other information services[519],4500


In [51]:
Broadcasting_data_processing_and_information = Broadcasting_data_processing_and_information.drop([
    'index_x', 'SYEAR_x', 'SMTH_x', 'NAICS_x', 'index_y', 'SYEAR_y', 'SMTH_y', 'NAICS_y', 'index'], axis =1)
Broadcasting_data_processing_and_information

Unnamed: 0,_EMPLOYMENT__x,_EMPLOYMENT__y,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,4750,750,2000,1,Other information services[519],2250
1,5500,750,2000,2,Other information services[519],2500
2,5250,750,2000,3,Other information services[519],3000
3,6250,250,2000,4,Other information services[519],2000
4,4250,250,2000,5,Other information services[519],2500
...,...,...,...,...,...,...
271,4000,3250,1999,8,Other information services[519],3250
272,3250,2250,1999,9,Other information services[519],3250
273,4000,2250,1999,10,Other information services[519],3000
274,4750,1500,1999,11,Other information services[519],4500


In [52]:
Broadcasting_data_processing_and_information['EMPLOYMENT'] =(Broadcasting_data_processing_and_information['_EMPLOYMENT__x'] + 
Broadcasting_data_processing_and_information['_EMPLOYMENT__y'] + Broadcasting_data_processing_and_information['_EMPLOYMENT_'])
Broadcasting_data_processing_and_information = Broadcasting_data_processing_and_information.drop(
    ['_EMPLOYMENT__x', '_EMPLOYMENT__y', '_EMPLOYMENT_'], axis = 1)
Broadcasting_data_processing_and_information = Broadcasting_data_processing_and_information.replace(
    'Other information services[519]', 'Broadcasting data processing and information')
Broadcasting_data_processing_and_information.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Broadcasting_data_processing_and_information

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,2000,1,Broadcasting data processing and information,7750
1,2000,2,Broadcasting data processing and information,8750
2,2000,3,Broadcasting data processing and information,9000
3,2000,4,Broadcasting data processing and information,8500
4,2000,5,Broadcasting data processing and information,7000
...,...,...,...,...
271,1999,8,Broadcasting data processing and information,10500
272,1999,9,Broadcasting data processing and information,8750
273,1999,10,Broadcasting data processing and information,9250
274,1999,11,Broadcasting data processing and information,10750


## Getting  *Transit, sightseeing, and pipeline transportation*  from Transit and ground passenger transportation[485], Pipeline transportation[486] & Scenic and sightseeing transportation[487]	

The new method was applied to obatin the *Transit, sightseeing, and pipeline transportation*. The three extracted dataframes were merged on their indexes. Then, all the *Employment* columns were added to get the sum of empoyments from [485], [486] and [487]. 

After that, the other columns were removed, the NAICS column name was cahnged to *Transit, sightseeing, and pipeline transportation* and the columns names were changed to reflect *Data Output Template*.

In [53]:
Transit_and_ground_passenger_transportation = data.loc[data['NAICS'].str.contains('485', na = False)].reset_index()
Transit_and_ground_passenger_transportation

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5566,2000,1,Transit and ground passenger transportation[485],13250
1,5668,2000,2,Transit and ground passenger transportation[485],15500
2,5770,2000,3,Transit and ground passenger transportation[485],14000
3,5872,2000,4,Transit and ground passenger transportation[485],10750
4,5974,2000,5,Transit and ground passenger transportation[485],12500
...,...,...,...,...,...
271,33192,1999,8,Transit and ground passenger transportation[485],15750
272,33295,1999,9,Transit and ground passenger transportation[485],15750
273,33398,1999,10,Transit and ground passenger transportation[485],16750
274,33501,1999,11,Transit and ground passenger transportation[485],15750


In [54]:
Pipeline_transportation = data.loc[data['NAICS'].str.contains('486', na = False)].reset_index()
Pipeline_transportation

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5541,2000,1,Pipeline transportation[486],750
1,5643,2000,2,Pipeline transportation[486],1000
2,5745,2000,3,Pipeline transportation[486],1250
3,5847,2000,4,Pipeline transportation[486],500
4,5949,2000,5,Pipeline transportation[486],250
...,...,...,...,...,...
271,33167,1999,8,Pipeline transportation[486],250
272,33270,1999,9,Pipeline transportation[486],500
273,33373,1999,10,Pipeline transportation[486],1000
274,33476,1999,11,Pipeline transportation[486],750


In [55]:
Scenic_and_sightseeing_transportation = data.loc[data['NAICS'].str.contains('487', na = False)].reset_index()
Scenic_and_sightseeing_transportation

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5555,2000,1,Scenic and sightseeing transportation[487],500
1,5657,2000,2,Scenic and sightseeing transportation[487],750
2,5759,2000,3,Scenic and sightseeing transportation[487],1750
3,5861,2000,4,Scenic and sightseeing transportation[487],1750
4,5963,2000,5,Scenic and sightseeing transportation[487],500
...,...,...,...,...,...
271,33181,1999,8,Scenic and sightseeing transportation[487],1750
272,33284,1999,9,Scenic and sightseeing transportation[487],1250
273,33387,1999,10,Scenic and sightseeing transportation[487],1250
274,33490,1999,11,Scenic and sightseeing transportation[487],500


In [56]:
Transit_sightseeing_and_pipeline_transportation = pd.merge(
    Transit_and_ground_passenger_transportation, Pipeline_transportation, left_index=True, right_index=True)
Transit_sightseeing_and_pipeline_transportation = pd.merge(
    Transit_sightseeing_and_pipeline_transportation, Scenic_and_sightseeing_transportation, left_index=True, right_index=True)
Transit_sightseeing_and_pipeline_transportation

Unnamed: 0,index_x,SYEAR_x,SMTH_x,NAICS_x,_EMPLOYMENT__x,index_y,SYEAR_y,SMTH_y,NAICS_y,_EMPLOYMENT__y,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5566,2000,1,Transit and ground passenger transportation[485],13250,5541,2000,1,Pipeline transportation[486],750,5555,2000,1,Scenic and sightseeing transportation[487],500
1,5668,2000,2,Transit and ground passenger transportation[485],15500,5643,2000,2,Pipeline transportation[486],1000,5657,2000,2,Scenic and sightseeing transportation[487],750
2,5770,2000,3,Transit and ground passenger transportation[485],14000,5745,2000,3,Pipeline transportation[486],1250,5759,2000,3,Scenic and sightseeing transportation[487],1750
3,5872,2000,4,Transit and ground passenger transportation[485],10750,5847,2000,4,Pipeline transportation[486],500,5861,2000,4,Scenic and sightseeing transportation[487],1750
4,5974,2000,5,Transit and ground passenger transportation[485],12500,5949,2000,5,Pipeline transportation[486],250,5963,2000,5,Scenic and sightseeing transportation[487],500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,33192,1999,8,Transit and ground passenger transportation[485],15750,33167,1999,8,Pipeline transportation[486],250,33181,1999,8,Scenic and sightseeing transportation[487],1750
272,33295,1999,9,Transit and ground passenger transportation[485],15750,33270,1999,9,Pipeline transportation[486],500,33284,1999,9,Scenic and sightseeing transportation[487],1250
273,33398,1999,10,Transit and ground passenger transportation[485],16750,33373,1999,10,Pipeline transportation[486],1000,33387,1999,10,Scenic and sightseeing transportation[487],1250
274,33501,1999,11,Transit and ground passenger transportation[485],15750,33476,1999,11,Pipeline transportation[486],750,33490,1999,11,Scenic and sightseeing transportation[487],500


In [57]:
Transit_sightseeing_and_pipeline_transportation = Transit_sightseeing_and_pipeline_transportation.drop([
    'index_x', 'SYEAR_x', 'SMTH_x', 'NAICS_x', 'index_y', 'SYEAR_y', 'SMTH_y', 'NAICS_y', 'index'], axis =1)
Transit_sightseeing_and_pipeline_transportation

Unnamed: 0,_EMPLOYMENT__x,_EMPLOYMENT__y,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,13250,750,2000,1,Scenic and sightseeing transportation[487],500
1,15500,1000,2000,2,Scenic and sightseeing transportation[487],750
2,14000,1250,2000,3,Scenic and sightseeing transportation[487],1750
3,10750,500,2000,4,Scenic and sightseeing transportation[487],1750
4,12500,250,2000,5,Scenic and sightseeing transportation[487],500
...,...,...,...,...,...,...
271,15750,250,1999,8,Scenic and sightseeing transportation[487],1750
272,15750,500,1999,9,Scenic and sightseeing transportation[487],1250
273,16750,1000,1999,10,Scenic and sightseeing transportation[487],1250
274,15750,750,1999,11,Scenic and sightseeing transportation[487],500


In [58]:
Transit_sightseeing_and_pipeline_transportation['EMPLOYMENT'] =(Transit_sightseeing_and_pipeline_transportation['_EMPLOYMENT__x'] + 
Transit_sightseeing_and_pipeline_transportation['_EMPLOYMENT__y'] + Transit_sightseeing_and_pipeline_transportation['_EMPLOYMENT_'])
Transit_sightseeing_and_pipeline_transportation = Transit_sightseeing_and_pipeline_transportation.drop(
    ['_EMPLOYMENT__x', '_EMPLOYMENT__y', '_EMPLOYMENT_'], axis = 1)
Transit_sightseeing_and_pipeline_transportation = Transit_sightseeing_and_pipeline_transportation.replace(
    'Scenic and sightseeing transportation[487]', 'Transit, sightseeing, and pipeline transportation')
Transit_sightseeing_and_pipeline_transportation.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Transit_sightseeing_and_pipeline_transportation

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,2000,1,"Transit, sightseeing, and pipeline transportation",14500
1,2000,2,"Transit, sightseeing, and pipeline transportation",17250
2,2000,3,"Transit, sightseeing, and pipeline transportation",17000
3,2000,4,"Transit, sightseeing, and pipeline transportation",13000
4,2000,5,"Transit, sightseeing, and pipeline transportation",13250
...,...,...,...,...
271,1999,8,"Transit, sightseeing, and pipeline transportation",17750
272,1999,9,"Transit, sightseeing, and pipeline transportation",17500
273,1999,10,"Transit, sightseeing, and pipeline transportation",19000
274,1999,11,"Transit, sightseeing, and pipeline transportation",17000


## Getting  *Finance*  from Monetary authorities - central bank[521], Credit intermediation and related activities[522]	, International and Blank[523] & Funds and other financial vehicles[526]	

The new method was applied to obatin the *Finance*. The four extracted dataframes were merged on their indexes. Then, all the *Employment* columns were added to get the sum of empoyments from [521], [522], [523], and [526]. **This time around [523] had no rows as proven below. Also, [521] contains data for only 1997-1999, as such a little data manipulation was done to obtain the right results.**

After that, the other columns were removed, the NAICS column name was cahnged to *Finance* and the columns names were changed to reflect *Data Output Template*.

In [59]:
Monetary_authorities_central_bank = data.loc[data['NAICS'].str.contains('521', na = False)].reset_index()
Monetary_authorities_central_bank

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,29958,1997,1,Monetary authorities - central bank[521],0
1,30061,1997,2,Monetary authorities - central bank[521],0
2,30164,1997,3,Monetary authorities - central bank[521],0
3,30267,1997,4,Monetary authorities - central bank[521],0
4,30370,1997,5,Monetary authorities - central bank[521],0
5,30473,1997,6,Monetary authorities - central bank[521],0
6,30576,1997,7,Monetary authorities - central bank[521],0
7,30679,1997,8,Monetary authorities - central bank[521],0
8,30782,1997,9,Monetary authorities - central bank[521],0
9,30885,1997,10,Monetary authorities - central bank[521],0


In [60]:
Credit_intermediation_and_related_activities = data.loc[data['NAICS'].str.contains('522', na = False)].reset_index()
Credit_intermediation_and_related_activities

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5490,2000,1,Credit intermediation and related activities[522],38000
1,5592,2000,2,Credit intermediation and related activities[522],40500
2,5694,2000,3,Credit intermediation and related activities[522],42000
3,5796,2000,4,Credit intermediation and related activities[522],41250
4,5898,2000,5,Credit intermediation and related activities[522],40250
...,...,...,...,...,...
271,33115,1999,8,Credit intermediation and related activities[522],37250
272,33218,1999,9,Credit intermediation and related activities[522],38500
273,33321,1999,10,Credit intermediation and related activities[522],37750
274,33424,1999,11,Credit intermediation and related activities[522],37250


In [61]:
blank_for_523 = data.loc[data['NAICS'].str.contains('523', na = False)].reset_index()
blank_for_523

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_


In [62]:
Funds_and_other_financial_vehicles = data.loc[data['NAICS'].str.contains('526', na = False)].reset_index()
Funds_and_other_financial_vehicles.columns = ['index', 'SYEAR', 'SMTH', 'NAICS', '_EMPLOYMENT_']
Funds_and_other_financial_vehicles

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5505,2000,1,Funds and other financial vehicles[526],0
1,5607,2000,2,Funds and other financial vehicles[526],0
2,5709,2000,3,Funds and other financial vehicles[526],0
3,5811,2000,4,Funds and other financial vehicles[526],0
4,5913,2000,5,Funds and other financial vehicles[526],0
...,...,...,...,...,...
271,33130,1999,8,Funds and other financial vehicles[526],0
272,33233,1999,9,Funds and other financial vehicles[526],0
273,33336,1999,10,Funds and other financial vehicles[526],0
274,33439,1999,11,Funds and other financial vehicles[526],0


In [63]:
add_on = Funds_and_other_financial_vehicles.loc[(Funds_and_other_financial_vehicles['SYEAR'] != 1997) & (Funds_and_other_financial_vehicles['SYEAR'] != 1998) & (Funds_and_other_financial_vehicles['SYEAR'] != 1999)]
add_on = add_on.drop('_EMPLOYMENT_', axis =1)
add_on['_EMPLOYMENT_'] = 0
Monetary_authorities_central_bank = pd.concat([Monetary_authorities_central_bank, add_on])
Monetary_authorities_central_bank

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,29958,1997,1,Monetary authorities - central bank[521],0
1,30061,1997,2,Monetary authorities - central bank[521],0
2,30164,1997,3,Monetary authorities - central bank[521],0
3,30267,1997,4,Monetary authorities - central bank[521],0
4,30370,1997,5,Monetary authorities - central bank[521],0
...,...,...,...,...,...
235,29432,2019,8,Funds and other financial vehicles[526],0
236,29533,2019,9,Funds and other financial vehicles[526],0
237,29634,2019,10,Funds and other financial vehicles[526],0
238,29735,2019,11,Funds and other financial vehicles[526],0


In [64]:
Finance = pd.merge(
   Monetary_authorities_central_bank,Credit_intermediation_and_related_activities, left_index=True, right_index=True)
Finance = pd.merge(
    Finance, Funds_and_other_financial_vehicles, left_index=True, right_index=True)
Finance

Unnamed: 0,index_x,SYEAR_x,SMTH_x,NAICS_x,_EMPLOYMENT__x,index_y,SYEAR_y,SMTH_y,NAICS_y,_EMPLOYMENT__y,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,29958,1997,1,Monetary authorities - central bank[521],0,5490,2000,1,Credit intermediation and related activities[522],38000,5505,2000,1,Funds and other financial vehicles[526],0
0,5505,2000,1,Funds and other financial vehicles[526],0,5490,2000,1,Credit intermediation and related activities[522],38000,5505,2000,1,Funds and other financial vehicles[526],0
1,30061,1997,2,Monetary authorities - central bank[521],0,5592,2000,2,Credit intermediation and related activities[522],40500,5607,2000,2,Funds and other financial vehicles[526],0
1,5607,2000,2,Funds and other financial vehicles[526],0,5592,2000,2,Credit intermediation and related activities[522],40500,5607,2000,2,Funds and other financial vehicles[526],0
2,30164,1997,3,Monetary authorities - central bank[521],0,5694,2000,3,Credit intermediation and related activities[522],42000,5709,2000,3,Funds and other financial vehicles[526],0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,29432,2019,8,Funds and other financial vehicles[526],0,29417,2019,8,Credit intermediation and related activities[522],42750,29432,2019,8,Funds and other financial vehicles[526],250
236,29533,2019,9,Funds and other financial vehicles[526],0,29518,2019,9,Credit intermediation and related activities[522],41750,29533,2019,9,Funds and other financial vehicles[526],250
237,29634,2019,10,Funds and other financial vehicles[526],0,29619,2019,10,Credit intermediation and related activities[522],0,29634,2019,10,Funds and other financial vehicles[526],0
238,29735,2019,11,Funds and other financial vehicles[526],0,29720,2019,11,Credit intermediation and related activities[522],0,29735,2019,11,Funds and other financial vehicles[526],0


In [65]:
Finance = Finance.drop([
    'index_x', 'SYEAR_x', 'SMTH_x', 'NAICS_x', 'index_y', 'SYEAR_y', 'SMTH_y', 'NAICS_y', 'index'], axis =1)
Finance

Unnamed: 0,_EMPLOYMENT__x,_EMPLOYMENT__y,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,0,38000,2000,1,Funds and other financial vehicles[526],0
0,0,38000,2000,1,Funds and other financial vehicles[526],0
1,0,40500,2000,2,Funds and other financial vehicles[526],0
1,0,40500,2000,2,Funds and other financial vehicles[526],0
2,0,42000,2000,3,Funds and other financial vehicles[526],0
...,...,...,...,...,...,...
235,0,42750,2019,8,Funds and other financial vehicles[526],250
236,0,41750,2019,9,Funds and other financial vehicles[526],250
237,0,0,2019,10,Funds and other financial vehicles[526],0
238,0,0,2019,11,Funds and other financial vehicles[526],0


In [66]:
Finance['EMPLOYMENT'] =(Finance['_EMPLOYMENT__x'] + 
Finance['_EMPLOYMENT__y'] + Finance['_EMPLOYMENT_'])
Finance = Finance.drop(
    ['_EMPLOYMENT__x', '_EMPLOYMENT__y', '_EMPLOYMENT_'], axis = 1)
Finance = Finance.replace(
    'Funds and other financial vehicles[526]', 'Finance')
Finance.columns = ['SYEAR', 'SMTH', 'LMO_Detailed_Industry', 'Employment']
Finance

Unnamed: 0,SYEAR,SMTH,LMO_Detailed_Industry,Employment
0,2000,1,Finance,38000
0,2000,1,Finance,38000
1,2000,2,Finance,40500
1,2000,2,Finance,40500
2,2000,3,Finance,42000
...,...,...,...,...
235,2019,8,Finance,43000
236,2019,9,Finance,42000
237,2019,10,Finance,0
238,2019,11,Finance,0


## Getting  *Private and trades education*  from Business and secretarial schools[6114], Technical and trade schools[6115]	,  Other schools and instruction[6116] & Educational support services[6117]	

A new method was applied to obatin the *Private and trades education*. **The four extracted dataframes' __EMPLOYMENT__ were added to get the accurate results without merge methods.

In [67]:
BSS = data.loc[data['NAICS'] == 6114].reset_index()
BSS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33872,2000,1,6114,1000
1,34184,2000,2,6114,750
2,34496,2000,3,6114,1000
3,34808,2000,4,6114,1000
4,35120,2000,5,6114,500
...,...,...,...,...,...
271,117896,1999,8,6114,1250
272,118205,1999,9,6114,750
273,118514,1999,10,6114,1750
274,118823,1999,11,6114,2000


In [68]:
TTS = data.loc[data['NAICS'] == 6115].reset_index()
TTS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33873,2000,1,6115,2750
1,34185,2000,2,6115,2750
2,34497,2000,3,6115,2500
3,34809,2000,4,6115,2000
4,35121,2000,5,6115,2000
...,...,...,...,...,...
271,117897,1999,8,6115,1500
272,118206,1999,9,6115,1000
273,118515,1999,10,6115,1750
274,118824,1999,11,6115,1250


In [69]:
OSI = data.loc[data['NAICS'] == 6116].reset_index()
OSI

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33874,2000,1,6116,16750
1,34186,2000,2,6116,15250
2,34498,2000,3,6116,14750
3,34810,2000,4,6116,13000
4,35122,2000,5,6116,11500
...,...,...,...,...,...
271,117898,1999,8,6116,12250
272,118207,1999,9,6116,14500
273,118516,1999,10,6116,14000
274,118825,1999,11,6116,12250


In [70]:
ESS = data.loc[data['NAICS'] == 6117].reset_index()
ESS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33875,2000,1,6117,1000
1,34187,2000,2,6117,1500
2,34499,2000,3,6117,1750
3,34811,2000,4,6117,2750
4,35123,2000,5,6117,3250
...,...,...,...,...,...
271,117899,1999,8,6117,1750
272,118208,1999,9,6117,2000
273,118517,1999,10,6117,1250
274,118826,1999,11,6117,1750


In [71]:
ESS['EMPLOYMENT'] = BSS['_EMPLOYMENT_'] + OSI['_EMPLOYMENT_'] + TTS['_EMPLOYMENT_'] + ESS['_EMPLOYMENT_']
Private_and_trades_education = ESS

In [72]:
Private_and_trades_education = Private_and_trades_education.replace(6117, 'Private and trades education')
Private_and_trades_education = Private_and_trades_education.drop('_EMPLOYMENT_', axis= 1)
Private_and_trades_education

Unnamed: 0,index,SYEAR,SMTH,NAICS,EMPLOYMENT
0,33875,2000,1,Private and trades education,21500
1,34187,2000,2,Private and trades education,20250
2,34499,2000,3,Private and trades education,20000
3,34811,2000,4,Private and trades education,18750
4,35123,2000,5,Private and trades education,17250
...,...,...,...,...,...
271,117899,1999,8,Private and trades education,16750
272,118208,1999,9,Private and trades education,18250
273,118517,1999,10,Private and trades education,18750
274,118826,1999,11,Private and trades education,17250


## Getting  *Transportation equipment manufacturing (excluding shipbuilding)*  from Motor vehicle manufacturing[3361], Motor vehicle body and trailer manufacturing[3362],  Motor vehicle parts manufacturing[3363],  Aerospace product and parts manufacturing[3364], Railroad rolling stock manufacturing[3365] & Other transportation equipment manufacturing[3369]	

The new method was applied to obatin the **Transportation equipment manufacturing (excluding shipbuilding)**. **The six extracted dataframes' __EMPLOYMENT__ were added to get the accurate results without merge methods.**

In [73]:
Motor_vehicle_manufacturing = data.loc[data['NAICS'] == 3361].reset_index()
Motor_vehicle_manufacturing

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33725,2000,1,3361,1000
1,34037,2000,2,3361,750
2,34349,2000,3,3361,1250
3,34661,2000,4,3361,500
4,34973,2000,5,3361,2000
...,...,...,...,...,...
271,117751,1999,8,3361,4250
272,118060,1999,9,3361,3500
273,118369,1999,10,3361,3250
274,118678,1999,11,3361,3500


In [74]:
Motor_vehicle_body_and_trailer_manufacturing = data.loc[data['NAICS'] == 3362].reset_index()
Motor_vehicle_body_and_trailer_manufacturing

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33726,2000,1,3362,2250
1,34038,2000,2,3362,2250
2,34350,2000,3,3362,1750
3,34662,2000,4,3362,1500
4,34974,2000,5,3362,2500
...,...,...,...,...,...
271,117752,1999,8,3362,2000
272,118061,1999,9,3362,1750
273,118370,1999,10,3362,1250
274,118679,1999,11,3362,1250


In [75]:
Motor_vehicle_parts_manufacturing = data.loc[data['NAICS'] == 3363].reset_index()
Motor_vehicle_parts_manufacturing 

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33727,2000,1,3363,1250
1,34039,2000,2,3363,750
2,34351,2000,3,3363,1750
3,34663,2000,4,3363,1500
4,34975,2000,5,3363,1500
...,...,...,...,...,...
271,117753,1999,8,3363,2250
272,118062,1999,9,3363,2500
273,118371,1999,10,3363,2250
274,118680,1999,11,3363,1000


In [76]:
Aerospace_product_and_parts_manufacturing = data.loc[data['NAICS'] == 3364].reset_index()
Aerospace_product_and_parts_manufacturing

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33728,2000,1,3364,2250
1,34040,2000,2,3364,1500
2,34352,2000,3,3364,2500
3,34664,2000,4,3364,3750
4,34976,2000,5,3364,2000
...,...,...,...,...,...
271,117754,1999,8,3364,2250
272,118063,1999,9,3364,2250
273,118372,1999,10,3364,2000
274,118681,1999,11,3364,2250


In [77]:
Railroad_rolling_stock_manufacturing = data.loc[data['NAICS'] == 3365].reset_index()
Railroad_rolling_stock_manufacturing

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33729,2000,1,3365,0
1,34041,2000,2,3365,500
2,34353,2000,3,3365,250
3,34665,2000,4,3365,1000
4,34977,2000,5,3365,1000
...,...,...,...,...,...
271,117755,1999,8,3365,0
272,118064,1999,9,3365,0
273,118373,1999,10,3365,0
274,118682,1999,11,3365,0


In [78]:
Other_transportation_equipment_manufacturing = data.loc[data['NAICS'] == 3369].reset_index()
Other_transportation_equipment_manufacturing

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33731,2000,1,3369,0
1,34043,2000,2,3369,500
2,34355,2000,3,3369,250
3,34667,2000,4,3369,500
4,34979,2000,5,3369,250
...,...,...,...,...,...
271,117757,1999,8,3369,500
272,118066,1999,9,3369,500
273,118375,1999,10,3369,250
274,118684,1999,11,3369,0


In [79]:
Other_transportation_equipment_manufacturing['EMPLOYMENT'] = Other_transportation_equipment_manufacturing['_EMPLOYMENT_'] + Railroad_rolling_stock_manufacturing['_EMPLOYMENT_'] + Aerospace_product_and_parts_manufacturing['_EMPLOYMENT_'] + Motor_vehicle_parts_manufacturing['_EMPLOYMENT_'] + Motor_vehicle_body_and_trailer_manufacturing['_EMPLOYMENT_'] + Motor_vehicle_manufacturing['_EMPLOYMENT_']
Transportation_equipment_manufacturing_excluding_shipbuilding = Other_transportation_equipment_manufacturing

In [80]:
Transportation_equipment_manufacturing_excluding_shipbuilding = Transportation_equipment_manufacturing_excluding_shipbuilding.replace(3369, 
                                                    'Transportation equipment manufacturing - excluding shipbuilding')
Transportation_equipment_manufacturing_excluding_shipbuilding = Transportation_equipment_manufacturing_excluding_shipbuilding.drop(
    '_EMPLOYMENT_', axis= 1)
Transportation_equipment_manufacturing_excluding_shipbuilding

Unnamed: 0,index,SYEAR,SMTH,NAICS,EMPLOYMENT
0,33731,2000,1,Transportation equipment manufacturing - exclu...,6750
1,34043,2000,2,Transportation equipment manufacturing - exclu...,6250
2,34355,2000,3,Transportation equipment manufacturing - exclu...,7750
3,34667,2000,4,Transportation equipment manufacturing - exclu...,8750
4,34979,2000,5,Transportation equipment manufacturing - exclu...,9250
...,...,...,...,...,...
271,117757,1999,8,Transportation equipment manufacturing - exclu...,11250
272,118066,1999,9,Transportation equipment manufacturing - exclu...,10500
273,118375,1999,10,Transportation equipment manufacturing - exclu...,9000
274,118684,1999,11,Transportation equipment manufacturing - exclu...,8000


## Getting  *Legal, accounting, design, research, and advertising services*  from  Legal services[5411], Accounting, tax preparation, bookkeeping and payroll services[5412],  Specialized design services[5414],   Scientific research and development services[5417], Advertising, public relations, and related services[5418] & Other professional, scientific and technical services[5419]	

The new method was applied to obatin the **Transportation equipment manufacturing (excluding shipbuilding)**. **The six extracted dataframes' __EMPLOYMENT__ were added to get the accurate results without merge methods.**

In [81]:
LS = data.loc[data['NAICS'] == 5411].reset_index()
LS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33849,2000,1,5411,19750
1,34161,2000,2,5411,21250
2,34473,2000,3,5411,21250
3,34785,2000,4,5411,19500
4,35097,2000,5,5411,20000
...,...,...,...,...,...
271,117873,1999,8,5411,21750
272,118182,1999,9,5411,22000
273,118491,1999,10,5411,22250
274,118800,1999,11,5411,17750


In [82]:
ATPBP = data.loc[data['NAICS'] == 5412].reset_index()
ATPBP

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33850,2000,1,5412,20750
1,34162,2000,2,5412,20750
2,34474,2000,3,5412,23250
3,34786,2000,4,5412,19000
4,35098,2000,5,5412,18000
...,...,...,...,...,...
271,117874,1999,8,5412,22500
272,118183,1999,9,5412,22250
273,118492,1999,10,5412,22250
274,118801,1999,11,5412,19500


In [83]:
SDS = data.loc[data['NAICS'] == 5414].reset_index()
SDS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33852,2000,1,5414,6000
1,34164,2000,2,5414,6000
2,34476,2000,3,5414,6750
3,34788,2000,4,5414,7250
4,35100,2000,5,5414,8750
...,...,...,...,...,...
271,117876,1999,8,5414,7500
272,118185,1999,9,5414,8000
273,118494,1999,10,5414,7500
274,118803,1999,11,5414,7750


In [84]:
SRDS = data.loc[data['NAICS'] == 5417].reset_index()
SRDS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33855,2000,1,5417,3250
1,34167,2000,2,5417,3250
2,34479,2000,3,5417,2750
3,34791,2000,4,5417,2250
4,35103,2000,5,5417,2750
...,...,...,...,...,...
271,117879,1999,8,5417,1500
272,118188,1999,9,5417,1000
273,118497,1999,10,5417,1250
274,118806,1999,11,5417,1000


In [85]:
APRRS = data.loc[data['NAICS'] == 5418].reset_index()
APRRS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33856,2000,1,5418,7000
1,34168,2000,2,5418,6500
2,34480,2000,3,5418,4000
3,34792,2000,4,5418,4500
4,35104,2000,5,5418,4500
...,...,...,...,...,...
271,117880,1999,8,5418,7500
272,118189,1999,9,5418,8250
273,118498,1999,10,5418,8000
274,118807,1999,11,5418,8000


In [86]:
OPSTS = data.loc[data['NAICS'] == 5419].reset_index()
OPSTS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,33857,2000,1,5419,6500
1,34169,2000,2,5419,7250
2,34481,2000,3,5419,7000
3,34793,2000,4,5419,7750
4,35105,2000,5,5419,7750
...,...,...,...,...,...
271,117881,1999,8,5419,6000
272,118190,1999,9,5419,6500
273,118499,1999,10,5419,6500
274,118808,1999,11,5419,6250


In [87]:
OPSTS['EMPLOYMENT'] = LS['_EMPLOYMENT_'] + ATPBP['_EMPLOYMENT_'] + SDS['_EMPLOYMENT_'] + SRDS['_EMPLOYMENT_'] + APRRS['_EMPLOYMENT_'] + OPSTS['_EMPLOYMENT_']
Legal_accounting_design_research_and_advertising_services = OPSTS

In [88]:
Legal_accounting_design_research_and_advertising_services = Legal_accounting_design_research_and_advertising_services.replace(5419, 
                                                    'Legal, accounting, design research and advertising services')
Legal_accounting_design_research_and_advertising_services = Legal_accounting_design_research_and_advertising_services.drop(
    '_EMPLOYMENT_', axis= 1)
Legal_accounting_design_research_and_advertising_services

Unnamed: 0,index,SYEAR,SMTH,NAICS,EMPLOYMENT
0,33857,2000,1,"Legal, accounting, design research and adverti...",63250
1,34169,2000,2,"Legal, accounting, design research and adverti...",65000
2,34481,2000,3,"Legal, accounting, design research and adverti...",65000
3,34793,2000,4,"Legal, accounting, design research and adverti...",60250
4,35105,2000,5,"Legal, accounting, design research and adverti...",61750
...,...,...,...,...,...
271,117881,1999,8,"Legal, accounting, design research and adverti...",66750
272,118190,1999,9,"Legal, accounting, design research and adverti...",68000
273,118499,1999,10,"Legal, accounting, design research and adverti...",67750
274,118808,1999,11,"Legal, accounting, design research and adverti...",60250


## Getting  *Other retail trade(excluding cars and personal care)*  from  [442], [443],  [444],  [445], [446], [447], [448],  [451],   [452], [453] & [454]	

The new method was applied to obatin the **Other retail trade(excluding cars and personal care)**. **The eleven extracted dataframes' __EMPLOYMENT__ were added to get the accurate results without merge methods.**

In [90]:
Furniture_and_home_furnishings_stores = data.loc[data['NAICS'].str.contains('442', na = False)].reset_index()
Furniture_and_home_furnishings_stores

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5506,2000,1,Furniture and home furnishings stores[442],8500
1,5608,2000,2,Furniture and home furnishings stores[442],9250
2,5710,2000,3,Furniture and home furnishings stores[442],9250
3,5812,2000,4,Furniture and home furnishings stores[442],9250
4,5914,2000,5,Furniture and home furnishings stores[442],7750
...,...,...,...,...,...
271,33131,1999,8,Furniture and home furnishings stores[442],6750
272,33234,1999,9,Furniture and home furnishings stores[442],7500
273,33337,1999,10,Furniture and home furnishings stores[442],7500
274,33440,1999,11,Furniture and home furnishings stores[442],7250


In [92]:
Electronics_and_appliance_stores = data.loc[data['NAICS'].str.contains('443', na = False)].reset_index()
Electronics_and_appliance_stores

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5495,2000,1,Electronics and appliance stores[443],13500
1,5597,2000,2,Electronics and appliance stores[443],10750
2,5699,2000,3,Electronics and appliance stores[443],10500
3,5801,2000,4,Electronics and appliance stores[443],11250
4,5903,2000,5,Electronics and appliance stores[443],11500
...,...,...,...,...,...
271,33120,1999,8,Electronics and appliance stores[443],14750
272,33223,1999,9,Electronics and appliance stores[443],13750
273,33326,1999,10,Electronics and appliance stores[443],10000
274,33429,1999,11,Electronics and appliance stores[443],13000


In [96]:
BMGESD = data.loc[data['NAICS'].str.contains('444', na = False)].reset_index()
BMGESD

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5481,2000,1,Building material and garden equipment and sup...,12250
1,5583,2000,2,Building material and garden equipment and sup...,13250
2,5685,2000,3,Building material and garden equipment and sup...,13000
3,5787,2000,4,Building material and garden equipment and sup...,12000
4,5889,2000,5,Building material and garden equipment and sup...,12500
...,...,...,...,...,...
271,33106,1999,8,Building material and garden equipment and sup...,16000
272,33209,1999,9,Building material and garden equipment and sup...,13500
273,33312,1999,10,Building material and garden equipment and sup...,14250
274,33415,1999,11,Building material and garden equipment and sup...,14750


In [99]:
Food_and_beverage_stores = data.loc[data['NAICS'].str.contains('445', na = False)].reset_index()
Food_and_beverage_stores

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5500,2000,1,Food and beverage stores[445],61250
1,5602,2000,2,Food and beverage stores[445],60250
2,5704,2000,3,Food and beverage stores[445],63250
3,5806,2000,4,Food and beverage stores[445],70500
4,5908,2000,5,Food and beverage stores[445],68500
...,...,...,...,...,...
271,33125,1999,8,Food and beverage stores[445],57000
272,33228,1999,9,Food and beverage stores[445],55500
273,33331,1999,10,Food and beverage stores[445],60000
274,33434,1999,11,Food and beverage stores[445],58750


In [101]:
Health_and_personal_care_stores = data.loc[data['NAICS'].str.contains('446', na = False)].reset_index()
Health_and_personal_care_stores

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5510,2000,1,Health and personal care stores[446],11000
1,5612,2000,2,Health and personal care stores[446],14000
2,5714,2000,3,Health and personal care stores[446],16750
3,5816,2000,4,Health and personal care stores[446],17000
4,5918,2000,5,Health and personal care stores[446],17500
...,...,...,...,...,...
271,33135,1999,8,Health and personal care stores[446],10750
272,33238,1999,9,Health and personal care stores[446],12250
273,33341,1999,10,Health and personal care stores[446],13000
274,33444,1999,11,Health and personal care stores[446],12750


In [103]:
Gasoline_stations = data.loc[data['NAICS'].str.contains('447', na = False)].reset_index()
Gasoline_stations

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5508,2000,1,Gasoline stations[447],10000
1,5610,2000,2,Gasoline stations[447],10000
2,5712,2000,3,Gasoline stations[447],7750
3,5814,2000,4,Gasoline stations[447],8000
4,5916,2000,5,Gasoline stations[447],7250
...,...,...,...,...,...
271,33133,1999,8,Gasoline stations[447],9250
272,33236,1999,9,Gasoline stations[447],6750
273,33339,1999,10,Gasoline stations[447],6750
274,33442,1999,11,Gasoline stations[447],8250


In [106]:
Clothing_and_clothing_accessories_stores = data.loc[data['NAICS'].str.contains('448', na = False)].reset_index()
Clothing_and_clothing_accessories_stores

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5485,2000,1,Clothing and clothing accessories stores[448],21000
1,5587,2000,2,Clothing and clothing accessories stores[448],20250
2,5689,2000,3,Clothing and clothing accessories stores[448],18250
3,5791,2000,4,Clothing and clothing accessories stores[448],18500
4,5893,2000,5,Clothing and clothing accessories stores[448],24000
...,...,...,...,...,...
271,33110,1999,8,Clothing and clothing accessories stores[448],21250
272,33213,1999,9,Clothing and clothing accessories stores[448],20750
273,33316,1999,10,Clothing and clothing accessories stores[448],20250
274,33419,1999,11,Clothing and clothing accessories stores[448],19250


In [108]:
SGHBMS = data.loc[data['NAICS'].str.contains('451', na = False)].reset_index()
SGHBMS

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5559,2000,1,"Sporting goods, hobby, book and music stores[451]",9000
1,5661,2000,2,"Sporting goods, hobby, book and music stores[451]",10000
2,5763,2000,3,"Sporting goods, hobby, book and music stores[451]",9500
3,5865,2000,4,"Sporting goods, hobby, book and music stores[451]",10500
4,5967,2000,5,"Sporting goods, hobby, book and music stores[451]",12250
...,...,...,...,...,...
271,33185,1999,8,"Sporting goods, hobby, book and music stores[451]",17250
272,33288,1999,9,"Sporting goods, hobby, book and music stores[451]",11500
273,33391,1999,10,"Sporting goods, hobby, book and music stores[451]",10750
274,33494,1999,11,"Sporting goods, hobby, book and music stores[451]",11750


In [110]:
General_merchandise_stores = data.loc[data['NAICS'].str.contains('452', na = False)].reset_index()
General_merchandise_stores

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5509,2000,1,General merchandise stores[452],23250
1,5611,2000,2,General merchandise stores[452],24000
2,5713,2000,3,General merchandise stores[452],25500
3,5815,2000,4,General merchandise stores[452],25500
4,5917,2000,5,General merchandise stores[452],26500
...,...,...,...,...,...
271,33134,1999,8,General merchandise stores[452],20250
272,33237,1999,9,General merchandise stores[452],27000
273,33340,1999,10,General merchandise stores[452],25500
274,33443,1999,11,General merchandise stores[452],24500


In [112]:
Miscellaneous_store_retailers = data.loc[data['NAICS'].str.contains('453', na = False)].reset_index()
Miscellaneous_store_retailers

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5525,2000,1,Miscellaneous store retailers[453],18250
1,5627,2000,2,Miscellaneous store retailers[453],20000
2,5729,2000,3,Miscellaneous store retailers[453],23750
3,5831,2000,4,Miscellaneous store retailers[453],23000
4,5933,2000,5,Miscellaneous store retailers[453],23750
...,...,...,...,...,...
271,33150,1999,8,Miscellaneous store retailers[453],18750
272,33253,1999,9,Miscellaneous store retailers[453],19250
273,33356,1999,10,Miscellaneous store retailers[453],22000
274,33459,1999,11,Miscellaneous store retailers[453],21000


In [116]:
Non_store_retailers = data.loc[data['NAICS'].str.contains('454', na = False)].reset_index()
Non_store_retailers

Unnamed: 0,index,SYEAR,SMTH,NAICS,_EMPLOYMENT_
0,5530,2000,1,Non-store retailers[454],9750
1,5632,2000,2,Non-store retailers[454],10500
2,5734,2000,3,Non-store retailers[454],10250
3,5836,2000,4,Non-store retailers[454],8000
4,5938,2000,5,Non-store retailers[454],7000
...,...,...,...,...,...
271,33156,1999,8,Non-store retailers[454],9750
272,33259,1999,9,Non-store retailers[454],8500
273,33362,1999,10,Non-store retailers[454],10000
274,33465,1999,11,Non-store retailers[454],7750


In [None]:
SGHBMS['EMPLOYMENT'] = Non_store_retailers['_EMPLOYMENT_'] + Miscellaneous_store_retailers['_EMPLOYMENT_'] + General_merchandise_stores['_EMPLOYMENT_'] + SGHBMS['_EMPLOYMENT_'] + Clothing_and_clothing_accessories_stores['_EMPLOYMENT_'] + Gasoline_stations['_EMPLOYMENT_'] + Health_and_personal_care_stores['_EMPLOYMENT_'] + Food_and_beverage_stores['_EMPLOYMENT_'] + APRRS['_EMPLOYMENT_'] + OPSTS['_EMPLOYMENT_']
Other_retail_trade = SGHBMS
Other_retail_trade

Non_store_retailers
Miscellaneous_store_retailers
General_merchandise_stores
SGHBMS
Clothing_and_clothing_accessories_stores
Gasoline_stations
Health_and_personal_care_stores
Food_and_beverage_stores
BMGESD
Electronics_and_appliance_stores
Furniture_and_home_furnishings_stores