In [1]:
import pandas as pd # need this 
import datetime as dt
import numpy as np

This notebook will convert HS6 codes to NAICS codes using the census concordance posted [https://www.census.gov/foreign-trade/reference/codes/concordance/expconcord17.xls](https://www.census.gov/foreign-trade/reference/codes/concordance/expconcord17.xls). 

This is quite simple, we read in the China list, read in the concordance. Using the concordance we will create a dictionary that will map hs6 codes into naics codes. Then use `.replace` to create the mapping.

#### Step 1: Read in the tariff data 

Note something to be mindful of is the type around the codes. Zeros in front matter here, so setting these as a string is best.

In [2]:
df50_list = pd.read_excel("tariff_data.xlsx", sheet_name = "$50billion list", dtype = {"HS-8 code": str,"HS6": str})
# This reads it in...

df50_list["time_of_tariff"] = np.nan 
# create an empty column for which we will fill in with the date that the tariff was implemented

df50_list.loc[df50_list["Tariff rate"] == 25,"time_of_tariff"] = dt.datetime(2018,7,6)
# time of the first one

df50_list.loc[df50_list["Tariff rate"] == "25pending","time_of_tariff"] = dt.datetime(2018,8,23)
# time of the second one

df50_list.drop(["Unnamed: 7", "Unnamed: 8"], axis = 1, inplace = True)
# get rid of some columns

df50_list.rename({"2017 Chinese import from the US ($1,000)": "value"}, axis = 1, inplace = True)

df50_list.head(10)

Unnamed: 0,HS-8 code,Product description,value,Tariff rate,HS6,BEC category,BEC description,time_of_tariff
0,2012000,Fresh or chilled unboned bovine meat ( excl. c...,400,25,20120,122,Consumer,2018-07-06 00:00:00
1,2013000,Fresh or chilled boneless bovine meat,4658,25,20130,122,Consumer,2018-07-06 00:00:00
2,2021000,Frozen bovine carcasses & half carcasses,0,25,20210,121,Intermediate,2018-07-06 00:00:00
3,2022000,Frozen unboned bovine meat (excl. carcasses),1522,25,20220,122,Consumer,2018-07-06 00:00:00
4,2023000,Frozen boneless bovine meat,18421,25,20230,122,Consumer,2018-07-06 00:00:00
5,2031200,"Fresh or chilled unboned hams, shoulders & cut...",31,25,20312,122,Consumer,2018-07-06 00:00:00
6,2031900,Other fresh or chilled swine meat,15,25,20319,122,Consumer,2018-07-06 00:00:00
7,2032190,"Frozen swine carcasses & half carcasses, nes",0,25,20321,121,Intermediate,2018-07-06 00:00:00
8,2032200,"Frozen unboned hams, shoulders & cuts thereof ...",117672,25,20322,122,Consumer,2018-07-06 00:00:00
9,2032900,Other frozen swine meat,167924,25,20329,122,Consumer,2018-07-06 00:00:00


Read in the census concordance, for exports, in 2017. Not sure how much variation there is from year to year. This is something that could be checked. 

In [3]:
url = "https://www.census.gov/foreign-trade/reference/codes/concordance/expconcord17.xls"

df_concordance = pd.read_excel(url, dtype = {"commodity": str, "naics": str})

# Again, be mindfull of the types here...

In [4]:
df_concordance.head()

Unnamed: 0,commodity,descriptn,abbreviatn,unit_qy1,unit_qy2,sitc,end_use,naics,usda,hitech
0,101210000,"HORSES, PUREBRED BREEDING, LIVE","HORSES, PUREBRED BREEDING, LIVE",NO,,150,10140,112920,0,0
1,101290000,"HORSES, LIVE, EXCEPT PUREBRED BREEDING","HORSES, LIVE, EXCEPT PUREBRED BREEDING",NO,,150,10140,112920,0,0
2,101300000,"ASSES, LIVE","ASSES, LIVE",NO,,150,10140,112920,0,0
3,101900000,"MULES AND HINNIES, LIVE","MULES AND HINNIES, LIVE",NO,,150,10140,112920,0,0
4,102210010,"BOVINES, PUREBRED BREEDING, DAIRY, MALE, LIVE","BOVINES, PUREBRED BREEDING, DAIRY, MALE, LIVE",NO,,111,10140,11211X,0,0


The commodity code here is 10digits. We want to create the 8 and 6 digit codes. So we truncate. Note I had difficulty converting the mapping from 8 into NAICS. But the 6 worked

In [5]:
df_concordance["hs8"] = df_concordance.commodity.str[0:8]

df_concordance["hs6"] = df_concordance.commodity.str[0:6]

So create a dictionary mapping HS6 codes into naics codes....

In [7]:
dict_concordance= dict(zip(df_concordance.hs6,df_concordance.naics)) 

Then use `.replace` to create the mapping....

In [8]:
df50_list["naics"] = df50_list.HS6

df50_list["naics"].replace(dict_concordance,value = None,inplace=True)

df50_list.head()

Unnamed: 0,HS-8 code,Product description,value,Tariff rate,HS6,BEC category,BEC description,time_of_tariff,naics
0,2012000,Fresh or chilled unboned bovine meat ( excl. c...,400,25,20120,122,Consumer,2018-07-06 00:00:00,311611
1,2013000,Fresh or chilled boneless bovine meat,4658,25,20130,122,Consumer,2018-07-06 00:00:00,311611
2,2021000,Frozen bovine carcasses & half carcasses,0,25,20210,121,Intermediate,2018-07-06 00:00:00,311611
3,2022000,Frozen unboned bovine meat (excl. carcasses),1522,25,20220,122,Consumer,2018-07-06 00:00:00,311611
4,2023000,Frozen boneless bovine meat,18421,25,20230,122,Consumer,2018-07-06 00:00:00,311611


#### Then do the same thing for the 60 billion list


In [9]:
df60_list = pd.read_excel("tariff_data.xlsx", sheet_name = "$60billion list", dtype = {"HS-8 code": str,"HS6": str})
# This reads it in...

df60_list["time_of_tariff"] = np.nan 
# create an empty column for which we will fill in with the date that the tariff was implemented

df60_list.loc[:,"time_of_tariff"] = dt.datetime(2018,9,24)

df60_list.rename({"Import value in 2017 ($1,000)": "value"}, axis = 1, inplace = True)

In [10]:
df60_list.head()

Unnamed: 0,HS code,Product description,value,Tariff rate,HS6,BEC code,BEC category,time_of_tariff
0,1012900,"Live horses, not pure-bred breeding",2,10,10129,111,Intermediate,2018-09-24
1,1061990,"Other edible mammals, not pure-bred breeding",3928,5,10619,111,Intermediate,2018-09-24
2,1062090,"Reptiles, nes",910,5,10620,111,Intermediate,2018-09-24
3,1064990,"Other insects, not for pure-bred breeding",35,5,10649,111,Intermediate,2018-09-24
4,1069090,"Other live animals, not for pure-bred breeding",59,10,10690,111,Intermediate,2018-09-24


In [11]:
df60_list["naics"] = df60_list.HS6

df60_list["naics"].replace(dict_concordance,value = None,inplace=True)

df60_list.head()

Unnamed: 0,HS code,Product description,value,Tariff rate,HS6,BEC code,BEC category,time_of_tariff,naics
0,1012900,"Live horses, not pure-bred breeding",2,10,10129,111,Intermediate,2018-09-24,112920
1,1061990,"Other edible mammals, not pure-bred breeding",3928,5,10619,111,Intermediate,2018-09-24,112990
2,1062090,"Reptiles, nes",910,5,10620,111,Intermediate,2018-09-24,112990
3,1064990,"Other insects, not for pure-bred breeding",35,5,10649,111,Intermediate,2018-09-24,112990
4,1069090,"Other live animals, not for pure-bred breeding",59,10,10690,111,Intermediate,2018-09-24,112990


In [12]:
df50_list.drop(["value","BEC category","BEC description"],axis = 1, inplace = True)

In [13]:
df60_list.rename({"HS code":"HS-8 code"},axis = 1,inplace = True)

In [14]:

df60_list.drop(["value","BEC code","BEC category"],axis = 1, inplace = True)

In [15]:
tariff_list = df50_list.append(df60_list)

In [16]:
tariff_list["naics4"] = tariff_list.naics.str[0:4]

In [17]:
tariff_list.rename({"Tariff rate":"tariff"},axis = 1,inplace = True)

In [18]:
tariff_list.tariff.replace("25pending",25,inplace = True)

In [19]:
tariff_list.head()

Unnamed: 0,HS-8 code,Product description,tariff,HS6,time_of_tariff,naics,naics4
0,2012000,Fresh or chilled unboned bovine meat ( excl. c...,25,20120,2018-07-06 00:00:00,311611,3116
1,2013000,Fresh or chilled boneless bovine meat,25,20130,2018-07-06 00:00:00,311611,3116
2,2021000,Frozen bovine carcasses & half carcasses,25,20210,2018-07-06 00:00:00,311611,3116
3,2022000,Frozen unboned bovine meat (excl. carcasses),25,20220,2018-07-06 00:00:00,311611,3116
4,2023000,Frozen boneless bovine meat,25,20230,2018-07-06 00:00:00,311611,3116


In [88]:
tariff_list.dtypes

HS-8 code              object
Product description    object
tariff                  int64
HS6                    object
time_of_tariff         object
naics                  object
naics4                 object
dtype: object

In [20]:
tariff_list.to_csv("tariff_list_naics.csv",index = False)

In [93]:
tariff_list.groupby("HS6").agg({"tariff":"median", ""})

Unnamed: 0_level_0,tariff
HS6,Unnamed: 1_level_1
010129,10.0
010619,5.0
010620,5.0
010649,5.0
010690,10.0
020120,25.0
020130,25.0
020210,25.0
020220,25.0
020230,25.0
