In [2]:
import pandas as pd

# Importing OSHA measure list into a set

In [30]:
# Load the OSHA limits table and collect its distinct substance names.
osha = pd.read_csv('../data/osha-limits.csv')

# Drop missing values *before* converting to str: astype(str) on a NaN
# yields the literal text 'nan', which would otherwise be counted as a
# bogus measure name and take part in the set comparisons below.
osha_measures = frozenset(
    osha['substance']
    .dropna()
    .astype(str)
    .str.strip()  # surrounding whitespace would defeat exact-name matching
)
print("{} measures from OSHA".format(len(osha_measures)))

# Only the set of names is needed from here on; release the DataFrame.
osha = None

467 measures from OSHA


# Importing California measure list into a set

In [35]:
# Load only the characteristic-name column from the compressed California
# results file and collect its distinct values.
cal = pd.read_csv('../data/CA-result.csv.bz2',
                  usecols=['CharacteristicName'])

# Drop missing values *before* converting to str: astype(str) on a NaN
# yields the literal text 'nan', which would otherwise be counted as a
# bogus measure name and take part in the set comparisons below.
wq_measures = frozenset(
    cal['CharacteristicName']
    .dropna()
    .astype(str)
    .str.strip()  # surrounding whitespace would defeat exact-name matching
)
print("{} measures from CA data".format(len(wq_measures)))

# Only the set of names is needed from here on; release the DataFrame.
cal = None

1792 measures from CA data


# Common measures

In [36]:
# Names that appear, spelled identically, in both the OSHA list and the
# water-quality data.
intersection = osha_measures & wq_measures
print("{} shared measures:".format(len(intersection)))

# One tab-indented name per line, in sorted order.
for measure in sorted(intersection):
    print("\t{}".format(measure))

63 shared measures:
	1,1,2,2-Tetrachloroethane
	1,1,2-Trichloroethane
	1,1-Dichloroethane
	1,2,3-Trichloropropane
	Acetone
	Acrolein
	Aldrin
	Allyl chloride
	Ammonia
	Azinphos-methyl
	Bromine
	Carbon dioxide
	Carbon disulfide
	Carbon monoxide
	Carbon tetrachloride
	Chlordane
	Chlorine
	Chlorobenzene
	Cumene
	Cyclohexane
	Dibutyl phthalate
	Dichlorodifluoromethane
	Dieldrin
	Dinitro-o-cresol
	Endrin
	Ethyl ether
	Ethylene dibromide
	Fluorine
	Hafnium
	Heptachlor
	Hexachloroethane
	Hydrogen sulfide
	Iodine
	Isophorone
	Isopropyl ether
	Lindane
	Methyl acetate
	Methyl acrylate
	Methyl bromide
	Methyl iodide
	Methyl methacrylate
	Methylene chloride
	Naphthalene
	Nicotine
	Nitrobenzene
	Nitroglycerin
	Parathion
	Pentachlorophenol
	Pentane
	Phenol
	Propane
	Ronnel
	Styrene
	Tetrahydrofuran
	Toluene
	Tributyl phosphate
	Trichloroethylene
	Triphenyl phosphate
	Warfarin
	Yttrium
	o-Dichlorobenzene
	p-Dichlorobenzene
	p-Nitroaniline


# OSHA measures not in the water data

In [37]:
# OSHA substance names with no identically spelled entry in the
# water-quality data.
osha_diff = osha_measures - wq_measures
print("{} OSHA measures not in water data:".format(len(osha_diff)))

# One tab-indented name per line, in sorted order.
for measure in sorted(osha_diff):
    print("\t{}".format(measure))

404 OSHA measures not in water data:
	1,1,1,2-Tetrachloro-2,2-difluoroethane
	1,1,2,2-Tetrachloro-1,2-difluoroethane
	1,1,2-Trichloro-1,2,2-trifluoroethane
	1,1-Dichloro-1-nitroethane
	1,1-Dimethylhydrazine
	1,2-Dichloroethylene
	1,3-Dichloro-5,5-dimethyl hydantoin
	1-Chloro-1-nitropropane
	1-Nitropropane
	2,4,5-T (2,4,5-tri-chlorophenoxyacetic acid)
	2,4,6-Trinitrotoluene (TNT)
	2,4-D (Dichlorophen-oxyacetic acid)
	2-Aminopyridine
	2-Butanone (Methyl ethyl ketone)
	2-Butoxyethanol
	2-Chloro-6-(trichloromethyl)pyridine, Total dust
	2-Chloro-6-(trichloromethyl)pyridine, Total dust, Respirable fraction
	2-Diethylaminoethanol
	2-Ethoxyethanol (Cellosolve)
	2-Ethoxyethyl acetate (Cellosolve acetate)
	2-Hexanone (Methyl n-butyl ketone)
	2-Methoxyethanol; (Methyl cellosolve)
	2-Methoxyethyl acetate (Methyl cellosolve acetate)
	2-Nitropropane
	2-Pentanone (Methylpropyl ketone)
	4,4'-Thiobis (6-tert,Butyl-m-cresol), Total dust
	4,4'-Thiobis (6-tert,Butyl-m-cresol), Total dust, Respirable fract

# Water data measures not in the OSHA list

In [38]:
# Water-quality characteristic names with no identically spelled entry in
# the OSHA list.
water_diff = wq_measures - osha_measures
print("{} Water data measures not in OSHA list:".format(len(water_diff)))

# One tab-indented name per line, in sorted order.
for measure in sorted(water_diff):
    print("\t{}".format(measure))

1729 Water data measures not in OSHA list:
	(+/-)11-nor-9-carboxy-delta-THC
	(1R,2S,5R)-Menthol
	(RS)-AMPA (Aminomethyl phosphonic acid)
	.alpha.,.alpha.-Dimethylphenethylamine
	.alpha.-Chlordene
	.alpha.-Endosulfan
	.alpha.-Hexachlorocyclohexane
	.alpha.-Terpineol
	.beta.-Endosulfan
	.beta.-Hexachlorocyclohexane
	.delta.-Hexachlorocyclohexane
	.gamma.-Chlordene
	.lambda.-Cyhalothrin
	1,1'-(2-chloroethylidene)bis(4-chlorobenzene)
	1,1'-(chloroethenylidene)bis(4-chlorobenzene)
	1,1,1,2-Tetrachloroethane
	1,1,1-Trichloroethane
	1,1-Dichloroethylene
	1,1-Dichloropropene
	1,2,3,4,6,7,8,9-Octachlorodibenzo-p-dioxin
	1,2,3,4,6,7,8,9-Octachlorodibenzofuran
	1,2,3,4,6,7,8-Heptachlorodibenzo-p-dioxin
	1,2,3,4,6,7,8-Heptachlorodibenzofuran
	1,2,3,4,7,8,9-Heptachlorodibenzofuran
	1,2,3,4,7,8-Hexachlorodibenzo-p-dioxin
	1,2,3,4,7,8-Hexachlorodibenzofuran
	1,2,3,4-Tetramethylbenzene
	1,2,3,5-Tetramethylbenzene
	1,2,3,6,7,8-Hexachlorodibenzo-p-dioxin
	1,2,3,6,7,8-Hexachlorodibenzofuran
	1,2,3,7,8,9-