Skip to content

Commit

Permalink
Remove all spaces/format chars from data files
Browse files Browse the repository at this point in the history
Spaces and other punctuation in filenames have been problematic for
years. Even though modern OSes usually handle them reasonably well,
that can't be assumed. Older OSes (especially the big *NIXes) can be
particularly troublesome.

This renames all data files (and data directories) in the thermo/
directory to use underscores instead of spaces, removes parentheses, and
collapses "_-_" into a single underscore. References to the renamed
files and directories are updated accordingly.

I ran the test suite and the results are the same before and after this
change. (Some of the tests failed on a clean copy of master -- is the
test suite expected to pass 100%?)
  • Loading branch information
danieldjewell committed Aug 31, 2020
1 parent 9f4175a commit b49d3aa
Show file tree
Hide file tree
Showing 128 changed files with 112 additions and 112 deletions.
12 changes: 6 additions & 6 deletions setup.py
Expand Up @@ -71,10 +71,10 @@
'combustion', 'environmental engineering', 'solubility', 'vapor pressure',
'equation of state', 'molecule'],
classifiers = classifiers,
package_data={'thermo': ['Critical Properties/*', 'Density/*',
'Electrolytes/*', 'Environment/*', 'Heat Capacity/*', 'Identifiers/*',
'Law/*', 'Misc/*', 'Phase Change/*', 'Reactions/*', 'Safety/*',
'Solubility/*', 'Interface/*', 'Triple Properties/*',
'Thermal Conductivity/*',
'Vapor Pressure/*', 'Viscosity/*']}
package_data={'thermo': ['Critical_Properties/*', 'Density/*',
'Electrolytes/*', 'Environment/*', 'Heat_Capacity/*', 'Identifiers/*',
'Law/*', 'Misc/*', 'Phase_Change/*', 'Reactions/*', 'Safety/*',
'Solubility/*', 'Interface/*', 'Triple_Properties/*',
'Thermal_Conductivity/*',
'Vapor_Pressure/*', 'Viscosity/*']}
)
8 changes: 4 additions & 4 deletions tests/test_identifiers.py
Expand Up @@ -52,7 +52,7 @@ def test_database_formulas():

def test_organic_user_db():
db = ChemicalMetadataDB(elements=False,
main_db=os.path.join(folder, 'chemical identifiers example user db.tsv'),
main_db=os.path.join(folder, 'chemical_identifiers_example_user_db.tsv'),
user_dbs=[])
for CAS, d in db.CAS_index.items():
assert CAS_from_any(d.CASs) == d.CASs
Expand Down Expand Up @@ -98,7 +98,7 @@ def test_organic_user_db():

def test_inorganic_db():
db = ChemicalMetadataDB(elements=False,
main_db=os.path.join(folder, 'Inorganic db.tsv'),
main_db=os.path.join(folder, 'Inorganic_db.tsv'),
user_dbs=[])

# Check CAS lookup
Expand Down Expand Up @@ -271,7 +271,7 @@ def test_fake_CAS_numbers():
s = "20{0:0>5}000-00-0".format(i)
if checkCAS(s):
known.append(s+'\t\n')
f = open('Fake CAS Registry.tsv', 'w')
f = open('Fake_CAS_Registry.tsv', 'w')
f.writelines(known)
f.close()
'''
Expand Down Expand Up @@ -335,4 +335,4 @@ def test_db_vs_ChemSep():
# In an ideal world we could also validate against their smiles
# but that's proving difficult due to things like 1-hexene -
# is it 'CCCCC=C' or 'C=CCCCC'?
#test_db_vs_ChemSep()
#test_db_vs_ChemSep()
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions thermo/coolprop.py
Expand Up @@ -290,11 +290,11 @@ def CoolProp_T_dependent_property(T, CASRN, prop, phase):
raise Exception('Error in CoolProp property function')

if has_CoolProp:
f = open(os.path.join(folder, 'CoolProp vapor properties fits.json'), 'r')
f = open(os.path.join(folder, 'CoolProp_vapor_properties_fits.json'), 'r')
vapor_properties = json.load(f)
f.close()

f = open(os.path.join(folder, 'CoolProp CP0MOLAR fits.json'), 'r')
f = open(os.path.join(folder, 'CoolProp_CP0MOLAR_fits.json'), 'r')
idea_gas_heat_capacity = json.load(f)
f.close()

Expand Down
6 changes: 3 additions & 3 deletions thermo/critical.py
Expand Up @@ -39,7 +39,7 @@
from thermo.utils import mixing_simple, none_and_length_check


folder = os.path.join(os.path.dirname(__file__), 'Critical Properties')
folder = os.path.join(os.path.dirname(__file__), 'Critical_Properties')


### Read the various data files
Expand All @@ -63,7 +63,7 @@


_crit_PSRKR4 = pd.read_csv(os.path.join(folder,
'Appendix to PSRK Revision 4.tsv'), sep='\t', index_col=0)
'Appendix_to_PSRK_Revision_4.tsv'), sep='\t', index_col=0)
_crit_PSRKR4['Zc'] = pd.Series(_crit_PSRKR4['Pc']*_crit_PSRKR4['Vc']/_crit_PSRKR4['Tc']/R,
index=_crit_PSRKR4.index)

Expand All @@ -72,7 +72,7 @@
sep='\t', index_col=0)


_crit_Yaws = pd.read_csv(os.path.join(folder, 'Yaws Collection.tsv'),
_crit_Yaws = pd.read_csv(os.path.join(folder, 'Yaws_Collection.tsv'),
sep='\t', index_col=0)
_crit_Yaws['Zc'] = pd.Series(_crit_Yaws['Pc']*_crit_Yaws['Vc']/_crit_Yaws['Tc']/R,
index=_crit_Yaws.index)
Expand Down
6 changes: 3 additions & 3 deletions thermo/dipole.py
Expand Up @@ -30,13 +30,13 @@

folder = os.path.join(os.path.dirname(__file__), 'Misc')

_dipole_Poling = pd.read_csv(os.path.join(folder, 'Poling Dipole.csv'),
_dipole_Poling = pd.read_csv(os.path.join(folder, 'Poling_Dipole.csv'),
sep='\t', index_col=0)

_dipole_CCDB = pd.read_csv(os.path.join(folder, 'cccbdb.nist.gov Dipoles.csv'),
_dipole_CCDB = pd.read_csv(os.path.join(folder, 'cccbdb.nist.gov_Dipoles.csv'),
sep='\t', index_col=0)

_dipole_Muller = pd.read_csv(os.path.join(folder, 'Muller Supporting Info Dipoles.csv'),
_dipole_Muller = pd.read_csv(os.path.join(folder, 'Muller_Supporting_Info_Dipoles.csv'),
sep='\t', index_col=0)


Expand Down
16 changes: 8 additions & 8 deletions thermo/electrochem.py
Expand Up @@ -52,22 +52,22 @@



Lange_cond_pure = pd.read_csv(os.path.join(folder, 'Lange Pure Species Conductivity.tsv'),
Lange_cond_pure = pd.read_csv(os.path.join(folder, 'Lange_Pure_Species_Conductivity.tsv'),
sep='\t', index_col=0)

Marcus_ion_conductivities = pd.read_csv(os.path.join(folder, 'Marcus Ion Conductivities.tsv'),
Marcus_ion_conductivities = pd.read_csv(os.path.join(folder, 'Marcus_Ion_Conductivities.tsv'),
sep='\t', index_col=0)

CRC_ion_conductivities = pd.read_csv(os.path.join(folder, 'CRC conductivity infinite dilution.tsv'),
CRC_ion_conductivities = pd.read_csv(os.path.join(folder, 'CRC_conductivity_infinite_dilution.tsv'),
sep='\t', index_col=0)

Magomedovk_thermal_cond = pd.read_csv(os.path.join(folder, 'Magomedov Thermal Conductivity.tsv'),
Magomedovk_thermal_cond = pd.read_csv(os.path.join(folder, 'Magomedov_Thermal_Conductivity.tsv'),
sep='\t', index_col=0)

CRC_aqueous_thermodynamics = pd.read_csv(os.path.join(folder, 'CRC Thermodynamic Properties of Aqueous Ions.csv'),
CRC_aqueous_thermodynamics = pd.read_csv(os.path.join(folder, 'CRC_Thermodynamic_Properties_of_Aqueous_Ions.csv'),
sep='\t', index_col=0)

electrolyte_dissociation_reactions = pd.read_csv(os.path.join(folder, 'Electrolyte dissociations.csv'), sep='\t')
electrolyte_dissociation_reactions = pd.read_csv(os.path.join(folder, 'Electrolyte_dissociations.csv'), sep='\t')


_Laliberte_Density_ParametersDict = {}
Expand Down Expand Up @@ -587,7 +587,7 @@ def dilute_ionic_conductivity(ionic_conductivities, zs, rhom):
["Formula", 'lambda_coeffs', 'A_coeffs', 'B', 'multiplier'])

McCleskey_conductivities = {}
with open(os.path.join(folder, 'McCleskey Electrical Conductivity.csv')) as f:
with open(os.path.join(folder, 'McCleskey_Electrical_Conductivity.csv')) as f:
next(f)
for line in f:
values = line.strip().split('\t')
Expand Down Expand Up @@ -673,7 +673,7 @@ def conductivity_McCleskey(T, M, lambda_coeffs, A_coeffs, B, multiplier, rho=100



Lange_cond_pure = pd.read_csv(os.path.join(folder, 'Lange Pure Species Conductivity.tsv'),
Lange_cond_pure = pd.read_csv(os.path.join(folder, 'Lange_Pure_Species_Conductivity.tsv'),
sep='\t', index_col=0)


Expand Down
8 changes: 4 additions & 4 deletions thermo/environment.py
Expand Up @@ -35,19 +35,19 @@
### Global Warming Potentials

GWP_data = pd.read_csv(os.path.join(folder,
'Official Global Warming Potentials.tsv'), sep='\t',
'Official_Global_Warming_Potentials.tsv'), sep='\t',
index_col=0)

ODP_data = pd.read_csv(os.path.join(folder,
'Ozone Depletion Potentials.tsv'), sep='\t',
'Ozone_Depletion_Potentials.tsv'), sep='\t',
index_col=0)

CRClogPDict = pd.read_csv(os.path.join(folder,
'CRC logP table.tsv'), sep='\t',
'CRC_logP_table.tsv'), sep='\t',
index_col=0)

SyrresDict2 = pd.read_csv(os.path.join(folder,
'Syrres logP data.csv.gz'), sep='\t',
'Syrres_logP_data.csv.gz'), sep='\t',
index_col=0, compression='gzip')

IPCC100 = 'IPCC (2007) 100yr'
Expand Down
8 changes: 4 additions & 4 deletions thermo/heat_capacity.py
Expand Up @@ -56,7 +56,7 @@



folder = os.path.join(os.path.dirname(__file__), 'Heat Capacity')
folder = os.path.join(os.path.dirname(__file__), 'Heat_Capacity')


Poling_data = pd.read_csv(os.path.join(folder,
Expand All @@ -66,14 +66,14 @@


TRC_gas_data = pd.read_csv(os.path.join(folder,
'TRC Thermodynamics of Organic Compounds in the Gas State.tsv'), sep='\t',
'TRC_Thermodynamics_of_Organic_Compounds_in_the_Gas_State.tsv'), sep='\t',
index_col=0)
_TRC_gas_data_values = TRC_gas_data.values



_PerryI = {}
with open(os.path.join(folder, 'Perrys Table 2-151.tsv'), encoding='utf-8') as f:
with open(os.path.join(folder, 'Perrys_Table_2-151.tsv'), encoding='utf-8') as f:
'''Read in a dict of heat capacities of irnorganic and elemental solids.
These are in section 2, table 151 in:
Green, Don, and Robert Perry. Perry's Chemical Engineers' Handbook,
Expand Down Expand Up @@ -115,7 +115,7 @@
# Chemistry and Physics. [Boca Raton, FL]: CRC press, 2014.
# Warning: 11 duplicated chemicals are present and currently clobbered.
CRC_standard_data = pd.read_csv(os.path.join(folder,
'CRC Standard Thermodynamic Properties of Chemical Substances.tsv'), sep='\t',
'CRC_Standard_Thermodynamic_Properties_of_Chemical_Substances.tsv'), sep='\t',
index_col=0)


Expand Down
14 changes: 7 additions & 7 deletions thermo/identifiers.py
Expand Up @@ -124,18 +124,18 @@ def __init__(self, pubchemid, CAS, formula, MW, smiles, InChI, InChI_key,

class ChemicalMetadataDB(object):
exclusion_options = [os.path.join(folder, 'dippr_2014_int.csv'),
os.path.join(folder, 'Chemicals with data.csv')]
os.path.join(folder, 'Chemicals_with_data.csv')]

def __init__(self, create_pubchem_index=True, create_CAS_index=True,
create_name_index=True, create_smiles_index=True,
create_InChI_index=True, create_InChI_key_index=True,
create_formula_index=True,
restrict_identifiers_file=None, elements=True,
main_db=os.path.join(folder, 'chemical identifiers.tsv'),
user_dbs=[os.path.join(folder, 'chemical identifiers example user db.tsv'),
os.path.join(folder, 'Cation db.tsv'),
os.path.join(folder, 'Anion db.tsv'),
os.path.join(folder, 'Inorganic db.tsv')]):
main_db=os.path.join(folder, 'chemical_identifiers.tsv'),
user_dbs=[os.path.join(folder, 'chemical_identifiers_example_user_db.tsv'),
os.path.join(folder, 'Cation_db.tsv'),
os.path.join(folder, 'Anion_db.tsv'),
os.path.join(folder, 'Inorganic_db.tsv')]):


self.pubchem_index = {}
Expand Down Expand Up @@ -659,7 +659,7 @@ def synonyms(CASRN):

_MixtureDict = {}
_MixtureDictLookup = {}
with open(os.path.join(folder, 'Mixtures Compositions.tsv')) as f:
with open(os.path.join(folder, 'Mixtures_Compositions.tsv')) as f:
'''Read in a dict of 90 or so mixutres, their components, and synonyms.
Small errors in mole fractions not adding to 1 are known.
Errors in adding mass fraction are less common, present at the 5th decimal.
Expand Down
2 changes: 1 addition & 1 deletion thermo/interface.py
Expand Up @@ -58,7 +58,7 @@
sep='\t', index_col=0)
_Somayajulu_data_2_values = Somayajulu_data_2.values

VDI_PPDS_11 = pd.read_csv(os.path.join(folder, 'VDI PPDS surface tensions.tsv'),
VDI_PPDS_11 = pd.read_csv(os.path.join(folder, 'VDI_PPDS_surface_tensions.tsv'),
sep='\t', index_col=0)
_VDI_PPDS_11_values = VDI_PPDS_11.values

Expand Down
16 changes: 8 additions & 8 deletions thermo/law.py
Expand Up @@ -84,20 +84,20 @@ def load_law_data():
global TSCA_data, EINECS_data, SPIN_data, NLP_data

# Data is stored as integers to reduce memory usage
DSL_data = pd.read_csv(os.path.join(folder, 'Canada Feb 11 2015 - DSL.csv.gz'),
DSL_data = pd.read_csv(os.path.join(folder, 'Canada_Feb_11_2015_DSL.csv.gz'),
sep='\t', index_col=0, compression='gzip')

TSCA_data = pd.read_csv(os.path.join(folder, 'TSCA Inventory 2016-01.csv.gz'),
TSCA_data = pd.read_csv(os.path.join(folder, 'TSCA_Inventory_2016-01.csv.gz'),
sep='\t', index_col=0, compression='gzip')


EINECS_data = pd.read_csv(os.path.join(folder, 'EINECS 2015-03.csv.gz'),
EINECS_data = pd.read_csv(os.path.join(folder, 'EINECS_2015-03.csv.gz'),
index_col=0, compression='gzip')

SPIN_data = pd.read_csv(os.path.join(folder, 'SPIN Inventory 2015-03.csv.gz'),
SPIN_data = pd.read_csv(os.path.join(folder, 'SPIN_Inventory_2015-03.csv.gz'),
compression='gzip', index_col=0)

NLP_data = pd.read_csv(os.path.join(folder, 'EC Inventory No Longer Polymers (NLP).csv'),
NLP_data = pd.read_csv(os.path.join(folder, 'EC_Inventory_No_Longer_Polymers_NLP.csv'),
sep='\t', index_col=0)
# 161162-67-6 is not a valid CAS number and was removed.

Expand Down Expand Up @@ -266,12 +266,12 @@ def load_economic_data():
global _EPACDRDict, _ECHATonnageDict

'''OECD are chemicals produced by and OECD members in > 1000 tonnes/year.'''
HPV_data = pd.read_csv(os.path.join(folder, 'HPV 2015 March 3.csv'),
HPV_data = pd.read_csv(os.path.join(folder, 'HPV_2015_March_3.csv'),
sep='\t', index_col=0)
# 13061-29-2 not valid and removed

_ECHATonnageDict = {}
with zipfile.ZipFile(os.path.join(folder, 'ECHA Tonnage Bands.csv.zip')) as z:
with zipfile.ZipFile(os.path.join(folder, 'ECHA_Tonnage_Bands.csv.zip')) as z:
with z.open(z.namelist()[0]) as f:
for line in f.readlines():
# for some reason, the file must be decoded to UTF8 first
Expand All @@ -286,7 +286,7 @@ def load_economic_data():


_EPACDRDict = {}
with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f:
with open(os.path.join(folder, 'EPA_2012_Chemical_Data_Reporting.csv')) as f:
'''EPA summed reported chemical usages. In metric tonnes/year after conversion.
Many producers keep their date confidential.
This was originally in terms of lb/year, but rounded to the nearest kg.
Expand Down
6 changes: 3 additions & 3 deletions thermo/miscdata.py
Expand Up @@ -33,11 +33,11 @@
### CRC Handbook general tables

CRC_inorganic_data = pd.read_csv(os.path.join(folder,
'Physical Constants of Inorganic Compounds.csv'), sep='\t', index_col=0)
'Physical_Constants_of_Inorganic_Compounds.csv'), sep='\t', index_col=0)


CRC_organic_data = pd.read_csv(os.path.join(folder,
'Physical Constants of Organic Compounds.csv'), sep='\t', index_col=0)
'Physical_Constants_of_Organic_Compounds.csv'), sep='\t', index_col=0)


### VDI Saturation
Expand All @@ -51,7 +51,7 @@
# After some consideration, it has been decided to keep this load method as is.

_VDISaturationDict = {}
with open(os.path.join(folder, 'VDI Saturation Compounds Data.csv')) as f:
with open(os.path.join(folder, 'VDI_Saturation_Compounds_Data.csv')) as f:
'''Read in a dict of assorted chemical properties at saturation for 58
industrially important chemicals, from:
Gesellschaft, V. D. I., ed. VDI Heat Atlas. 2E. Berlin : Springer, 2010.
Expand Down
2 changes: 1 addition & 1 deletion thermo/permittivity.py
Expand Up @@ -33,7 +33,7 @@
folder = os.path.join(os.path.dirname(__file__), 'Electrolytes')


CRC_Permittivity_data = pd.read_csv(os.path.join(folder, 'Permittivity (Dielectric Constant) of Liquids.tsv'),
CRC_Permittivity_data = pd.read_csv(os.path.join(folder, 'Permittivity_Dielectric_Constant_of_Liquids.tsv'),
sep='\t', index_col=0)
_CRC_Permittivity_data_values = CRC_Permittivity_data.values

Expand Down
20 changes: 10 additions & 10 deletions thermo/phase_change.py
Expand Up @@ -42,36 +42,36 @@
from thermo.coolprop import has_CoolProp, PropsSI, coolprop_dict, coolprop_fluids
from thermo.dippr import EQ106

folder = os.path.join(os.path.dirname(__file__), 'Phase Change')
folder = os.path.join(os.path.dirname(__file__), 'Phase_Change')


Yaws_data = pd.read_csv(os.path.join(folder,
'Yaws Boiling Points.tsv'), sep='\t', index_col=0)
'Yaws_Boiling_Points.tsv'), sep='\t', index_col=0)

Tm_ON_data = pd.read_csv(os.path.join(folder, 'OpenNotebook Melting Points.tsv'),
Tm_ON_data = pd.read_csv(os.path.join(folder, 'OpenNotebook_Melting_Points.tsv'),
sep='\t', index_col=0)

GharagheiziHvap_data = pd.read_csv(os.path.join(folder, 'Ghazerati Appendix Vaporization Enthalpy.tsv'),
GharagheiziHvap_data = pd.read_csv(os.path.join(folder, 'Ghazerati_Appendix_Vaporization_Enthalpy.tsv'),
sep='\t', index_col=0)

CRCHvap_data = pd.read_csv(os.path.join(folder, 'CRC Handbook Heat of Vaporization.tsv'),
CRCHvap_data = pd.read_csv(os.path.join(folder, 'CRC_Handbook_Heat_of_Vaporization.tsv'),
sep='\t', index_col=0)

CRCHfus_data = pd.read_csv(os.path.join(folder, 'CRC Handbook Heat of Fusion.tsv'),
CRCHfus_data = pd.read_csv(os.path.join(folder, 'CRC_Handbook_Heat_of_Fusion.tsv'),
sep='\t', index_col=0)

GharagheiziHsub_data = pd.read_csv(os.path.join(folder, 'Ghazerati Appendix Sublimation Enthalpy.tsv'),
GharagheiziHsub_data = pd.read_csv(os.path.join(folder, 'Ghazerati_Appendix_Sublimation_Enthalpy.tsv'),
sep='\t', index_col=0)

Perrys2_150 = pd.read_csv(os.path.join(folder, 'Table 2-150 Heats of Vaporization of Inorganic and Organic Liquids.tsv'),
Perrys2_150 = pd.read_csv(os.path.join(folder, 'Table_2-150_Heats_of_Vaporization_of_Inorganic_and_Organic_Liquids.tsv'),
sep='\t', index_col=0)
_Perrys2_150_values = Perrys2_150.values

VDI_PPDS_4 = pd.read_csv(os.path.join(folder, 'VDI PPDS Enthalpies of vaporization.tsv'),
VDI_PPDS_4 = pd.read_csv(os.path.join(folder, 'VDI_PPDS_Enthalpies_of_vaporization.tsv'),
sep='\t', index_col=0)
_VDI_PPDS_4_values = VDI_PPDS_4.values

Alibakhshi_Cs = pd.read_csv(os.path.join(folder, 'Alibakhshi one-coefficient enthalpy of vaporization.tsv'),
Alibakhshi_Cs = pd.read_csv(os.path.join(folder, 'Alibakhshi_one-coefficient_enthalpy_of_vaporization.tsv'),
sep='\t', index_col=0)


Expand Down

0 comments on commit b49d3aa

Please sign in to comment.