In [1]:
import pandas as pd
import warnings

# ugly but it works

In [2]:
def parse_dates(dataframe, date_column, print_format=False):
    if pd.api.types.is_datetime64_any_dtype(dataframe[date_column]):
        return dataframe
    
    warnings.filterwarnings("error")
    date_formats = ["%m/%d/%Y", "%m/%d/%y", "%Y/%m/%d", "%Y-%m-%d"]
    correct_format = False
    for date_format in date_formats:
        try:
            dataframe[date_column] = pd.to_datetime(dataframe[date_column], format=date_format)
            correct_format = True
            if print_format:
                print(date_format)
        except (UserWarning, ValueError):
            continue

    warnings.resetwarnings()
    if not correct_format:
        raise NotImplementedError('Correct date format not found.')

    return dataframe

### Required functions to crop csv files

**METADATA at the bottom of the file, won't parse**:
[data\\2017\\S010102_VOC_2017_EN.csv](data\\2017\\S010102_VOC_2017_EN.csv)

In [3]:
def how_many_csv_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0

    with open(filepath, 'r', encoding='ISO-8859-1') as file:
        for row in csv.reader(file):
            for date_column in date_column_list:
                if date_column in row:
                    return num_rows_before_header, date_column
            num_rows_before_header += 1

    return None, None


def csv_to_pandas(filepath, date_column_list=['Compounds', 'Sampling Date']):
    num_rows_to_skip, date_column = how_many_csv_rows_to_skip(filepath, date_column_list)
    parsed_data = []

    with open(filepath, 'r', encoding='ISO-8859-1') as file:
        reader = csv.reader(file)

        for _ in range(num_rows_to_skip):
            next(reader)
        
        headers = next(reader)
        
        for row in reader:
            if all(item == '' for item in row):
                break
            parsed_data.append(row)

    df = pd.DataFrame(parsed_data, columns=headers)

    return df

### testing

In [4]:
# simple testing

file = 'data\\2005\\S100111_CSV.csv'
df = pd.read_csv(file, header=1)

parse_dates(df, 'Compounds').dtypes

Compounds        datetime64[ns]
Ethane                  float64
Ethylene                float64
Acetylene               float64
Propylene               float64
                      ...      
Canister ID#             object
Sample Volume             int64
NAPS ID                   int64
START TIME               object
DURATION                  int64
Length: 185, dtype: object

In [5]:
# big testing

import csv
import openpyxl
import os
import xlrd
import io

def how_many_csv_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0

    with open(filepath, 'r', encoding='ISO-8859-1') as file:
        for row in csv.reader(file):
            for date_column in date_column_list:
                if date_column in row:
                    return num_rows_before_header, date_column
            num_rows_before_header += 1

    return None, None


def how_many_xlsx_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0

    with open(filepath, "rb") as f:  # fucking malloc
        in_mem_file = io.BytesIO(f.read())
    wb = openpyxl.load_workbook(in_mem_file, read_only=True)

    sheet = None
    for name in wb.sheetnames:
        if 'voc' in name.lower():
            sheet = wb[name]
            break
        if 'data' in name.lower() and 'metadata' not in name.lower():
            sheet = wb[name]
            break

    for row in sheet.iter_rows(values_only=True):
        for date_column in date_column_list:
            if date_column in row:
                return num_rows_before_header, date_column, name
        num_rows_before_header += 1
       
    return None, None, None


def get_xls_sheet(workbook):
    for name in workbook.sheet_names():
        if 'voc' in name.lower():
            return workbook[name], name
        elif 'data' in name.lower():
            return workbook[name], name
    return workbook.sheet_by_index(0), 0


def how_many_xls_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0
    wb = xlrd.open_workbook(filepath, encoding_override='ISO-8859-1')

    sheet, name = get_xls_sheet(wb)

    for row_idx in range(sheet.nrows):
        row = sheet.row_values(row_idx)
        for date_column in date_column_list:
            if date_column in row:
                return num_rows_before_header, date_column, name
        num_rows_before_header += 1

    wb.close()

    return None, None, None

In [6]:
################## CHECK IMPORT SCRIPTS ARE WORKING ##################

ignore_list = [
    'data\\ddmmyyyy.xlsx', 'data\\mmddyyyy.xlsx', 'data\\yyyyddmm.xlsx', 'data\\yyyymmdd.xlsx',
    'data\\ddmmyyyy.csv', 'data\\mmddyyyy.csv', 'data\\yyyyddmm.csv', 'data\\yyyymmdd.csv',
    'data\\2006\\S62601_VOCS.csv', 'data\\2007\\S62601_VOCS.csv',    # sideways csv for some reason
    'data\\2008\\S90227_VOC.csv', 'data\\2009\\S90227_VOC.csv', 'data\\2010\\S90227_VOC.csv',  # sampling data relocated
    'data\\2015\\ChangeLog_Jan2017.xls'
]

def try_all_imports(skip_xls=False, skip_xlsx=False, skip_csv=False, ignore_list=None, filter_warnings=False):
    if filter_warnings:
        warnings.filterwarnings("error")
    n_errors, n_files, n_xls, n_xlsx, n_csv, n_xls_complete, n_xlsx_complete, n_csv_complete  = 0, 0, 0, 0, 0, 0, 0, 0
    error_list = list()

    for root, dirs, files in os.walk('data'):
        for filename in files:
            filepath = os.path.join(root, filename)
            if filepath in ignore_list:
                continue

            extension = filepath.split('.')[-1].lower()
            if skip_xls and extension == 'xls':
                continue
            elif skip_xlsx and extension == 'xlsx':
                continue
            elif skip_csv and extension == 'csv':
                continue
            elif extension not in ['xls', 'xlsx', 'csv']:
                continue

            name = filepath.split('.')[0].lower()
            if name[-2:].lower() == 'fr':
                continue
            else:
                n_files += 1
                try:
                    if extension == 'xls':
                        n_xls += 1
                        header, date_column, sheet_name = how_many_xls_rows_to_skip(filepath, ['Compounds', 'Sampling Date'])
                        workbook = xlrd.open_workbook(filepath, encoding_override='ISO-8859-1')
                        dataframe = pd.read_excel(workbook, header=header, sheet_name=sheet_name, engine='xlrd')
                        parse_dates(dataframe, date_column=date_column)
                        n_xls_complete += 1
                    elif extension == 'xlsx':
                        n_xlsx += 1
                        header, date_column, sheet_name = how_many_xlsx_rows_to_skip(filepath, ['Compounds', 'Sampling Date'])
                        parse_dates(pd.read_excel(filepath, header=header, sheet_name=sheet_name), date_column=date_column)
                        n_xlsx_complete += 1
                    elif extension == 'csv':
                        n_csv += 1
                        header, date_column = how_many_csv_rows_to_skip(filepath, ['Compounds', 'Sampling Date'])
                        parse_dates(csv_to_pandas(filepath), date_column=date_column)
                        n_csv_complete += 1
                except:
                    n_errors += 1
                    error_list.append(filepath)
                    
    if filter_warnings:
        warnings.resetwarnings()

    print(f'{n_errors} errors / {n_files} total files')
    print(f'xls: {n_xls_complete} out of {n_xls}')
    print(f'csv: {n_csv_complete} out of {n_csv}')
    print(f'xlsx: {n_xlsx_complete} out of {n_xlsx}')

    return error_list

##### XLSX
* seems to work

In [7]:
xlsx_errors = try_all_imports(skip_xls=True, skip_xlsx=False, skip_csv=True, ignore_list=ignore_list)

0 errors / 118 total files
xls: 0 out of 0
csv: 0 out of 0
xlsx: 118 out of 118


##### XLS
* seems to work

In [8]:
xls_errors = try_all_imports(skip_xls=False, skip_xlsx=True, skip_csv=True, ignore_list=ignore_list)

0 errors / 220 total files
xls: 220 out of 220
csv: 0 out of 0
xlsx: 0 out of 0


##### CSV

In [9]:
csv_errors = try_all_imports(skip_xls=True, skip_xlsx=True, skip_csv=False, ignore_list=ignore_list)

0 errors / 628 total files
xls: 0 out of 0
csv: 628 out of 628
xlsx: 0 out of 0


# Not perfect

In [13]:
import csv
import openpyxl
import os
import xlrd
from dateutil import parser
from datetime import datetime
from openpyxl.utils.datetime import from_excel as datetime_from_excel
import pandas as pd


################## CSV ##################

def how_many_csv_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0

    with open(filepath, 'r', encoding='ISO-8859-1') as file:
        for row in csv.reader(file):
            for date_column in date_column_list:
                if date_column in row:
                    return num_rows_before_header, date_column
            num_rows_before_header += 1

    return None, None


def csv_to_pandas(filepath, date_column_list=['Compounds', 'Sampling Date']):
    num_rows_to_skip, date_column = how_many_csv_rows_to_skip(filepath, date_column_list)
    parsed_data = []

    with open(filepath, 'r', encoding='ISO-8859-1') as file:
        reader = csv.reader(file)

        for _ in range(num_rows_to_skip):
            next(reader)
        
        headers = next(reader)
        
        for row in reader:
            if all(item == '' for item in row):
                break
            parsed_data.append(row)

    df = pd.DataFrame(parsed_data, columns=headers)
    df[date_column] = pd.to_datetime(df[date_column])

    return df


################## XLSX ##################

def how_many_xlsx_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0
    wb = openpyxl.load_workbook(filepath, read_only=True)

    sheet = None
    for name in wb.sheetnames:
        if 'voc' in name.lower():
            sheet = wb[name]
            break
        if 'data' in name.lower() and 'metadata' not in name.lower():
            sheet = wb[name]
            break

    for row in sheet.iter_rows(values_only=True):
        for date_column in date_column_list:
            if date_column in row:
                wb.close()
                return num_rows_before_header, date_column
        num_rows_before_header += 1

    return None, None


def xlsx_to_pandas(filepath, date_column_list=['Compounds', 'Sampling Date']):
    num_rows_to_skip, date_column = how_many_xlsx_rows_to_skip(filepath, date_column_list)
    wb = openpyxl.load_workbook(filepath)

    sheet = None
    for name in wb.sheetnames:
        if 'voc' in name.lower():
            sheet = wb[name]
            break
        if 'data' in name.lower() and 'metadata' not in name.lower():
            sheet = wb[name]
            break

    for _ in range(num_rows_to_skip):
        next(sheet.iter_rows())

    header_row = next(sheet.iter_rows(min_row=sheet.min_row + num_rows_to_skip, max_row=sheet.min_row + num_rows_to_skip, values_only=True))
    date_column_index = header_row.index(date_column) if date_column in header_row else None

    parsed_data = []
    for row in sheet.iter_rows(min_row=sheet.min_row + num_rows_to_skip + 1, values_only=True):
        row = list(row)  # Convert the tuple to a list for modification

        date_cell = row[date_column_index]
        if isinstance(date_cell, datetime):
            pass
        elif isinstance(date_cell, float):
            row[date_column_index] = datetime_from_excel(date_cell)
        elif date_cell:
            row[date_column_index] = parser.parse(date_cell)
            
        parsed_data.append(row)

    return pd.DataFrame(parsed_data, columns=header_row)


################## XLS ##################

def get_xls_sheet(workbook):
    for name in workbook.sheet_names():
        if 'voc' in name.lower():
            return workbook[name]
        elif 'data' in name.lower():
            return workbook[name]
    return workbook.sheet_by_index(0)


def how_many_xls_rows_to_skip(filepath, date_column_list):
    num_rows_before_header = 0
    wb = xlrd.open_workbook(filepath, encoding_override='ISO-8859-1')
    
    sheet = get_xls_sheet(wb)

    for row_idx in range(sheet.nrows):
        row = sheet.row_values(row_idx)
        for date_column in date_column_list:
            if date_column in row:
                return num_rows_before_header, date_column
        num_rows_before_header += 1

    return None, None


def xls_to_pandas(filepath, date_column_list=['Compounds', 'Sampling Date']):
    num_rows_to_skip, date_column = how_many_xls_rows_to_skip(filepath, date_column_list)
    wb = xlrd.open_workbook(filepath, encoding_override='ISO-8859-1')

    sheet = get_xls_sheet(wb)

    header_row = None
    for row_idx in range(num_rows_to_skip, sheet.nrows):
        row = sheet.row_values(row_idx)
        if date_column in row:
            header_row = row
            num_rows_to_skip = row_idx
            break

    date_column_index = header_row.index(date_column)

    parsed_data = []
    for row_idx in range(num_rows_to_skip + 1, sheet.nrows):
        row = sheet.row_values(row_idx)
        row[date_column_index] = xlrd.xldate_as_datetime(row[date_column_index], wb.datemode)
        parsed_data.append(row)

    return pd.DataFrame(parsed_data, columns=header_row)


In [14]:
################## CHECK IMPORT SCRIPTS ARE WORKING ##################

def check_all_imports(skip_xls=False, skip_xlsx=False, skip_csv=False, ignore_list=None):
    n_errors, n_files, n_xls, n_xlsx, n_csv, n_xls_complete, n_xlsx_complete, n_csv_complete  = 0, 0, 0, 0, 0, 0, 0, 0
    error_list = list()

    for root, dirs, files in os.walk('data'):
        for filename in files:
            filepath = os.path.join(root, filename)
            if filepath in ignore_list:
                continue

            extension = filepath.split('.')[-1].lower()
            if skip_xls and extension == 'xls':
                continue
            elif skip_xlsx and extension == 'xlsx':
                continue
            elif skip_csv and extension == 'csv':
                continue
            elif extension not in ['xls', 'xlsx', 'csv']:
                continue

            n_files += 1
            name = filepath.split('.')[0].lower()
            if name[-2:].lower() == 'fr':
                continue
            else:
                try:
                    if extension == 'xls':
                        n_xls += 1
                        xls_to_pandas(filepath)
                        n_xls_complete += 1
                    elif extension == 'xlsx':
                        n_xlsx += 1
                        xlsx_to_pandas(filepath)
                        n_xlsx_complete += 1
                    elif extension == 'csv':
                        n_csv += 1
                        csv_to_pandas(filepath)
                        n_csv_complete += 1
                except:
                    n_errors += 1
                    error_list.append(filepath)

    print(f'{n_errors} errors / {n_files} total files')
    print(f'xls: {n_xls_complete} out of {n_xls}')
    print(f'csv: {n_csv_complete} out of {n_csv}')
    print(f'xlsx: {n_xlsx_complete} out of {n_xlsx}')

    return error_list

In [5]:
ignore_list = [
    'data\\ddmmyyyy.xlsx', 'data\\mmddyyyy.xlsx', 'data\\yyyyddmm.xlsx', 'data\\yyyymmdd.xlsx',
    'data\\ddmmyyyy.csv', 'data\\mmddyyyy.csv', 'data\\yyyyddmm.csv', 'data\\yyyymmdd.csv',
    'data\\2006\\S62601_VOCS.csv', 'data\\2007\\S62601_VOCS.csv',    # sideways csv for some reason
    'data\\2008\\S90227_VOC.csv', 'data\\2009\\S90227_VOC.csv', 'data\\2010\\S90227_VOC.csv',  # sampling data relocated
    'data\\2015\\ChangeLog_Jan2017.xls'
]

In [11]:
# relevant imports
import warnings
# warnings.filterwarnings("error")
warnings.resetwarnings()

csv_errors = check_all_imports(skip_xls=True, skip_xlsx=True, skip_csv=False, ignore_list=ignore_list)

  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_column] = pd.to_datetime(df[date_column])
  df[date_co

1 errors / 786 total files
xls: 0 out of 0
csv: 627 out of 628
xlsx: 0 out of 0


In [9]:
csv_errors[0]

'data\\2005\\S100111_CSV.csv'

In [23]:
file = 'data\\2005\\S100111_CSV.csv'

df = pd.read_csv(file, header=1)
df['Compounds'] = pd.to_datetime(df['Compounds'])

df.dtypes

  df['Compounds'] = pd.to_datetime(df['Compounds'])


Compounds        datetime64[ns]
Ethane                  float64
Ethylene                float64
Acetylene               float64
Propylene               float64
                      ...      
Canister ID#             object
Sample Volume             int64
NAPS ID                   int64
START TIME               object
DURATION                  int64
Length: 185, dtype: object

In [28]:
file = 'data\\2005\\S100111_CSV.csv'
df = pd.read_csv(file, header=1)

try:
    df['Compounds'] = pd.to_datetime(df['Compounds'])
    print('dis de furst one')
except UserWarning:
    df['Compounds'] = pd.to_datetime(df['Compounds'], format="%m/%d/%y")
    print('WARNING!!!!!!!!!!!!!!!!!!!!!!')

dis de furst one


  df['Compounds'] = pd.to_datetime(df['Compounds'])


In [29]:
import warnings
warnings.filterwarnings("error")

file = 'data\\2005\\S100111_CSV.csv'
df = pd.read_csv(file, header=1)

try:
    df['Compounds'] = pd.to_datetime(df['Compounds'])
    print('dis de furst one')
except UserWarning:
    df['Compounds'] = pd.to_datetime(df['Compounds'], format="%m/%d/%y")
    print('WARNING!!!!!!!!!!!!!!!!!!!!!!')

warnings.resetwarnings()



%m/%d/%y


Compounds        datetime64[ns]
Ethane                  float64
Ethylene                float64
Acetylene               float64
Propylene               float64
                      ...      
Canister ID#             object
Sample Volume             int64
NAPS ID                   int64
START TIME               object
DURATION                  int64
Length: 185, dtype: object

In [17]:
df.dtypes

Compounds        datetime64[ns]
Ethane                  float64
Ethylene                float64
Acetylene               float64
Propylene               float64
                      ...      
Canister ID#             object
Sample Volume             int64
NAPS ID                   int64
START TIME               object
DURATION                  int64
Length: 185, dtype: object

In [None]:
import warnings
warnings.filterwarnings("error")

df = pd.read_csv(csv_errors[0], header=1)
try:
    df['Compounds'] = pd.to_datetime(df['Compounds'])
except UserWarning:
    df['Compounds'] = pd.to_datetime(df['Compounds'], format="%m/%d/%y")

warnings.resetwarnings()

In [25]:
import warnings
# warnings.filterwarnings("error")

df = pd.read_csv(csv_errors[0], header=1)
df['Compounds'] = pd.to_datetime(df['Compounds'])

warnings.resetwarnings()
df.head()

UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.

In [22]:
df = pd.read_csv(csv_errors[0], header=1)
try:
    df['Compounds'] = pd.to_datetime(df['Compounds'])
except UserWarning:
    df['Compounds'] = pd.to_datetime(df['Compounds'], format="%m/%d/%y")
df.head()

  df['Compounds'] = pd.to_datetime(df['Compounds'])


Unnamed: 0,Compounds,Ethane,Ethylene,Acetylene,Propylene,Propane,1-Propyne,Isobutane,1-Butene/Isobutene,"1,3-Butadiene",...,Hexanal,"2,5-Dimethylbenzaldehyde",Unnamed: 177,Sample ID#,Sample Date,Canister ID#,Sample Volume,NAPS ID,START TIME,DURATION
0,2005-01-04,5.995573,8.586041,5.415266,3.5576,16.2875,0.3977,14.7718,2.5781,0.6067,...,,,,va37y.d,1/4/05,EPS 019,500,100111,00:00,24
1,2005-01-16,4.748603,5.727945,3.326609,2.1433,7.7279,0.2335,8.2115,1.5817,0.3761,...,,,,va50y.d,1/16/05,EPS 332,500,100111,00:00,24
2,2005-01-22,1.675851,1.61846,1.47654,0.6944,2.897,0.0703,2.4787,0.4901,0.1079,...,,,,va51y.d,1/22/05,EPS 193,500,100111,00:00,24
3,2005-01-28,3.740149,4.561266,3.343005,1.6539,8.8572,0.1614,8.3509,1.123,0.2444,...,,,,va69y.d,1/28/05,EPS 385,500,100111,00:00,24
4,2005-02-09,4.208301,5.750098,4.190533,2.3232,11.664,0.2408,13.0853,1.9264,0.3362,...,,,,va70y.d,2/9/05,EPS 092,500,100111,00:00,24


##### XLSX works (or seems to)

In [4]:
xlsx_errors = check_all_imports(skip_xls=True, skip_xlsx=False, skip_csv=True, ignore_list=ignore_list)

0 errors / 236 total files
xls: 0 out of 0
csv: 0 out of 0
xlsx: 118 out of 118


##### CSV works (finally)

In [5]:
csv_errors = check_all_imports(skip_xls=True, skip_xlsx=True, skip_csv=False, ignore_list=ignore_list)

0 errors / 786 total files
xls: 0 out of 0
csv: 628 out of 628
xlsx: 0 out of 0


##### XLS works too!

In [6]:
xls_errors = check_all_imports(skip_xls=False, skip_xlsx=True, skip_csv=True, ignore_list=ignore_list)

0 errors / 260 total files
xls: 220 out of 220
csv: 0 out of 0
xlsx: 0 out of 0


##### Confirm everything works correctly with random inspection

In [7]:
list_of_files = list()
for root, dirs, files in os.walk('data'):
    for filename in files:
        filepath = os.path.join(root, filename)
        if filepath not in ignore_list:
            list_of_files.append(filepath)

list_of_files = [x for x in list_of_files if x not in ignore_list]
list_of_files = [x for x in list_of_files if x.split('.')[-2][-2:].lower() != 'fr']

In [8]:
import random

file = random.choice(list_of_files)
print(file)
extension = file.split('.')[-1].lower()

df = None
if extension == 'xls':
    df = xls_to_pandas(file)
elif extension == 'xlsx':
    df = xlsx_to_pandas(file)
elif extension == 'csv':
    df = csv_to_pandas(file)
else:
    print('NO EXTENSION')

if isinstance(df, pd.DataFrame):
    display(df.head())
    display(df.dtypes)

data\2018\S101005_VOC_2018_EN.csv


Unnamed: 0,NAPS ID,Sampling Date,Sample Type,Ethylene,Ethylene-MDL,Ethylene-VFlag,Acetylene,Acetylene-MDL,Acetylene-VFlag,Ethane,...,"1,2,4-Trichlorobenzene-VFlag",Naphthalene,Naphthalene-MDL,Naphthalene-VFlag,Dodecane,Dodecane-MDL,Dodecane-VFlag,Hexachlorobutadiene,Hexachlorobutadiene-MDL,Hexachlorobutadiene-VFlag
0,101005,2018-01-02,R,1.3475,0.1,,0.6709899999999999,0.2,,3.4743,...,,0.035942,0.2,,0.020506,0.1,,0.004883,0.2,
1,101005,2018-01-08,R,-999.0,-999.0,M1,-999.0,-999.0,M1,-999.0,...,M1,-999.0,-999.0,M1,-999.0,-999.0,M1,-999.0,-999.0,M1
2,101005,2018-01-14,R,-999.0,-999.0,M1,-999.0,-999.0,M1,-999.0,...,,0.029961,0.2,,0.007453999999999999,0.1,,0.003764,0.2,
3,101005,2018-01-20,R,-999.0,-999.0,M1,-999.0,-999.0,M1,-999.0,...,M1,-999.0,-999.0,M1,-999.0,-999.0,M1,-999.0,-999.0,M1
4,101005,2018-01-26,R,-999.0,-999.0,M1,-999.0,-999.0,M1,-999.0,...,,0.012833,0.2,,0.005603,0.1,,0.003168,0.2,


NAPS ID                              object
Sampling Date                datetime64[ns]
Sample Type                          object
Ethylene                             object
Ethylene-MDL                         object
                                  ...      
Dodecane-MDL                         object
Dodecane-VFlag                       object
Hexachlorobutadiene                  object
Hexachlorobutadiene-MDL              object
Hexachlorobutadiene-VFlag            object
Length: 330, dtype: object