In [1]:
import csv
import pandas as pd
import numpy as np
import ctdcal.fit_ctd as fit_ctd
import os

In [2]:
salt_Dir = './data/salt/'

file_list = os.listdir(salt_Dir)
files = []
for file in file_list:
    if '.' not in file:
        files.append(file)

In [3]:
files

['04001',
 '02101',
 '08201',
 '06501',
 '08801',
 '00401',
 '08601',
 '00202',
 '06101',
 '02501',
 '04401',
 '10001',
 '01802',
 '01001',
 '09601',
 '07101',
 '03501',
 '11001',
 '05401',
 '05802',
 '11401',
 '05001',
 '03101',
 '09201',
 '07501',
 '09801',
 '01401',
 '08101',
 '06601',
 '00701',
 '10701',
 '04301',
 '02801',
 '02201',
 '04901',
 '02601',
 '10301',
 '04701',
 '00301',
 '06801',
 '08501',
 '06201',
 '11901',
 '03601',
 '05701',
 '11301',
 '01301',
 '07801',
 '09501',
 '07201',
 '01901',
 '09101',
 '07601',
 '01701',
 '05301',
 '11701',
 '03801',
 '03201',
 '05901',
 '00601',
 '06701',
 '08001',
 '04801',
 '02301',
 '02901',
 '10601',
 '04201',
 '10201',
 '04601',
 '02701',
 '10801',
 '00801',
 '08401',
 '06901',
 '05601',
 '11201',
 '03701',
 '11801',
 '07301',
 '09401',
 '07901',
 '01201',
 '01601',
 '07701',
 '09001',
 '03301',
 '03901',
 '05201',
 '11601',
 '02001',
 '04101',
 '10501',
 '00501',
 '08901',
 '12001',
 '08301',
 '06001',
 '08701',
 '00101',
 '04501',


In [4]:
def salt_loader(saltpath):
    f = open(saltpath, newline='')
    saltF = csv.reader(f,delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace='True')
    
    saltArray = []
    for row in saltF:
        saltArray.append(row)
    del saltArray[0]
         
    header = ['STNNBR','CASTNO','SAMPNO','BathTEMP','CRavg','autosalSAMPNO',\
              'Unknown','StartTime','EndTime','Attempts','Reading1','Reading2',\
              'Reading3', 'Reading4', 'Reading5','Reading6','Reading7','Reading8',\
              'Reading9', 'Reading10','Reading11','Reading12']
    f.close()
    # make all rows of Salt files the same length as header   
    for row in saltArray:
        if len(row) < len(header):
            row.extend([np.NaN]*(len(header)-len(row)))
            
    saltArray = np.array(saltArray) # change to np array
    
    saltDF = pd.DataFrame(saltArray,columns=header) # change to DataFrame
    saltDF = saltDF.apply(pd.to_numeric, errors='ignore')
    saltDF.replace(to_replace='nan', value=np.NaN,inplace=True)
    saltDF.dropna(axis=1,how='all',inplace=True)
    saltDF = saltDF[saltDF['autosalSAMPNO'] != 'worm']
    saltDF['SALNTY'] = fit_ctd.SP_salinometer((saltDF['CRavg']/2.0),saltDF['BathTEMP'])
    return saltDF

def salt_df_parser(saltDF, outdir, stn_col = 'STNNBR', cast_col = 'CASTNO'):
    stations = saltDF[stn_col].unique()
    for station in stations:
        saltStation = saltDF[saltDF[stn_col] == station]
        casts = saltStation[cast_col].unique()
        for cast in casts:
            stn_cast_salts = saltStation[saltStation[cast_col] == cast]
        # write out individual cast files
            station_string = str(station)
            cast_string = str(cast)
            if len(station_string) == 2:
                station_string = '0' + station_string
            if len(station_string) == 1:
                station_string = '00' + station_string
            if len(cast_string) == 1:
                cast_string = '0' + cast_string
            
            outfile = outdir + station_string + cast_string + '_salts.csv'
            if not os.path.exists(outfile):
                stn_cast_salts.to_csv(outfile,index=False)
            else:
                print(outfile + ' already exists...skipping')
        

In [7]:
for file in files:
    print(file)
    salt_path = salt_Dir + file
    saltDF = salt_loader(saltpath=salt_path)
    salt_df_parser(saltDF, salt_Dir)

04001
./data/salt/04001_salts.csv already exists...skipping
02101
./data/salt/02101_salts.csv already exists...skipping
08201
./data/salt/08201_salts.csv already exists...skipping
06501
./data/salt/06501_salts.csv already exists...skipping
08801
./data/salt/08801_salts.csv already exists...skipping
00401
./data/salt/00401_salts.csv already exists...skipping
08601
./data/salt/08601_salts.csv already exists...skipping
00202
./data/salt/00202_salts.csv already exists...skipping
06101
./data/salt/06101_salts.csv already exists...skipping
02501
./data/salt/02501_salts.csv already exists...skipping
04401
./data/salt/04401_salts.csv already exists...skipping
10001
./data/salt/10001_salts.csv already exists...skipping
01802
./data/salt/01802_salts.csv already exists...skipping
01001
./data/salt/01001_salts.csv already exists...skipping
09601
./data/salt/09601_salts.csv already exists...skipping
07101
./data/salt/07101_salts.csv already exists...skipping
03501
./data/salt/03501_salts.csv alread

In [None]:
os.path.exists()

In [None]:
help(pd.DataFrame.shift)