In [1]:
# Build deployment history by instrument type

In [2]:
# Output one csv file per instrument type, formatted as:
# sensorType,referenceDesignator,startTime,endTime,assetID,instrumentSN,githubCalibrationFile,vendorCalibrationFile

In [3]:
### import packages

import datetime
import metadataFunctions as mf
import numpy as np
import os
from os import path
import pandas as pd
import re
import subprocess
from subprocess import Popen
import glob


In [4]:
### setup directory pathways
# Every literal below ends in '/', so path.dirname() hands it back with that
# trailing separator removed; code that joins onto these variables adds its own '/'.
amRepo, calRepo, workingDir, fileOutDir = (
    path.dirname(localDir)
    for localDir in (
        '/Users/rsn/asset-management/',
        '/Users/rsn/calibrationFiles/',
        '/Users/rsn/Desktop/metaDataScripts/',
        '/Users/rsn/Desktop/metaDataScripts/deployments/',
    )
)

# URL prefixes that are substituted for the local repo paths when file links are built
githubLink = 'https://github.com/oceanobservatories/asset-management/tree/master/calibration/'
calRepoLink = 'https://github.com/OOI-CabledArray/calibrationFiles/blob/master/'


In [8]:
### load in github deployment sheets
# Identifiers used as the filename prefix of each '<id>_Deploy.csv' sheet
CabledArray = pd.Series([
    'CE02SHBP', 'CE04OSBP', 'CE04OSPD', 'CE04OSPS', 'RS01SBPD', 'RS01SBPS',
    'RS01SLBS', 'RS01SUM1', 'RS03AXBS', 'RS03AXPD', 'RS03AXPS', 'RS03INT2',
    'RS03INT1', 'RS01SUM2', 'RS03CCAL', 'RS03ECAL', 'RS03ASHS',
])

# One sheet path per identifier, all under <amRepo>/deployment
deploymentSheets = [amRepo + '/deployment/' + sheetPrefix + '_Deploy.csv' for sheetPrefix in CabledArray]

# Read every sheet (blank lines skipped, '#' treated as a comment marker) and
# stack them into a single frame with a fresh 0..n-1 index.
sheetFrames = []
for sheetPath in deploymentSheets:
    sheetFrames.append(pd.read_csv(sheetPath, skip_blank_lines=True, comment='#'))
df_deploy = pd.concat(sheetFrames, ignore_index=True)

### load in ITM
ITM_list = pd.read_csv(workingDir + '/ITM.csv')

# Turn the delimited text columns into lists. instrumentType is split on a bare
# comma, whereas the other three columns are split on comma + space.
for columnName, separator in (
    ('mfgSN', ', '),
    ('assignments', ', '),
    ('calibrations', ', '),
    ('instrumentType', ','),
):
    ITM_list[columnName] = ITM_list[columnName].str.split(separator)

### load github calibration directory list for sensors with cal sheets
sensorsWithCals = [
    'CTDBPN', 'CTDBPO', 'CTDPFA', 'CTDPFB', 'DOFSTA', 'DOSTAD', 'FLCDRA', 'FLNTUA', 'FLORDD',
    'HYDBBA', 'HYDLFA', 'NUTNRA', 'OBSBBA', 'OBSSPA', 'OPTAAC', 'OPTAAD', 'PARADA', 'PCO2WA',
    'PCO2WB', 'PHSENA', 'PHSEND', 'SPKIRA', 'TMPSFA',
]

githubFileList = []
for sensor in sensorsWithCals:
    sensorDir = amRepo + '/calibration/' + sensor
    # Sensors without a calibration directory in the local clone are skipped
    if not os.path.isdir(sensorDir):
        continue
    # Keep only the entries whose name begins with 'AT'
    githubFileList.extend(
        amRepo + '/calibration/' + sensor + '/' + calName
        for calName in os.listdir(sensorDir)
        if calName.startswith('AT')
    )

### load local calibration Repo directory list
# Every path exactly two levels below calRepo is taken. A filter on the name
# prefixes 'ATAPL', 'ATOSU', 'ATCWK' was sketched here earlier but is not applied.
calRepoFileList = glob.glob(calRepo + '/*/*')

In [10]:
### Create metadata dictionaries

# Reference Designator -> list of deployment records, one per deployment-sheet row.
# Rows are sorted descending by Reference Designator, then by startDateTime, and
# each record list keeps that order.
df_deploy_sort = df_deploy.sort_values(by=["Reference Designator","startDateTime"],ascending=False)

RefDes_dict = {}
for refDes in df_deploy_sort['Reference Designator'].unique():
    # Index labels of the rows belonging to this reference designator
    rowLabels = df_deploy_sort[df_deploy_sort['Reference Designator'] == refDes].index
    deployRecords = []
    for rowLabel in rowLabels:
        deployRecords.append({
            # start time is parsed to a datetime; the stop time is stored as read
            'deployDate': datetime.datetime.strptime(df_deploy_sort.loc[rowLabel, 'startDateTime'], '%Y-%m-%dT%H:%M:%S'),
            'deployEnd': df_deploy_sort.loc[rowLabel, 'stopDateTime'],
            'AssetID': df_deploy_sort.loc[rowLabel, 'sensor.uid'],
            'deployNum': df_deploy_sort.loc[rowLabel, 'deploymentNumber'],
            # placeholders, filled in by a later cell
            'vendorCalFile': 'none',
            'githubCalFile': 'none',
            'instrumentType': 'none',
            'instrumentSN': 'none',
        })
    RefDes_dict[refDes] = deployRecords

# ITM AssetID key to mfgSN, assignments, calibrations, instrumentType
# set_index('assetID') makes the asset ID the row label; transposing turns each
# asset into a column, and to_dict('series') then yields {assetID: Series}, where
# each Series is indexed by the remaining ITM column names
# (e.g. ITM_dict[assetID]['mfgSN']).
ITM_dict = ITM_list.set_index('assetID').T.to_dict('series')

def _indexCalFiles(calFiles, localPrefix, linkPrefix, namePattern):
    """Group calibration files by the asset ID embedded in their file name.

    Parameters
    ----------
    calFiles : iterable of str
        Paths of calibration files on the local disk.
    localPrefix : str
        Leading part of each path that is replaced by ``linkPrefix``.
    linkPrefix : str
        URL prefix substituted for ``localPrefix`` in the stored link.
    namePattern : str
        Regular expression matched against the start of the file's base name.
        Group 1 must capture the asset ID and group 2 the calibration date
        written as YYYYMMDD.

    Returns
    -------
    dict
        ``{assetID: {'calFile': [[calDate, fileLink], ...]}}`` where calDate is
        a ``datetime.datetime`` and entries appear in input order.
    """
    calIndex = {}
    for calFile in calFiles:
        # Match on the base name only, so the result does not depend on how
        # many directory levels sit above the repository.
        fileBits = re.match(namePattern, os.path.basename(calFile))
        if not fileBits:
            continue
        try:
            calDate = datetime.datetime.strptime(fileBits.group(2), '%Y%m%d')
        except ValueError:
            # A name can match the pattern without carrying a usable date (the
            # vendor pattern's digit group may even be empty). Report the file
            # and carry on instead of aborting the whole cell.
            print('Skipping calibration file with unparseable date: ' + calFile)
            continue
        fileLink = calFile.replace(localPrefix, linkPrefix)
        calIndex.setdefault(fileBits.group(1), {'calFile': []})['calFile'].append([calDate, fileLink])
    return calIndex


# Github sensor cals AssetID key to calibration dates (extracted from fileNames)
# File names look like '<assetID>__<YYYYMMDD>.csv'; the local prefix is derived
# from amRepo so links stay correct if the checkout location changes.
githubSensorCals = _indexCalFiles(
    githubFileList,
    amRepo + '/calibration/',
    githubLink,
    r"(.*)__(.*)\.csv",
)

# Vendor sensor cals AssetID key to calibration dates (extracted from fileNames)
# File names look like '<assetID>__<digits><anything>.<ext>'.
vendorSensorCals = _indexCalFiles(
    calRepoFileList,
    calRepo + '/',
    calRepoLink,
    r"(.*)__([0-9]*).*\..*$",
)
        

In [11]:
def _pickCalFile(calIndex, assetID, deployDate, currentValue):
    """Choose the calibration-file link to record for one deployment.

    calIndex maps assetID -> {'calFile': [[calDate, link], ...]}.

    Returns ``currentValue`` unchanged when the asset has no entry in calIndex,
    'noValidCalFile' when it has entries but none dated strictly before
    deployDate, and otherwise the link of the entry closest in time to
    deployDate among those earlier entries.
    """
    if assetID not in calIndex:
        return currentValue
    earlierCals = [cal for cal in calIndex[assetID]['calFile'] if cal[0] < deployDate]
    if not earlierCals:
        return 'noValidCalFile'
    closestCal = min(earlierCals, key=lambda cal: abs(cal[0] - deployDate))
    return closestCal[1]


instType = []

for deployRecords in RefDes_dict.values():
    for deploy in deployRecords:
        assetID = deploy['AssetID']

        #*# lookup instrumentType and instrumentSN
        if assetID in ITM_dict:
            itmEntry = ITM_dict[assetID]
            # type list is joined with '_' and any '-' characters are removed
            deploy['instrumentType'] = '_'.join(itmEntry['instrumentType']).replace('-','')
            deploy['instrumentSN'] = itmEntry['mfgSN']
        else:
            print('AssetID not in ITM')
            print(assetID)
            deploy['instrumentType'] = 'noValidType'
            deploy['instrumentSN'] = ['noValidSN']
        instType.append(deploy['instrumentType'])

        #*# assign github calibration file
        deploy['githubCalFile'] = _pickCalFile(
            githubSensorCals, assetID, deploy['deployDate'], deploy['githubCalFile'])

        #*# assign vendor calibration file
        ### TODO: add capability to list multiple files as vendor file...i.e. OPTAAC ".cal" + ".dev"
        deploy['vendorCalFile'] = _pickCalFile(
            vendorSensorCals, assetID, deploy['deployDate'], deploy['vendorCalFile'])

        
        

In [12]:
# Distinct instrument type strings collected while filling in the deployments
instTypes = set(instType)

# open(..., 'w') does not create missing parent directories, so make sure the
# output directory exists before any file is written.
os.makedirs(fileOutDir, exist_ok=True)

# Write one '<type>_deployments.csv' per instrument type
for inst in instTypes:
    deploymentList = []
    for refDes, deployRecords in RefDes_dict.items():
        for deployment in deployRecords:
            # NOTE(review): this is a substring test on the joined type string,
            # so a deployment typed 'A_B' is also listed in the file for 'A'
            # and for 'B' - confirm that this is intended.
            if inst in deployment['instrumentType']:
                deploymentList.append([
                    deployment['instrumentType'],
                    refDes,
                    deployment['deployDate'],
                    deployment['deployEnd'],
                    deployment['AssetID'],
                    deployment['instrumentSN'],
                    deployment['githubCalFile'],
                    deployment['vendorCalFile'],
                ])

    # Order rows by reference designator, then by deployment start
    deploymentList_sorted = sorted(deploymentList, key=lambda row: (row[1], row[2]))
    fileName = fileOutDir + '/' + inst + '_deployments.csv'
    with open(fileName,'w') as f:
        f.write('sensorType,referenceDesignator,startTime,endTime,assetID,instrumentSN,githubCalibrationFile,vendorCalibrationFile\n')
        for entry in deploymentList_sorted:
            # instrumentSN (6th field) is wrapped in double quotes; the other
            # fields are written as-is.
            f.write("%s,%s,%s,%s,%s,\"%s\",%s,%s\n" % tuple(entry))
        

In [20]:

# Every reference designator with at least one deployment whose instrumentType
# string contains one of the instrument types (one list entry per match).
keyList = [
    refDes
    for inst in instTypes
    for refDes, deployRecords in RefDes_dict.items()
    for deployment in deployRecords
    if inst in deployment['instrumentType']
]

keyList_unique = set(keyList)

# A set iterates in arbitrary order, which can differ between interpreter runs;
# sort so the written file is identical from run to run.
# NOTE(review): the file goes to the current working directory rather than
# fileOutDir - confirm that this is intended.
with open('refDesList.csv','w') as f:
    f.write('referenceDesignator\n')
    for entry in sorted(keyList_unique):
        f.write("%s\n" % (entry))
        