In [1]:
import pandas as pd
import numpy as np
import xarray as xr 

In [2]:
# load in master RCA instrument list
master_df = pd.read_csv('./params/RCA-InstrumentList.csv')

In [3]:
master_df

Unnamed: 0,assetID,instrumentType,mfgSN,SNnotes
0,ATOSU-69825-00001,ADCPS-I,21498,
1,ATOSU-69825-00002,ADCPS-I,18153,
2,ATOSU-69825-00003,ADCPS-I,18919,
3,ATAPL-58419-00001,ADCPS-K,18444,
4,ATAPL-58419-00002,ADCPS-K,18975,
...,...,...,...,...
387,ATAPL-71444-00003,DP-VEHICLE,3,
388,ATAPL-71444-00004,DP-VEHICLE,4,
389,ATAPL-71444-00005,DP-VEHICLE,5,
390,ATAPL-71444-00006,DP-VEHICLE,6,


In [4]:
# load manual image overview CSV for the current year
imageSN_2021 = pd.read_csv('./params/imageSN_2021.csv')
imageSN_2023 = pd.read_csv('./params/imageSN_2023.csv')

In [5]:
imageSN_2021

Unnamed: 0,referenceDesignator,deployYear,imageFile,imageSerialNumber,imageAssetID
0,CE02SHBP-LJ01D-05-ADCPTB104,2020,IMG_4469.HEIC,19003,ATOSU-69826-00002
1,CE02SHBP-LJ01D-06-CTDBPN106,2020,IMG_4466.HEIC,7230,ATOSU-69827-00001
2,CE02SHBP-LJ01D-06-DOSTAD106,2020,IMG_4467.HEIC,216,ATOSU-58320-00020
3,CE02SHBP-LJ01D-07-VEL3DC108,2020,IMG_4468.HEIC,50078167,ATOSU-69829-00003
4,CE02SHBP-LJ01D-08-OPTAAD106,2020,IMG_4465.HEIC,169,ATOSU-58332-00009
...,...,...,...,...,...
64,RS03AXPS-SF03A-3B-OPTAAD301,2020,IMG_1546.JPG,134,ATAPL-58332-00001
65,RS03AXPS-SF03A-3D-SPKIRA301,2020,IMG_1548.JPG,244,ATAPL-58341-00002
66,RS03AXPS-SF03A-4A-NUTNRA301,2020,IMG_1544.JPG,379,ATAPL-68020-00002
67,RS03AXPS-SF03A-4B-VELPTD302,2020,IMG_1544.JPG,6334,ATAPL-70114-00002


In [6]:
imageSN_2023.rename(columns={'ReferenceDesignator': 'referenceDesignator'}, inplace=True)

In [7]:
imageSN_2023

Unnamed: 0,referenceDesignator,deployYear,imageFile,imageSerialNumber,imageAssetID
0,RS01SUM2-MJ01B-12-ADCPSK101,2018,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,18977,
1,RS01SLBS-LJ01A-10-ADCPTE101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,18813,
2,RS01SLBS-LJ01A-11-OPTAAC103,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,248,ATAPL-69943-00008
3,RS01SLBS-LJ01A-12-CTDPFB101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,4830-67627,ATAPL-67627-00002
4,RS01SLBS-LJ01A-12-DOSTAD101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,,ATAPL-58320-00014
...,...,...,...,...,...
191,RS03INT1-MJ03C-05-CAMDSB303,2023,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,301,ATAPL-70101-00022
192,RS03INT1-MJ03C-07-PPSDNA301,2023,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,,ATAPL-58338-00001
193,RS03INT1-MJ03C-07-RASFLA301,2023,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,12881-02,
194,RS03INT1-MJ03C-09-TRHPHA302,2023,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,2,


In [8]:
imageSN = pd.concat([imageSN_2023, imageSN_2021], ignore_index=True)

In [9]:
imageSN

Unnamed: 0,referenceDesignator,deployYear,imageFile,imageSerialNumber,imageAssetID
0,RS01SUM2-MJ01B-12-ADCPSK101,2018,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,18977,
1,RS01SLBS-LJ01A-10-ADCPTE101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,18813,
2,RS01SLBS-LJ01A-11-OPTAAC103,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,248,ATAPL-69943-00008
3,RS01SLBS-LJ01A-12-CTDPFB101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,4830-67627,ATAPL-67627-00002
4,RS01SLBS-LJ01A-12-DOSTAD101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,,ATAPL-58320-00014
...,...,...,...,...,...
260,RS03AXPS-SF03A-3B-OPTAAD301,2020,IMG_1546.JPG,134,ATAPL-58332-00001
261,RS03AXPS-SF03A-3D-SPKIRA301,2020,IMG_1548.JPG,244,ATAPL-58341-00002
262,RS03AXPS-SF03A-4A-NUTNRA301,2020,IMG_1544.JPG,379,ATAPL-68020-00002
263,RS03AXPS-SF03A-4B-VELPTD302,2020,IMG_1544.JPG,6334,ATAPL-70114-00002


In [10]:
def process_mfgSN(row):
    # some assests have multiple manufacturer serial numbers so we eplit them out
    mfgSN_values = row['mfgSN'].split(',')
    assetID = row['assetID']
    
    return {mfgSN_value: assetID for mfgSN_value in mfgSN_values}

# Applying the function along rows (axis=1)
mfg_dict = master_df.apply(process_mfgSN, axis=1).to_list()

In [11]:
# many dicts to one dict {'manufacturer_serialnumber' : 'assetID'} 
final_mfg_dict = {k: v for mfg_dict in mfg_dict for k, v in mfg_dict.items()}

In [12]:
# note that this has some repeated values because there are multiple serial numbers per assetID
final_mfg_dict

{'21498': 'ATOSU-69825-00001',
 '18153': 'ATOSU-69825-00002',
 '18919': 'ATOSU-69825-00003',
 '18444': 'ATAPL-58419-00001',
 '18975': 'ATAPL-58419-00002',
 '18977': 'ATAPL-58419-00003',
 '18493': 'ATOSU-69826-00001',
 '19003': 'ATOSU-69826-00002',
 '22115': 'ATOSU-69826-00003',
 '18478': 'ATAPL-58315-00001',
 '18974': 'ATAPL-58315-00002',
 '18980': 'ATAPL-58315-00003',
 '23338': 'ATAPL-58315-00004',
 '23339': 'ATAPL-58315-00005',
 '18471': 'ATAPL-68073-00001',
 '18813': 'ATAPL-68073-00002',
 '19224': 'ATAPL-68073-00003',
 '23442': 'ATAPL-68073-00004',
 '23443': 'ATAPL-68073-00005',
 '1023': 'PIRSN-PADCPA-00001',
 '3': 'ATAPL-71444-00003',
 ' 9651': 'ATAPL-58316-00001',
 ' 8643': 'ATAPL-58316-00001',
 ' 120785': 'ATAPL-58316-00001',
 '4': 'ATAPL-71444-00004',
 ' 9655': 'ATAPL-58316-00002',
 ' 3616': 'ATAPL-58316-00002',
 ' 120788': 'ATAPL-58316-00002',
 '5': 'ATAPL-71444-00005',
 ' 9676': 'ATAPL-58316-00003',
 ' 9914': 'ATAPL-58316-00003',
 ' 120798': 'ATAPL-58316-00003',
 '7': 'ATAPL-7

In [13]:
def process_imageSN(row, mfg_dict):
    
    print('-----------------------------------------------')
    imageSerialNumbers = row['imageSerialNumber']
    imageAssetID = row['imageAssetID']
    
    if type(imageSerialNumbers) is str and '/' in imageSerialNumbers:
        imageSerialNumbers = row['imageSerialNumber'].split('/')
        print(f"it looks like there are multiple / SNs in this cell {imageSerialNumbers}")
    elif type(imageSerialNumbers) is str and ',' in imageSerialNumbers:
        imageSerialNumbers = row['imageSerialNumber'].split(',')
        print(f"it looks like there are multiple , SNs in this cell {imageSerialNumbers}")
    else:
        imageSerialNumbers = [imageSerialNumbers] # both list for consistency

    # list of matching asset ids
    match_list = []
    # proportion of serial number which matches if it is a partial match
    prop_list = []
    # list of the manufacturer serial numbers that match the imageSN
    mfg_sn_list = []

    # exact SN match
    exact_match_found = False 
    # assetID, exact SN or partial SN match
    any_match_found = False
    # exact assetID match
    exact_asset_match_found = False
    # any assetID match
    any_asset_match_found = False

    for imageSerialNumber in imageSerialNumbers:
        print(f"matching {imageSerialNumber}...")
        for manufacturerSerialNumber in mfg_dict.keys():
            if type(imageSerialNumber) is str and imageSerialNumber == manufacturerSerialNumber:
                print("exact match found")
                matching_assetID = mfg_dict[manufacturerSerialNumber]
                match_list.append(matching_assetID)
                mfg_sn_list.append(manufacturerSerialNumber)

                match_list = list(set(match_list))
    
                exact_match_found = True
                any_match_found = True
            
        if exact_match_found == False:
            for manufacturerSerialNumber in mfg_dict.keys():
                if type(imageSerialNumber) is str and imageSerialNumber in manufacturerSerialNumber and imageSerialNumber != manufacturerSerialNumber:
                    print("found a partial match")
                    matching_assetID = mfg_dict[manufacturerSerialNumber]
                    match_prop = round(len(imageSerialNumber) / len(manufacturerSerialNumber),2)
                    match_list.append(matching_assetID)
                    prop_list.append(match_prop)
                    mfg_sn_list.append(manufacturerSerialNumber)

                    match_list = list(set(match_list))
    
                    any_match_found = True

        if any_match_found == False:
            print(f"still no match - attempting to match by assetID <{imageAssetID}> instead...")
            for master_assetID in mfg_dict.values():
                if type(imageAssetID) is str and imageAssetID == master_assetID:
                    print("exact assetID match found")
                    matching_assetID = master_assetID
                    match_list.append(matching_assetID)

                    match_list = list(set(match_list))
    
                    any_match_found = True
                    exact_asset_match_found = True
                
            
    if len(match_list) > 0:
        row['matching_asset_ids'] = match_list
    else: 
        row['matching_asset_ids'] = np.nan

    if len(prop_list) > 0: 
        row['proportion_match'] = prop_list
    else: 
        row['proportion_match'] = np.nan

    if len(mfg_sn_list) > 0:
        row['matching_mfg_sn'] = mfg_sn_list
    else:
        row['matching_mfg_sn'] = np.nan

    if exact_match_found == False:
        row["exact_SN_match"] = np.nan
    else:
        row["exact_SN_match"] = True

    if exact_asset_match_found == False:
        row["exact_assetID_match"] = np.nan
    else:
        row["exact_assetID_match"] = True

    if any_match_found == False:
        row["any_match"] = np.nan
    else:
        row["any_match"] = True

    return row



In [14]:
imageSN = imageSN.apply(process_imageSN, axis=1, args=(final_mfg_dict,))

-----------------------------------------------
matching 18977...
exact match found
-----------------------------------------------
matching 18813...
exact match found
-----------------------------------------------
matching 248...
exact match found
-----------------------------------------------
matching 4830-67627...
still no match - attempting to match by assetID <ATAPL-67627-00002> instead...
exact assetID match found
-----------------------------------------------
matching nan...
still no match - attempting to match by assetID <ATAPL-58320-00014> instead...
exact assetID match found
-----------------------------------------------
matching nan...
still no match - attempting to match by assetID <ATAPL-70150-00006> instead...
-----------------------------------------------
matching nan...
still no match - attempting to match by assetID <ATAPL-70101-00003> instead...
-----------------------------------------------
matching 19003...
exact match found
-----------------------------------

In [15]:
imageSN

Unnamed: 0,referenceDesignator,deployYear,imageFile,imageSerialNumber,imageAssetID,matching_asset_ids,proportion_match,matching_mfg_sn,exact_SN_match,exact_assetID_match,any_match
0,RS01SUM2-MJ01B-12-ADCPSK101,2018,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,18977,,[ATAPL-58419-00003],,[18977],True,,True
1,RS01SLBS-LJ01A-10-ADCPTE101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,18813,,[ATAPL-68073-00002],,[18813],True,,True
2,RS01SLBS-LJ01A-11-OPTAAC103,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,248,ATAPL-69943-00008,[ATAPL-69943-00008],,[248],True,,True
3,RS01SLBS-LJ01A-12-CTDPFB101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,4830-67627,ATAPL-67627-00002,[ATAPL-67627-00002],,,,True,True
4,RS01SLBS-LJ01A-12-DOSTAD101,2020,/Volumes/Data0/Archive/Cruise_data/RCA/Visions...,,ATAPL-58320-00014,[ATAPL-58320-00014],,,,True,True
...,...,...,...,...,...,...,...,...,...,...,...
260,RS03AXPS-SF03A-3B-OPTAAD301,2020,IMG_1546.JPG,134,ATAPL-58332-00001,[ATAPL-58332-00001],,[134],True,,True
261,RS03AXPS-SF03A-3D-SPKIRA301,2020,IMG_1548.JPG,244,ATAPL-58341-00002,[ATAPL-58341-00002],,[244],True,,True
262,RS03AXPS-SF03A-4A-NUTNRA301,2020,IMG_1544.JPG,379,ATAPL-68020-00002,[ATAPL-68020-00002],,[379],True,,True
263,RS03AXPS-SF03A-4B-VELPTD302,2020,IMG_1544.JPG,6334,ATAPL-70114-00002,[ATAPL-70114-00002],[0.5],[AQS-6334],,,True


In [16]:
imageSN.to_csv("./reportOuts/matchedSerialNumbers_2023.csv")