## Adding Manufacturer information to UCSF Series Submission TSVs

In [1]:
import pandas as pd
import numpy as np
import os

#### Grab folder of series in batch and the manufacturer and model table

In [2]:
# Input locations on the mounted Drive:
#   directory1 -> folder holding the series TSVs for this batch
#   directory2 -> CSV file with the manufacturer/model table (a file path, despite the name)
directory1, directory2 = (
    '/content/drive/MyDrive/batch_1_series',
    '/content/drive/MyDrive/manufacturer_and_model.csv',
)

#### Convert manufacturer and model table to dataframe

In [3]:
# Read the manufacturer/model table from the CSV path held in directory2.
df_manufacturer = pd.read_csv(filepath_or_buffer=directory2)
# Bare trailing expression so the notebook renders the frame as the cell output.
df_manufacturer

Unnamed: 0,Anon MRN,Anon Acc #,Anon Study UID,Anon Series UID,Manufacturer,Model
0,419639-001543,4288,1.2.826.0.1.3680043.10.474.419639.194230936802...,1.2.826.0.1.3680043.10.474.419639.188883800764...,GE MEDICAL SYSTEMS,LightSpeed VCT
1,419639-001543,4288,1.2.826.0.1.3680043.10.474.419639.194230936802...,1.2.826.0.1.3680043.10.474.419639.221684187504...,GE MEDICAL SYSTEMS,LightSpeed VCT
2,419639-001543,4288,1.2.826.0.1.3680043.10.474.419639.194230936802...,1.2.826.0.1.3680043.10.474.419639.254330638483...,GE MEDICAL SYSTEMS,LightSpeed VCT
3,419639-001543,4288,1.2.826.0.1.3680043.10.474.419639.194230936802...,1.2.826.0.1.3680043.10.474.419639.168203265344...,GE MEDICAL SYSTEMS,LightSpeed VCT
4,419639-001543,4288,1.2.826.0.1.3680043.10.474.419639.194230936802...,1.2.826.0.1.3680043.10.474.419639.178547372178...,GE MEDICAL SYSTEMS,LightSpeed VCT
...,...,...,...,...,...,...
122784,419639-013404,54847,1.2.826.0.1.3680043.10.474.419639.171046870417...,1.2.826.0.1.3680043.10.474.419639.248348309363...,Canon Inc.,CXDI Control Software NE
122785,419639-013403,54852,1.2.826.0.1.3680043.10.474.419639.148562626697...,1.2.826.0.1.3680043.10.474.419639.260920275017...,GE Healthcare,Discovery XR656
122786,419639-013406,54854,1.2.826.0.1.3680043.10.474.419639.185210959096...,1.2.826.0.1.3680043.10.474.419639.275464002775...,Canon Inc.,CXDI Control Software NE
122787,419639-013397,54844,1.2.826.0.1.3680043.10.474.419639.116210965643...,1.2.826.0.1.3680043.10.474.419639.508905225610...,GE Healthcare,Discovery XR656


#### Function that adds the manufacturer and model info to the series TSVs, using the series UID as a key to merge

In [4]:
def add_manufacturer_info(dir, df2, output_dir=None):
  """Add manufacturer and model columns to every series TSV in a folder.

  Each ``.tsv`` file in ``dir`` is read, left-joined to ``df2`` on the series
  UID, and written back out as a TSV with two extra columns:
  ``manufacturer`` and ``manufacturer_model_name``.

  Args:
    dir: Path of the folder containing the series TSVs. Every TSV must have a
      ``series_uid`` column.
    df2: DataFrame with at least the columns ``Anon Series UID``,
      ``Manufacturer`` and ``Model``. Any other columns are ignored.
    output_dir: Folder to write the updated TSVs into. When ``None`` (the
      default) files are written to the current working directory under
      their original file name.

  Returns:
    None. The updated tables are written to disk.

  Notes:
    * Every row of the input TSV is kept. Series with no entry in ``df2`` get
      empty manufacturer/model values rather than being dropped.
    * When ``df2`` lists a series UID more than once, the first occurrence is
      used.
    * No intermediate CSV copies of the input are written.
  """
  # De-duplicate the lookup table *before* merging. De-duplicating the merged
  # result on 'Anon Series UID' would treat all unmatched rows (NaN key) as
  # duplicates of one another and silently delete all but the first of them.
  # Rows with a missing key are removed so they can never match a series row
  # whose own series_uid is missing.
  lookup = (
      df2[['Anon Series UID', 'Manufacturer', 'Model']]
      .dropna(subset=['Anon Series UID'])
      .drop_duplicates(subset=['Anon Series UID'])
  )

  if output_dir is not None:
    os.makedirs(output_dir, exist_ok=True)

  for filename in sorted(os.listdir(dir)): #iterate through folder of series TSVs
    f = os.path.join(dir, filename) #grab file path
    # Skip sub-directories (e.g. .ipynb_checkpoints) and non-TSV files.
    if not (os.path.isfile(f) and filename.lower().endswith('.tsv')):
      continue

    series = pd.read_table(f, sep='\t') #read series TSV straight into a dataframe

    #merge manufacturer/model lookup with series df using series UID
    updated_series = pd.merge(series, lookup, left_on='series_uid', right_on='Anon Series UID', how='left')

    #clean up new series df:
    updated_series['manufacturer'] = updated_series['Manufacturer']
    updated_series['manufacturer_model_name'] = updated_series['Model']
    updated_series = updated_series.drop(columns=['Anon Series UID', 'Manufacturer', 'Model'])

    out_path = filename if output_dir is None else os.path.join(output_dir, filename)
    updated_series.to_csv(out_path, sep='\t', index=False) #write new series df as TSV

#### Run function on folder of series TSVs and manufacturer/model table

In [5]:
add_manufacturer_info(directory1, df_manufacturer) 