##### `If your files originate from a Thermo instrument:`

Copy all your `*.raw` files to the `data/raw` directory, then run the following cell to generate the `config/samples.tsv` file automatically:

In [None]:
!(cd data/raw && ls *.raw > filelist.txt)
import pandas as pd
header_list = ["sample_name"]
df=pd.read_csv("data/raw/filelist.txt", names=header_list, index_col= None)
df["sample_name"]=df["sample_name"].replace(".raw*", value="", regex=True)
df["comment"] = " "
df["MAPnumber"] = " "
df.to_csv("config/samples.tsv", sep="\t")
df

##### `If your files originate from another instrument:`

Copy all your already-converted `*.mzML` files to the `data/mzML` directory, then run the following cell to generate the `config/samples.tsv` file automatically:

In [None]:
!(cd data/mzML && ls *.mzML > filelist.txt)
import pandas as pd
header_list = ["sample_name"]
df=pd.read_csv("data/mzML/filelist.txt", names=header_list, index_col= None)
df["sample_name"]=df["sample_name"].replace(".mzML*", value="", regex=True)
df["comment"] = " "
df["MAPnumber"] = " "
df.to_csv("config/samples.tsv", sep="\t")
df

##### `Create a GNPS metadata table:`
This step is datafile-dependent, so it is preferable to do it interactively in a Jupyter notebook.

In [32]:
!(cd results/Interim/mzML && ls PCpeak*.mzML > filelist.txt)

In [None]:
# Build the GNPS metadata table (results/GNPSexport/metadata.tsv) from the
# file names listed in results/Interim/mzML/filelist.txt.
import os

import numpy as np
import pandas as pd

# Make sure the export directory exists (no error if it is already there).
path = "results/GNPSexport/"
os.makedirs(path, exist_ok=True)

header_list = ["filename"]
df = pd.read_csv("results/Interim/mzML/filelist.txt", names=header_list, index_col=None)

# Map IDs are "MAP0", "MAP1", ... in file-list order.
df["ATTRIBUTE_MAPID"] = np.arange(len(df))
df["ATTRIBUTE_MAPID"] = "MAP" + df["ATTRIBUTE_MAPID"].astype(str)

# Genome ID: take the NBC-style match from the file name, fall back to the
# MDNA-style match, and only then to the "blank" placeholder. The fallback must
# be applied before "blank" is filled in; afterwards there would be no missing
# values left for it to replace.
nbc_id = df["filename"].str.extract(r'(NBC_?\d*|NBC?\d*)', expand=False)
mdna_id = df["filename"].str.extract(r'(MDNAWGS?\d*|MDNA_WGS_?\d*)', expand=False)
df["ATTRIBUTE_genomeID"] = nbc_id.fillna(mdna_id).fillna("blank")

# Medium name as found in the file name; missing when none of the known media match.
df["ATTRIBUTE_media"] = df["filename"].str.extract(r'(ISP2|DNPM|FPY12|MA|soyM\d*)', expand=False)
df["ATTRIBUTE_comment"] = df["ATTRIBUTE_genomeID"].astype(str) + "_" + df["ATTRIBUTE_media"].astype(str)

df.to_csv("results/GNPSexport/metadata.tsv", sep='\t', index=False)
df


In [39]:
!(cd results/Interim/mzML && rm filelist.txt)

In [None]:
# Build the GNPS metadata table (results/GNPSexport/metadata.tsv) from the
# "MAP" rows of results/GNPSexport/FeatureQuantificationTable.txt.
import pandas as pd
import numpy as np 
import os 

# Make sure the export directory exists (no error if it is already there).
path = "results/GNPSexport/"
os.makedirs(path, exist_ok=True)

FeatureMatrix = "results/GNPSexport/FeatureQuantificationTable.txt"

# Locate the "#MAP" header line; it names the columns of the MAP rows.
# The line terminator is stripped so the last column name stays clean.
header = None
with open(FeatureMatrix, 'r') as file:
    for line in file:
        fields = line.rstrip('\r\n').split('\t')
        if fields[0] == '#MAP':
            header = fields
            break
if header is None:
    raise ValueError(f"No '#MAP' header line found in {FeatureMatrix}")


def thin():
    """Yield the fields of every ``MAP`` row in the feature table.

    A row is selected when its first tab-separated field is exactly ``MAP``;
    header lines (``#...``), ``RUN`` and ``CONSENSUS`` rows, and blank lines
    are therefore skipped. Matching on the first field rather than on a
    substring keeps rows whose file name happens to contain ``#``, ``RUN`` or
    ``CONSENSUS``.
    NOTE(review): assumes MAP rows are the only data rows wanted here -
    confirm against the table format.

    Yields:
        list: one entry per column of ``header``; rows longer than the header
        are truncated and shorter rows are padded with ``None``.
    """
    n_cols = len(header)
    with open(FeatureMatrix, 'r') as file:
        for line in file:
            fields = line.rstrip('\r\n').split('\t')
            if fields[0] != 'MAP':
                continue
            fields = fields[:n_cols]
            fields.extend([None] * (n_cols - len(fields)))
            yield fields


map_rows = pd.DataFrame(thin(), columns=header)

# One metadata row per input file; the map ID is "MAP" + id, e.g. "MAP0".
df = pd.DataFrame({
    "filename": map_rows["filename"],
    "ATTRIBUTE_MAPID": map_rows["#MAP"] + map_rows["id"].astype(str),
})
# Genome ID is "NBC" plus the "_0..." number found in the file name
# (missing when the file name has no such number).
df["ATTRIBUTE_genomeID"] = "NBC" + df["filename"].str.extract(r'(_00?\d*)', expand=False)
# Medium name as found in the file name; missing when none of the known media match.
df["ATTRIBUTE_media"] = df["filename"].str.extract(r'(ISP2|DNPM|FPY12|MA|soyM\d*)', expand=False)
df.to_csv("results/GNPSexport/metadata.tsv", sep='\t', index=False)
df