In [1]:
from datetime import datetime, date, time
import pandas as pd
import numpy as np

from matchms.importing import load_from_msp
import matchms.filtering as ms_filters
from matchms import calculate_scores
from matchms.similarity import CosineGreedy
from matchms import Spectrum

import networkx as nx
import matchmsextras.networking as net
from matchms.networking import SimilarityNetwork

In [2]:
# Download the Tox21 compound table published with DeepTox.
tox21_compoundData = pd.read_csv('http://bioinf.jku.at/research/DeepTox/tox21_compoundData.csv')

# Keep the second column (it must be `inchikey`, since it is used as the
# group key below) plus the last twelve columns.
# NOTE(review): the last twelve columns are presumably the Tox21 assay labels
# (e.g. 'NR.AR') - confirm against the CSV header.
all_columns = tox21_compoundData.columns
columns_to_keep = [*all_columns[1:2], *all_columns[-12:]]
tox21_compoundData_short = tox21_compoundData[columns_to_keep]

# Collapse duplicate InChIKeys to a single row by taking the column-wise
# maximum, then index the table by InChIKey.
# NOTE(review): `dropna(how='all')` runs while `inchikey` is still a regular
# column, so a row is only removed when the key itself is missing as well -
# confirm whether compounds without any label were meant to be dropped.
df_combined = (
    tox21_compoundData_short
    .groupby('inchikey')
    .max()
    .reset_index()
    .replace('NaN', np.nan)
    .dropna(how='all')
    .set_index('inchikey')
)

# Convenience views: the list of known InChIKeys and a nested
# {inchikey: {column: value}} lookup.
inchikeys_tox21 = df_combined.index.tolist()
tox_dict = df_combined.to_dict(orient='index')

In [3]:
import csv  # only needed by this cell; numpy and Spectrum come from the import cell


def parse_peak_list(peak_text):
    """Parse a ``"mz:intensity mz:intensity ..."`` string into two float arrays.

    Parameters
    ----------
    peak_text : str
        Whitespace-separated ``mz:intensity`` tokens.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mz_values, intensity_values)``, both float arrays of equal length
        (length 0 when ``peak_text`` holds no tokens).

    Raises
    ------
    ValueError
        If a token is not of the form ``mz:intensity`` or a value is not numeric.
    """
    # str.split() without an argument ignores leading/trailing/repeated
    # whitespace, which would otherwise produce empty tokens.
    pairs = [token.split(':') for token in peak_text.split()]
    mz_values = np.array([float(mz) for mz, _ in pairs], dtype=float)
    intensity_values = np.array([float(intensity) for _, intensity in pairs], dtype=float)
    return mz_values, intensity_values


# Build one matchms Spectrum per feature row of the semicolon-separated export.
spectrums = []
# newline='' is the csv-module recommended way to open files for csv.reader.
with open('DDA_Pos_EF.csv', 'r', encoding='utf-8-sig', newline='') as file:
    reader = csv.reader(file, delimiter=';')
    for row in reader:
        # Skip blank lines (csv yields []), the header row and rows without an id.
        if not row or not row[0] or row[0] == 'id':
            continue
        peak_text = row[1] if len(row) > 1 else ''
        mz_values, intensity_values = parse_peak_list(peak_text)
        # A feature without any peak cannot be matched; skip it instead of failing.
        if mz_values.size == 0:
            continue
        spectrums.append(
            Spectrum(mz=mz_values,
                     intensities=intensity_values,
                     metadata={'id': row[0],
                               'num_peaks': len(mz_values)})
        )

















In [4]:
import matchms.filtering as ms_filters
def peak_processing(spectrum, intensity_from=0.05):
    """Clean a single spectrum before similarity scoring.

    Applies, in order, matchms' ``default_filters``, ``normalize_intensities``
    and ``select_by_intensity``.

    Parameters
    ----------
    spectrum : matchms.Spectrum
        Spectrum to process.
    intensity_from : float, optional
        Lower intensity bound handed to ``select_by_intensity``. It is applied
        after normalization, so it is relative to the normalized intensities.
        Defaults to 0.05, the value previously hard-coded here.

    Returns
    -------
    The object returned by ``ms_filters.select_by_intensity`` (the processed
    spectrum).
    """
    spectrum = ms_filters.default_filters(spectrum)
    spectrum = ms_filters.normalize_intensities(spectrum)
    return ms_filters.select_by_intensity(spectrum, intensity_from=intensity_from)
# NOTE: this cell reads `spectrums` and rebinds it, so running it a second time
# re-applies the processing and the square-root transform to already
# transformed spectra. Re-run the loading cell first if it must be repeated.

# Clean every feature spectrum.
spectrums = list(map(peak_processing, spectrums))

# Rebuild each spectrum with square-root scaled intensities; m/z values and
# metadata are carried over unchanged.
transformed_spectrums = [
    Spectrum(mz=processed.mz,
             intensities=np.sqrt(processed.intensities),
             metadata=processed.metadata)
    for processed in spectrums
]
spectrums = transformed_spectrums

# Only features that still have at least three peaks are used for matching.
spectrums_features = [candidate for candidate in spectrums if len(candidate.peaks) >= 3]

















































In [5]:
# Load the pre-built network and the spectral library.
network = nx.read_graphml('network.graphml')
nodes = list(network.nodes())

# Set copy of the node ids so the membership test below is O(1) per spectrum
# instead of a linear scan through the `nodes` list.
node_ids = set(nodes)

# NOTE: this rebinds `spectrums` from the feature spectra to the library spectra.
spectrums = list(load_from_msp("mass_spectra.msp"))

# Keep only library spectra whose InChIKey is a node of the network.
spectrums_Net = [s for s in spectrums if s.get('inchikey') in node_ids]































In [None]:
# Matching parameters, gathered here so they can be tuned in one place.
MZ_TOLERANCE = 0.1        # fragment m/z tolerance passed to CosineGreedy
MIN_MATCHED_PEAKS = 3     # minimum number of matched peaks required for a link
MIN_COSINE_SCORE = 0.6    # minimum cosine score required for a link
TOX_ENDPOINT = 'NR.AR'    # column of `tox_dict` used to classify neighbours

# The similarity measure does not depend on the feature, so build it once.
similarity_measure = CosineGreedy(tolerance=MZ_TOLERANCE)

for f in spectrums_features:

    print(datetime.now().strftime('%d-%m-%Y %H:%M'))
    print('Feature ID: ', f.get('id'))
    id_feature = f.get('id')

    # Score the feature against the library spectra only. The previous version
    # computed the full symmetric (library + feature) x (library + feature)
    # matrix for every feature and then used just its last row; this yields the
    # same reference-vs-feature pair scores without the all-vs-all work, and
    # without a self-match entry that has to be sliced off afterwards.
    # `matrix` returns a structured array with 'score' and 'matches' fields,
    # shaped (len(references), len(queries)); take the single query column.
    feature_scores = similarity_measure.matrix(references=spectrums_Net, queries=[f])[:, 0]
    is_hit = np.logical_and(feature_scores['matches'] >= MIN_MATCHED_PEAKS,
                            feature_scores['score'] >= MIN_COSINE_SCORE)
    hit_indexes = np.flatnonzero(is_hit)

    connected_active = 0
    connected_inactive = 0

    # As before, the feature only becomes a node when it links to something.
    if hit_indexes.size:
        network.add_node(id_feature, num_peaks=len(f.peaks))

    for index in hit_indexes:
        inchikey = spectrums_Net[index].get('inchikey')
        # A neighbour without a label (missing from tox_dict or NaN) counts as
        # neither active nor inactive instead of aborting the run.
        activity = tox_dict.get(inchikey, {}).get(TOX_ENDPOINT)
        if activity == 1:
            connected_active += 1
        elif activity == 0:
            connected_inactive += 1
        # Store plain Python scalars rather than numpy scalars as edge attributes.
        network.add_edge(id_feature, inchikey,
                         score=float(feature_scores['score'][index]),
                         matches=int(feature_scores['matches'][index]))
    print('connected_active',connected_active)
    print('connected_inactive',connected_inactive)

    # The alert fires when active neighbours are at least as numerous as
    # inactive ones and there is at least one active neighbour.
    if connected_active >= connected_inactive and connected_active != 0:
        print('Alert: active connected nodes greater than inactive nodes')
    print()

In [None]:
# Number of library spectra kept because their InChIKey is a node of the loaded network.
len(spectrums_Net)