In [9]:
import numpy as np
import plotly.graph_objs as go
import pandas as pd
from rdkit import Chem
from matchms.importing import load_from_msp
from matchms.logging_functions import set_matchms_logger_level

from utils import is_spectrum_for_compound

set_matchms_logger_level('ERROR')

In [10]:
# --- Load the score table and the metadata of both spectral libraries ---

def _is_matching_pair(score_row):
    """Return the verdict of `is_spectrum_for_compound` for one score row.

    The helper is imported from `utils`; it receives the row's 'query' and
    'reference' values.
    NOTE(review): presumably it tells whether the query spectrum belongs to
    the reference compound -- confirm against utils.
    """
    return is_spectrum_for_compound(score_row['query'], score_row['reference'])


# Tab-separated matchms scores; only rows accepted by the helper are kept.
all_scores = pd.read_csv(
    "../data/output_matching/matchms/matchms_tol_0.01_1%I_all_peaks_with_0s.tsv",
    sep="\t",
)
pair_mask = all_scores.apply(_is_matching_pair, axis=1)
matchms_scores = all_scores[pair_mask]

# NOTE(review): this path climbs two directories while every other input in
# this cell uses "../data" -- confirm that is intended. `molecules` is not
# used by the cells visible here.
molecules = Chem.SDMolSupplier("../../data/RECETOX_GC-EI-MS_20201028.sdf")

# Metadata of the simulated spectra, one row per spectrum; 'compound_name' is
# renamed to 'query' so it can serve as a join key against the score table.
predicted_spectra = list(load_from_msp("../data/filtered/simulated_matchms_filter_1%I_all_peaks.msp"))
spectra_metadata = pd.DataFrame.from_dict(
    [spectrum.metadata for spectrum in predicted_spectra]
).rename(columns={'compound_name': 'query'})

# Same for the experimental library, keyed by 'reference'.
reference_spectra = list(load_from_msp("../data/experimental/RECETOX_GC-EI_MS_20201028.msp"))
reference_spectra_metadata = pd.DataFrame.from_dict(
    [spectrum.metadata for spectrum in reference_spectra]
).rename(columns={'compound_name': 'reference'})


In [11]:
# Join the filtered scores with both metadata tables in a single chain.
# 'num_peaks' is renamed right after each join so that the query-side and the
# reference-side counts end up in distinct, explicit columns. Any other column
# name shared by both sides of a join receives pandas' default suffixes.
merged = (
    matchms_scores
    .merge(spectra_metadata, on="query", how="inner")
    .rename(columns={'num_peaks': 'n_peaks_query'})
    .merge(reference_spectra_metadata, on="reference", how="inner")
    .rename(columns={'num_peaks': 'n_peaks_reference'})
)

In [12]:
# Column holding the number of matched peaks for each query/reference pair.
matches_column = 'CosineHungarian_0.01_0.0_1.0_matches'

# Coerce the three count columns to numeric; values that cannot be parsed
# become NaN (errors='coerce') instead of raising.
for count_column in (matches_column, 'n_peaks_query', 'n_peaks_reference'):
    merged[count_column] = pd.to_numeric(merged[count_column], errors='coerce')

# Matched peaks divided by the peak count of the query and of the reference
# spectrum respectively.
matched_counts = merged[matches_column]
merged['FractionQuery'] = matched_counts.div(merged['n_peaks_query'])
merged['FractionReference'] = matched_counts.div(merged['n_peaks_reference'])

In [13]:
# Scatter plot of score (x) against number of matched peaks (y).
# Marker size encodes the matched fraction of the query spectrum, marker
# colour the matched fraction of the reference spectrum.

# Multiplier turning FractionQuery into a marker size; adjust as needed.
MARKER_SIZE_SCALE = 20

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=merged['CosineHungarian_0.01_0.0_1.0_scores'],
    y=merged['CosineHungarian_0.01_0.0_1.0_matches'],
    mode='markers',
    marker=dict(
        size=merged['FractionQuery'] * MARKER_SIZE_SCALE,
        color=merged['FractionReference'],
        colorscale='Viridis',  # change the colorscale as needed
        colorbar=dict(
            title='Reference Matched %',
            # FractionReference is a fraction (matches / n_peaks), not a
            # percentage. Formatting the ticks as percent makes the colour
            # bar agree with its "%" title without altering the data.
            tickformat='.0%',
        ),
        opacity=0.5
    ),
    name='Matched Ions'
))
fig.update_layout(
    title='Scatter plot of matched ions and scores',
    xaxis_title='Score',
    yaxis_title='Matches'
)
fig.show()
# Static export; needs plotly's image export backend (e.g. kaleido) installed.
fig.write_image("scatter_plot.png")