<a href="https://colab.research.google.com/github/Ash100/DiSHaN/blob/main/Dr_Mudassar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Note**. This notebook is generated for Dr. Mudassar to analyse the LC-MS Data - Please use it with author consent.<br>
Email: ashfaqahmad82@hotmail.com

In [1]:
!pip install pyteomics numpy pandas matplotlib


In [2]:
from pyteomics import mzxml
import pandas as pd
import numpy as np

filename = '/content/RAR1.mzXML'  # replace with your uploaded filename

peaks_data = []

with mzxml.read(filename) as reader:
    for spectrum in reader:
        if spectrum['msLevel'] == 1:  # MS1 spectra
            mzs = spectrum['m/z array']
            intensities = spectrum['intensity array']
            for mz, intensity in zip(mzs, intensities):
                peaks_data.append([mz, intensity])

# Convert to DataFrame
df = pd.DataFrame(peaks_data, columns=['m/z', 'Intensity'])

# Bin peaks to merge very close m/z values
mz_bin_width = 0.01
df['mz_bin'] = (df['m/z'] / mz_bin_width).round() * mz_bin_width
agg_df = df.groupby('mz_bin')['Intensity'].sum().reset_index()

# Relative quantification
agg_df['Relative_Abundance'] = agg_df['Intensity'] / agg_df['Intensity'].sum()

# Sort by abundance
agg_df = agg_df.sort_values(by='Relative_Abundance', ascending=False)
agg_df.head(10)


Unnamed: 0,mz_bin,Intensity,Relative_Abundance
12,101.2,3960594000.0,0.010786
13,101.3,3826980000.0,0.010422
11,101.1,3803279000.0,0.010358
421,142.1,3746560000.0,0.010203
420,142.0,3745870000.0,0.010201
14,101.4,3704752000.0,0.010089
422,142.2,3667310000.0,0.009987
419,141.9,3606631000.0,0.009822
10,101.0,3407252000.0,0.009279
423,142.3,3374505000.0,0.00919


In [3]:
# Assuming your DataFrame is 'agg_df' with 'mz_bin' and 'Intensity'

# Relative percent
agg_df['Percent_Composition'] = (agg_df['Intensity'] / agg_df['Intensity'].sum()) * 100

# Sort by percent for clarity
agg_df = agg_df.sort_values(by='Percent_Composition', ascending=False)

agg_df.head(10)


Unnamed: 0,mz_bin,Intensity,Relative_Abundance,Percent_Composition
12,101.2,3960594000.0,0.010786,1.078606
13,101.3,3826980000.0,0.010422,1.042218
11,101.1,3803279000.0,0.010358,1.035764
421,142.1,3746560000.0,0.010203,1.020317
420,142.0,3745870000.0,0.010201,1.020129
14,101.4,3704752000.0,0.010089,1.008931
422,142.2,3667310000.0,0.009987,0.998735
419,141.9,3606631000.0,0.009822,0.98221
10,101.0,3407252000.0,0.009279,0.927912
423,142.3,3374505000.0,0.00919,0.918994


In [7]:
from pyteomics import mzxml
import pandas as pd

filename = "/content/RAR1.mzXML"  # Replace with uploaded filename
peaks_data = []

with mzxml.read(filename) as reader:
    for spectrum in reader:
        if spectrum['msLevel'] == 1:
            rt = spectrum['retentionTime']
            # Convert retention time to seconds
            if isinstance(rt, str) and rt.startswith("PT") and rt.endswith("S"):
                rt_sec = float(rt[2:-1])
            else:
                rt_sec = float(rt)

            mzs = spectrum['m/z array']
            intensities = spectrum['intensity array']

            for mz, intensity in zip(mzs, intensities):
                peaks_data.append([rt_sec, mz, intensity])

# Convert to DataFrame
df = pd.DataFrame(peaks_data, columns=['Retention_Time_s', 'm/z', 'Intensity'])

# Bin by m/z
mz_bin_width = 0.01
df['mz_bin'] = (df['m/z'] / mz_bin_width).round() * mz_bin_width

# Calculate mean RT and intensity
agg_df = df.groupby('mz_bin').agg(
    Intensity=('Intensity', 'sum'),
    Mean_RT=('Retention_Time_s', 'mean')
).reset_index()

# Calculate Exact_RT (retention time at max intensity per m/z bin)
exact_rt = df.loc[df.groupby('mz_bin')['Intensity'].idxmax(), ['mz_bin','Retention_Time_s']]
exact_rt = exact_rt.rename(columns={'Retention_Time_s':'Exact_RT'})

# Merge into agg_df
agg_df = agg_df.merge(exact_rt, on='mz_bin', how='left')

# Calculate relative abundance and percent composition
agg_df['Relative_Abundance'] = agg_df['Intensity'] / agg_df['Intensity'].sum()
agg_df['Percent_Composition'] = agg_df['Relative_Abundance'] * 100

agg_df.head()


Unnamed: 0,mz_bin,Intensity,Mean_RT,Exact_RT,Relative_Abundance,Percent_Composition
0,100.0,2798422000.0,14.995774,23.524,0.007621,0.762106
1,100.1,2666623000.0,14.995774,18.581333,0.007262,0.726213
2,100.2,2604504000.0,14.995774,25.048833,0.007093,0.709296
3,100.3,2499617000.0,14.995774,25.048833,0.006807,0.680732
4,100.4,2438758000.0,14.995774,25.048833,0.006642,0.664158


In [None]:
# Save the DataFrame to CSV
agg_df.to_csv('RAR1.csv', index=False)
print("CSV file saved as 'LCMS_percent_composition.csv'")
