In [1]:
from scipy.optimize import curve_fit
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import glob
from pymsfilereader import MSFileReader
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
import sys
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
import re
import xlwings as xw
from pathlib import Path

In [2]:
class FileBrowser(QWidget):
    """A one-row widget: bold label, path field and a 'Browse' button.

    Pressing 'Browse' opens a file dialog whose kind depends on the mode:

    * ``ImportFile``  - pick one existing file,
    * ``ImportFiles`` - pick several existing files,
    * ``SaveImage``   - pick an existing directory to export into.

    The chosen path(s) are shown in the line edit (comma separated) and are
    available through :meth:`getPaths`.
    """

    # Browse modes accepted by ``__init__`` / ``setMode``.
    ImportFile = 0
    ImportFiles = 1
    SaveImage = 2

    def __init__(self, title, mode=ImportFile):
        """Build the row.

        Args:
            title: Text shown in the bold label on the left.
            mode: One of ``ImportFile``, ``ImportFiles`` or ``SaveImage``.
        """
        QWidget.__init__(self)
        layout = QHBoxLayout()
        self.setLayout(layout)

        self.browser_mode = mode
        # '(*)' rather than '(*.*)' so files without an extension stay visible
        # now that the filter is actually handed to the dialogs.
        self.filter_name = 'All files (*)'
        self.dirpath = QDir.currentPath()
        # Defined up front so getPaths() works before the first browse.
        self.filepaths = []

        self.label = QLabel(title)
        self.label.setFixedWidth(80)
        self.label.setFont(QFont("Arial", weight=QFont.Bold))
        self.label.setAlignment(Qt.AlignLeft | Qt.AlignVCenter)
        layout.addWidget(self.label)

        self.lineEdit = QLineEdit(self)
        self.lineEdit.setFixedWidth(720)
        layout.addWidget(self.lineEdit)

        self.button = QPushButton('Browse')
        self.button.clicked.connect(self.getFile)
        layout.addWidget(self.button)
        layout.addStretch()

    def setMode(self, mode):
        """Change the browse mode used by the next call to :meth:`getFile`."""
        self.browser_mode = mode

    def setFileFilter(self, text):
        """Set the name filter (e.g. ``'Raw files (*.raw)'``) for file dialogs."""
        self.filter_name = text

    def getFile(self):
        """Open the dialog for the current mode and store the selection.

        If the user cancels, the previous selection and the text in the line
        edit are left unchanged.
        """
        if self.browser_mode == FileBrowser.ImportFile:
            chosen, _ = QFileDialog.getOpenFileName(self, caption='Choose File',
                                                    directory=self.dirpath,
                                                    filter=self.filter_name)
            selection = [chosen] if chosen else []
        elif self.browser_mode == FileBrowser.ImportFiles:
            chosen, _ = QFileDialog.getOpenFileNames(self, caption='Choose Files',
                                                     directory=self.dirpath,
                                                     filter=self.filter_name)
            selection = list(chosen)
        else:
            # ShowDirsOnly is getExistingDirectory's own default; it has to be
            # restated because explicit options replace that default.
            options = QFileDialog.Options()
            options |= QFileDialog.ShowDirsOnly
            if sys.platform == 'darwin':
                options |= QFileDialog.DontUseNativeDialog
            chosen = QFileDialog.getExistingDirectory(self, caption='Save/Save As',
                                                      directory=self.dirpath,
                                                      options=options)
            selection = [chosen] if chosen else []

        if not selection:
            # Dialog was cancelled.
            return
        self.filepaths = selection
        self.lineEdit.setText(",".join(selection))

    def setLabelWidth(self, width):
        """Set the fixed pixel width of the label."""
        self.label.setFixedWidth(width)

    def setlineEditWidth(self, width):
        """Set the fixed pixel width of the path field."""
        self.lineEdit.setFixedWidth(width)

    def getPaths(self):
        """Return the list of paths chosen in the last accepted dialog.

        The list is empty until the user has selected something.
        """
        return self.filepaths
    
    
class Window(QDialog):
    """Dialog that extracts peaks from .raw files and fits resolution vs. m/z.

    Workflow driven by the three buttons:

    * **Run** reads the selected .raw file(s) with ``MSFileReader``, keeps the
      highest signal-to-noise peaks, writes them to ``demoFile_to_csv.csv``
      (plus per-file CSVs and ``demoFile_to_xlsx.xlsx`` in batch mode) in the
      export folder, draws the scatter plot with a fitted curve and saves it
      as ``regression_demo.jpg``.
    * **Copy** recomputes the log-log regression from the exported CSV and
      puts ``A, B, R^2`` on the clipboard.
    * **Close** closes the dialog.
    """

    # File names written into the export folder.
    COMBINED_CSV = "demoFile_to_csv.csv"
    WORKBOOK = "demoFile_to_xlsx.xlsx"
    PLOT_IMAGE = "regression_demo.jpg"
    # Number of rows (highest S/N first) kept by file_to_csv.
    TOP_PEAKS = 1001

    # constructor
    def __init__(self, parent=None):
        """Create the widgets, lay them out and show the dialog."""
        QDialog.__init__(self, parent)
        # Ensure our window stays in front and give it a title
        self.setWindowFlags(Qt.WindowStaysOnTopHint)
        self.setWindowTitle("raMSI Regression Equation Generator")
        self.setGeometry(500, 200, 900, 800)
        # figsize is in inches: 8 in * 100 dpi = 800 px.  A plain Figure is
        # used (not plt.figure) so the embedded figure is not also registered
        # with pyplot's global figure manager.
        self.figure = Figure(figsize=(8, 8), dpi=100)
        self.canvas = FigureCanvas(self.figure)
        # Create and assign the main (vertical) layout.
        vlayout = QVBoxLayout()
        self.setLayout(vlayout)
        self.fileBrowserPanel(vlayout)
        vlayout.addStretch()
        self.imagePanel(vlayout)
        vlayout.addStretch()
        self.addButtonPanel(vlayout)
        self.show()

    # ------------------------------------------------------------------ UI
    def fileBrowserPanel(self, parentLayout):
        """Add the import-mode selector, the import rows and the export row."""
        self.fileFB = FileBrowser('Import 1 File', FileBrowser.ImportFile)
        self.filesFB = FileBrowser('Import Files', FileBrowser.ImportFiles)
        flayout = QVBoxLayout()
        # The combo box index selects which import row is visible.
        self.stackedWidget = QStackedWidget(self)
        self.stackedWidget.addWidget(self.fileFB)
        self.stackedWidget.addWidget(self.filesFB)
        self.xLabel = QLabel("Select an import option")
        self.xComboBox = QComboBox(self)
        self.xComboBox.addItems(["Import a .raw file", "Import a batch of .raw files"])
        self.xComboBox.activated[int].connect(self.stackedWidget.setCurrentIndex)
        self.xLabel.setBuddy(self.xComboBox)
        flayout.addWidget(self.xLabel)
        flayout.addWidget(self.xComboBox)
        flayout.addWidget(self.stackedWidget)
        flayout.addStretch()
        self.saveFB = FileBrowser('Export to', FileBrowser.SaveImage)
        flayout.addWidget(self.saveFB)
        flayout.addStretch()
        parentLayout.addLayout(flayout)

    def imagePanel(self, parentLayout):
        """Add the matplotlib canvas to the dialog."""
        imlayout = QVBoxLayout()
        imlayout.addWidget(self.canvas)
        parentLayout.addLayout(imlayout)

    def addButtonPanel(self, parentLayout):
        """Add the right-aligned Close / Run / Copy button row."""
        hlayout = QHBoxLayout()
        hlayout.addStretch()

        self.closeButton = QPushButton("Close")
        self.closeButton.clicked.connect(self.closeButtonAction)
        hlayout.addWidget(self.closeButton)

        self.runButton = QPushButton("Run")
        self.runButton.clicked.connect(self.buttonAction)
        hlayout.addWidget(self.runButton)

        self.copyButton = QPushButton(" Copy the regression equation into Clipboard ")
        self.copyButton.clicked.connect(self.copyButtonAction)
        hlayout.addWidget(self.copyButton)

        # Kept for backward compatibility: 'button' used to end up bound to
        # the copy button.
        self.button = self.copyButton

        # A layout can have only one parent, so it is added exactly once.
        parentLayout.addLayout(hlayout)

    # ------------------------------------------------------------- helpers
    @staticmethod
    def _chosen_paths(browser):
        """Return the non-empty paths selected in *browser* (possibly [])."""
        try:
            paths = browser.getPaths()
        except AttributeError:
            # Browser whose selection list does not exist yet.
            return []
        return [path for path in paths if path]

    def _export_dir(self):
        """Return the export folder as a Path; raise ValueError if unset."""
        paths = self._chosen_paths(self.saveFB)
        if not paths:
            raise ValueError("Please choose an export folder first.")
        return Path(paths[0])

    @staticmethod
    def _fit_power_law(x, y):
        """Least-squares fit of ``ln(y) = ln(A) + B*ln(x)``.

        Rows where x or y is non-finite or not strictly positive are ignored
        because their logarithm is undefined.

        Returns:
            ``(A, B, r_squared)`` as Python floats; ``r_squared`` is computed
            in log space and is NaN when ln(y) has zero variance.

        Raises:
            ValueError: fewer than two usable points, or all x identical.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
        usable = np.isfinite(x) & np.isfinite(y) & (x > 0) & (y > 0)
        ln_x = np.log(x[usable])
        ln_y = np.log(y[usable])
        n = ln_x.size
        if n < 2:
            raise ValueError("At least two positive (m/z, resolution) points are needed for the regression.")
        if np.ptp(ln_x) == 0:
            raise ValueError("All m/z values are identical; the regression slope is undefined.")

        sum_x = ln_x.sum()
        sum_y = ln_y.sum()
        B = (n * np.sum(ln_x * ln_y) - sum_x * sum_y) / (n * np.sum(ln_x ** 2) - sum_x ** 2)
        A = math.exp((sum_y - B * sum_x) / n)

        ln_y_estimate = math.log(A) + B * ln_x
        ss_res = np.sum((ln_y - ln_y_estimate) ** 2)
        ss_tot = np.sum((ln_y - ln_y.mean()) ** 2)
        r_squared = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")
        return float(A), float(B), float(r_squared)

    def _build_dataset(self, export_dir):
        """Extract peaks from the selected input(s) and write the export files.

        Returns the combined table re-read from ``demoFile_to_csv.csv``.

        Raises:
            ValueError: nothing selected, or no peaks could be extracted.
        """
        combined_path = export_dir / self.COMBINED_CSV

        if self.xComboBox.currentIndex() == 0:
            sources = self._chosen_paths(self.fileFB)
            if not sources:
                raise ValueError("Please choose a .raw file first.")
            peaks = self.file_to_csv(self.process_one_file(file=sources[0]))
            if peaks.empty:
                raise ValueError("No peaks could be read from {file}".format(file=sources[0]))
            peaks.to_csv(combined_path)
        else:
            sources = self._chosen_paths(self.filesFB)
            if not sources:
                raise ValueError("Please choose at least one .raw file first.")
            # Only the CSVs written by this run go into the workbook; globbing
            # the folder would also pick up stale files from earlier runs.
            sheets = {}
            for index, source in enumerate(sources):
                peaks = self.file_to_csv(self.process_one_file(file=source))
                if peaks.empty:
                    print("no peaks extracted from {file}".format(file=source))
                    continue
                csv_path = export_dir / f"demoFile_to_csv_{index}.csv"
                peaks.to_csv(csv_path)
                sheets[csv_path.stem] = pd.read_csv(csv_path)
            if not sheets:
                raise ValueError("No peaks could be read from the selected files.")
            with pd.ExcelWriter(export_dir / self.WORKBOOK) as workbook:
                for sheet_name, sheet in sheets.items():
                    sheet.to_excel(workbook, sheet_name=sheet_name, index=False)
            all_data = pd.concat(list(sheets.values()), ignore_index=True)
            all_data.to_csv(combined_path, index=False)

        return pd.read_csv(combined_path)

    def _draw_plot(self, df, popt, A, B, r_squared):
        """Redraw the canvas: S/N-sized scatter, fitted curve and annotations."""
        x = df["m/z"]
        y = df["resolution"]
        x_linspace = np.linspace(x.min(), x.max(), 500)
        # NOTE(review): the curve drawn here is the scipy fit (popt), while the
        # annotated equation below reports the log-log fit (A, B) - confirm
        # this mismatch is intended.
        power_y = popt[0] * x_linspace ** -popt[1]

        self.figure.clear()
        ax = self.figure.add_subplot(111)
        scatter = ax.scatter(x, y, s=df["S/N"] / 100, label='detected ions (size: S/N ratio)',
                             c="blue", alpha=0.75, lw=0)
        ax.set_xlabel("m/z", fontproperties='Arial', fontsize=12)
        ax.set_ylabel("Resolution", fontproperties='Arial', fontsize=12)
        ax.set_title("The m/z-to-Resolution Plot", fontproperties='Arial', fontsize=14)
        ax.set_xlim(0, 1600)
        ax.set_ylim(0, 200000)
        ax.plot(x_linspace, power_y, label='regression curve', color="red")

        # The fitted model is the power law y = A * x^B.
        ax.annotate('y = ' + str(round(A)) + ' * x^(' + str(round(B, 4)) + ')', (1000, 120000),
                    horizontalalignment='center', verticalalignment='center',
                    fontproperties='Arial', fontsize=12)
        ax.annotate('R-square = ' + str(round(r_squared, 4)), (1000, 110000),
                    horizontalalignment='center', verticalalignment='center',
                    fontproperties='Arial', fontsize=12)

        handles, labels = scatter.legend_elements(prop="sizes", alpha=0.6, color="lightgrey", num=4)
        ax.legend(handles, labels, loc="upper right", title="S/N ratio (*100)",
                  title_fontsize=10, fontsize=10)
        self.canvas.draw()

    # ------------------------------------------------------ data processing
    def process_one_file(self, file):
        """Read the label data of every scan in a .raw file.

        Args:
            file: Path of the .raw file to open with ``MSFileReader``.

        Returns:
            A list with one DataFrame per scan (columns ``x`` = scan number,
            ``m/z``, ``intensity``, ``resolution``, ``noise``).  On IOError a
            message is printed and an empty list is returned.
        """
        frames = []
        try:
            rawfile = MSFileReader(file)
        except IOError:
            print("file error {file}".format(file=file))
            return frames
        try:
            for scan in range(rawfile.FirstSpectrumNumber, rawfile.LastSpectrumNumber + 1):
                labels, flags = rawfile.GetLabelData(scan)
                frames.append(pd.DataFrame({'x': scan, 'm/z': labels.mass,
                                            'intensity': labels.intensity,
                                            'resolution': labels.resolution,
                                            'noise': labels.noise}))
        except IOError:
            print("file error {file}".format(file=file))
            return []
        finally:
            # Release the raw file handle whether or not reading succeeded.
            rawfile.Close()
        return frames

    def file_to_csv(self, file):
        """Merge per-scan frames and keep the rows with the highest S/N.

        Args:
            file: List of DataFrames as returned by :meth:`process_one_file`.

        Returns:
            A DataFrame with an added ``S/N`` column (intensity / noise),
            sorted by descending S/N and truncated to ``TOP_PEAKS`` rows.
            An empty frame with the expected columns is returned for empty
            input.  The input frames are not modified.
        """
        if not file:
            return pd.DataFrame(columns=['x', 'm/z', 'intensity', 'resolution', 'noise', 'S/N'])
        # One concat instead of growing the frame inside a loop.
        peaks = pd.concat(file)
        peaks['S/N'] = peaks['intensity'] / peaks['noise']
        ranked = peaks.sort_values(by=['S/N'], ascending=False).reset_index(drop=True)
        return ranked.iloc[:self.TOP_PEAKS]

    def calculation(self):
        """Fit ``y = A * x^B`` to the exported CSV by log-log linear regression.

        Reads ``demoFile_to_csv.csv`` from the export folder (written by Run).

        Returns:
            ``(A, B, r_squared)``.

        Raises:
            ValueError: no export folder chosen or too little usable data.
            OSError: the CSV has not been written yet or cannot be read.
        """
        df = pd.read_csv(self._export_dir() / self.COMBINED_CSV)
        return self._fit_power_law(df["m/z"], df["resolution"])

    # -------------------------------------------------------------- actions
    def buttonAction(self, parentLayout=None):
        """Slot for 'Run': export the peak tables, fit, plot and save the image.

        Args:
            parentLayout: Unused; it receives the ``checked`` flag emitted by
                the button's ``clicked`` signal.
        """
        try:
            export_dir = self._export_dir()
            df = self._build_dataset(export_dir)
            x = df["m/z"]
            y = df["resolution"]
            popt, pcov = curve_fit(lambda fx, k, t: k * fx ** -t, x, y)
            A, B, r_squared = self._fit_power_law(x, y)

            print("result from scipy is not precise: A != " + str(popt[0]) + ", B != -" + str(popt[1])
                  + "so I calculate the coefficient A value & index B value by myself,"
                  + " giving out the same values as the values calculated by Excel, accompanying with the R-square value.")

            self._draw_plot(df, popt, A, B, r_squared)
            self.figure.savefig(str(export_dir / self.PLOT_IMAGE), bbox_inches='tight', dpi=300)
        except (ValueError, OSError, RuntimeError) as error:
            # Report the problem instead of letting the exception escape the
            # slot (which would take the whole application down).
            QMessageBox.warning(self, "Run failed", str(error))

    def closeButtonAction(self):
        """Slot for 'Close': close the dialog."""
        self.close()

    def copyButtonAction(self):
        """Slot for 'Copy': put ``A, B, R^2`` (rounded) on the clipboard."""
        try:
            A, B, r_squared = self.calculation()
        except (ValueError, OSError) as error:
            QMessageBox.warning(self, "Nothing to copy", str(error))
            return
        A = round(A)
        B = round(B, 4)
        r_squared = round(r_squared, 4)
        cb = QApplication.clipboard()
        cb.clear(mode=cb.Clipboard)
        cb.setText(str(A) + ", " + str(B) + ", " + str(r_squared), mode=cb.Clipboard)

In [None]:
if __name__ == '__main__':
    # Only one QApplication may exist per process.  Re-running this cell in
    # the same kernel must therefore reuse the existing instance instead of
    # constructing a second one.
    app = QApplication.instance()
    if app is None:
        app = QApplication(sys.argv)
    demo = Window() # <<-- Create an instance
    demo.show()
    # Blocks until the dialog is closed, then exits with Qt's return code.
    sys.exit(app.exec_())

result from scipy is not precise: A != 948047.115406828, B != -0.5103291907221708so I calculate the coefficient A value & index B value by myself, giving out the same values as the values calculated by Excel, accompanying with the R-square value.
result from scipy is not precise: A != 928544.7912988946, B != -0.5063203713462949so I calculate the coefficient A value & index B value by myself, giving out the same values as the values calculated by Excel, accompanying with the R-square value.
