In [1]:
#
# Import Libraries
#

import os
import sys
import pandas as pd
import numpy as np

# Directory that is put on the import path so the `BasicStats` import below
# can be resolved. It is appended at most once, so re-running this cell does
# not pile up duplicate sys.path entries.
utilsPath = r'S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\utils'
utils_already_registered = utilsPath in sys.path
if not utils_already_registered:
    sys.path.append(utilsPath)

from BasicStats import Xstats

In [2]:
#
# Set constants
#

# Root of the PESA_Integromics data tree. Every input path below is derived
# from it, so relocating the data only requires changing this one line.
# (On Windows the derived strings are identical to the previous literals.)
DATA_ROOT = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data"

# Working directories of the proteomics (Q) and metabolomics (M) data sets.
Qworking_path = os.path.join(DATA_ROOT, "Proteomics", "AWHS")
Mworking_path = os.path.join(DATA_ROOT, "Metabolomics", "AWHS")

# X
# NOTE: `mq_path` is the metabolomics counterpart of `xq_path` (it is the file
# loaded into `xm`); the name is kept because the read-data cell uses it.
xq_path = os.path.join(Qworking_path, "WorkingFiles", "Xq_minus_X_norm.tsv")
mq_path = os.path.join(Mworking_path, "WorkingFiles", "Xm_norm_MS2.tsv")

# Feature info
q2i_path = os.path.join(Qworking_path, "WorkingFiles", "q2info.tsv")
m2i_path = os.path.join(Mworking_path, "WorkingFiles", "m2info.tsv")

# metadata
# Built from components instead of wrapping one literal in a single-argument
# os.path.join, which did nothing.
mdata_path = os.path.join(DATA_ROOT, "Metadata", "AWHS", "WorkingFiles", "main_metadata.tsv")


In [3]:
#
# Read data
#

def _read_tsv(path, **read_kwargs):
    """Load a tab-separated file with pandas, forwarding any extra options."""
    return pd.read_csv(path, sep='\t', **read_kwargs)


# Quantitative matrices: the first column of each file becomes the index.
xq = _read_tsv(xq_path, index_col=0)
xm = _read_tsv(mq_path, index_col=0)

# Feature-info tables: loaded with the default integer index.
q2i = _read_tsv(q2i_path)
m2i = _read_tsv(m2i_path)

# Metadata table: the first column becomes the index.
mdata = _read_tsv(mdata_path, index_col=0)

In [4]:
#
# Parameters
#

# Metadata columns handed to Xstats as the qualitative variables.
qualCols = [
    'Caso/control',
    'diabetes',
    'smoker',
    'HIPERTENSION_RF',
    'DISLIPEMIA_RF',
]

# Metadata columns handed to Xstats as the quantitative variables.
quanCols = [
    'Plaque thickness',
    'Glucosa',
    'Calcio Score',
    'age',
    'coltot',
    'hdl',
    'presis',
    'presdi',
]

In [5]:
# Build one Xstats object per data matrix (xq first, then xm); both share the
# same metadata table and the same qualitative / quantitative column lists.
xqs, xms = (
    Xstats(mdata, x_matrix, qualCols, quanCols)
    for x_matrix in (xq, xm)
)

In [6]:
# Simple logistic regression (Xstats.LogR), run on the proteomics object
# first and then on the metabolomics object.
for stats_obj in (xqs, xms):
    stats_obj.LogR()

Optimization terminated successfully.
         Current function value: 0.641101
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.666954
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.692959
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.692184
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.687290
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.692761
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.681241
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.684184
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.686394
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.691346
  

In [7]:
# Multiple logistic regression (Xstats.MLogR) on both objects.
# topN=20 is forwarded as-is; presumably it caps how many top-ranked features
# enter the model (verify against BasicStats).
for stats_obj in (xqs, xms):
    stats_obj.MLogR(topN=20)

Optimization terminated successfully.
         Current function value: 0.451976
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.044172
         Iterations 14
Optimization terminated successfully.
         Current function value: 0.531126
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.483344
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.504790
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.615805
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.080102
         Iterations 11
Optimization terminated successfully.
         Current function value: 0.562736
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.547340
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.579842


In [8]:
# Regularised multiple logistic regression (Xstats.RMLogR) on both objects,
# proteomics first. topN=100 is forwarded unchanged to the method.
for stats_obj in (xqs, xms):
    stats_obj.RMLogR(topN=100)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.3317921978331709
            Iterations: 145
            Function evaluations: 145
            Gradient evaluations: 145
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.07795773793354432
            Iterations: 158
            Function evaluations: 158
            Gradient evaluations: 158
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.43817906637613824
            Iterations: 134
            Function evaluations: 134
            Gradient evaluations: 134
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.41005290577374287
            Iterations: 111
            Function evaluations: 112
            Gradient evaluations: 111
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.3968498872936556
            Iterations: 124
        

In [9]:
# Linear regression (Xstats.LinR), proteomics object first, then metabolomics.
for stats_obj in (xqs, xms):
    stats_obj.LinR()

In [10]:
# Multiple linear regression (Xstats.MLinR) on both objects; topN=100 is
# forwarded unchanged to the method.
for stats_obj in (xqs, xms):
    stats_obj.MLinR(topN=100)

In [14]:
# Correlation analysis (Pearson, Spearman, Kendall) via Xstats.correlations,
# proteomics object first, then metabolomics.
for stats_obj in (xqs, xms):
    stats_obj.correlations()

In [15]:
# T-test and Mann-Whitney U-test via Xstats.TUtest, run on both objects.
for stats_obj in (xqs, xms):
    stats_obj.TUtest()

In [16]:
# FDR step (labelled BH, i.e. presumably Benjamini-Hochberg) via Xstats.FDR.
# It is called after all the tests above, in the same order as before.
for stats_obj in (xqs, xms):
    stats_obj.FDR()

In [17]:
# Write the table returned by each object's export() as a tab-separated file.
# File names are relative, so they land in the current working directory.
for stats_obj, out_name in ((xqs, 'Xq_stats.tsv'), (xms, 'Xm_stats.tsv')):
    stats_obj.export().to_csv(out_name, sep='\t')