In [1]:
#
# Import Libraries
#

import os
import sys
import pandas as pd
import numpy as np

# Register the shared `utils` folder on the module search path (only once per
# kernel), presumably so that the `BasicStats` import that follows resolves.
utilsPath = r'S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\utils'
if sys.path.count(utilsPath) == 0:
    sys.path.append(utilsPath)

from BasicStats import Xstats

In [2]:
#
# Set constants
#

def _working_file(root, filename):
    """Return the path of `filename` inside the `WorkingFiles` folder of `root`."""
    return os.path.join(root, "WorkingFiles", filename)


# Root folders: Q = proteomics data set, M = metabolomics data set
Qworking_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Proteomics\PESA"
Mworking_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metabolomics\PESA"

# X matrices
# NOTE: despite its name, `mq_path` points at the metabolomics matrix
# (Xm_norm.tsv); the name is kept because the reading cell refers to it.
xq_path = _working_file(Qworking_path, "Xq_minus_X_norm.tsv")
mq_path = _working_file(Mworking_path, "Xm_norm.tsv")

# Feature info tables
q2i_path = _working_file(Qworking_path, "q2info.tsv")
m2i_path = _working_file(Mworking_path, "f2info.tsv")

# Metadata table (already a complete path, so no joining is required)
mdata_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metadata\PESA\WorkingFiles\main_metadata.tsv"

In [3]:
#
# Read data
#

def _load_tsv(path, **read_kwargs):
    """Read a tab-separated file with pandas, forwarding extra `read_csv` options."""
    return pd.read_csv(path, sep='\t', **read_kwargs)


# X matrices: the first file column is used as the row index
xq = _load_tsv(xq_path, index_col=0)
xm = _load_tsv(mq_path, index_col=0)

# Feature info for the proteomics data set; the metabolomics counterpart
# (m2i_path) is currently not loaded and its read is left disabled.
q2i = _load_tsv(q2i_path)
#m2i = pd.read_csv(m2i_path, sep='\t')

# Metadata, re-indexed by its 'Seqn' column after reading
mdata = _load_tsv(mdata_path).set_index('Seqn')

In [4]:
#
# Parameters
#

# Column-name lists handed to Xstats as its third (qualCols) and fourth
# (quanCols) arguments; presumably qualitative vs. quantitative metadata
# variables, judging by the names.
qualCols = [
    'Caso/control',
    'Smoke_dummy',
]

# Same entries and same order as a single flat list, only written in chunks.
# NOTE(review): 'Systolic Blodd Pressure' looks like a typo for "Blood"; it is
# left untouched because it presumably has to match the metadata column
# header exactly -- confirm against main_metadata.tsv before renaming.
quanCols = (
    ['Plaque_thickness', 'Calcium_Score', 'Plaque_burden']
    + ['Total_Cholesterol', 'HDL', 'LDL', 'Ox-LDL', 'Lipoprotein(a)', 'CRP']
    + ['Framingham 10y', 'Framingham 30y', 'deqage']
    + ['Systolic Blodd Pressure', 'Diastolic Blood Pressure']
)

In [5]:
# Build one Xstats object per X matrix (proteomics first, then metabolomics);
# both receive the same metadata table and the same column-name lists.
xqs, xms = (
    Xstats(mdata, x_matrix, qualCols, quanCols)
    for x_matrix in (xq, xm)
)

In [6]:
# Simple logistic regression: run Xstats.LogR on the proteomics object and
# then on the metabolomics object (same order as calling them one by one).
for stats_obj in (xqs, xms):
    stats_obj.LogR()

Optimization terminated successfully.
         Current function value: 0.673982
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.677543
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.693100
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.690203
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.691859
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.693089
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.693011
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.691201
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.679827
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.691478
  

In [7]:
# Multiple logistic regression: Xstats.MLogR is run with topN=100 on the
# proteomics object first and on the metabolomics object second.
# (The meaning of `topN` is defined by Xstats; the value is passed unchanged.)
for stats_obj in (xqs, xms):
    stats_obj.MLogR(topN=100)

Optimization terminated successfully.
         Current function value: 0.448714
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.287647
         Iterations 9
Optimization terminated successfully.
         Current function value: 0.411806
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.311504
         Iterations 9


In [8]:
# Regularised multiple logistic regression: Xstats.RMLogR with topN=100,
# proteomics object first, metabolomics object second.
for stats_obj in (xqs, xms):
    stats_obj.RMLogR(topN=100)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.4947095457917875
            Iterations: 86
            Function evaluations: 87
            Gradient evaluations: 86
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.3540436213758815
            Iterations: 100
            Function evaluations: 100
            Gradient evaluations: 100
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.47948051556672033
            Iterations: 148
            Function evaluations: 149
            Gradient evaluations: 148
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.38434443876489277
            Iterations: 168
            Function evaluations: 168
            Gradient evaluations: 168


In [9]:
# Linear regression: call Xstats.LinR on each statistics object in turn
# (proteomics, then metabolomics).
for stats_obj in (xqs, xms):
    stats_obj.LinR()

In [10]:
# Multiple linear regression: Xstats.MLinR with topN=100 on the proteomics
# object, then on the metabolomics object.
for stats_obj in (xqs, xms):
    stats_obj.MLinR(topN=100)

In [11]:
# Correlations (Pearson, Spearman, Kendall): run Xstats.correlations on the
# proteomics object and then on the metabolomics object.
for stats_obj in (xqs, xms):
    stats_obj.correlations()

In [12]:
# T-test and U-test (Mann-Whitney): run Xstats.TUtest on each statistics
# object, proteomics first.
for stats_obj in (xqs, xms):
    stats_obj.TUtest()

In [13]:
# FDR using BH (presumably Benjamini-Hochberg): run Xstats.FDR on the
# proteomics object and then on the metabolomics object.
for stats_obj in (xqs, xms):
    stats_obj.FDR()

In [14]:
# Write the exported statistics of each object to a tab-separated file.
# The file names are relative, so they land in the current working directory.
for stats_obj, out_name in ((xqs, 'Xq_stats.tsv'), (xms, 'Xm_stats.tsv')):
    stats_obj.export().to_csv(out_name, sep='\t')

In [15]:
# Display the exported statistics of the metabolomics object as the cell
# output. This calls xms.export() again instead of reusing the result that
# was written to 'Xm_stats.tsv'.
xms.export()

Unnamed: 0_level_0,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,...,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure
Unnamed: 0_level_1,LogR,LogR,LogR,MLogR,MLogR,MLogR,RMLogR,RMLogR,RMLogR,ttest,...,MLinR,Pearson,Pearson,Pearson,Spearman,Spearman,Spearman,Kendall,Kendall,Kendall
Unnamed: 0_level_2,params,pvalues,fdr,params,pvalues,fdr,params,pvalues,fdr,t,...,fdr,correlations,pvalues,fdr,correlations,pvalues,fdr,correlations,pvalues,fdr
C18P8,-0.103694,0.502897,0.996045,,,,,,,0.668955,...,,0.002348,0.963474,0.989548,0.033215,0.516929,0.764921,0.021205,0.541823,0.777805
C18P9,0.095200,0.592373,0.996045,,,,,,,-0.534440,...,,0.047986,0.348973,0.731183,0.070079,0.171102,0.492684,0.045495,0.190579,0.504317
C18P11,0.014894,0.939219,0.996045,,,,,,,-0.076054,...,,0.002682,0.958281,0.989548,0.044692,0.383091,0.675869,0.027708,0.425361,0.702269
C18P13,0.123364,0.535272,0.996045,,,,,,,-0.618990,...,,0.060949,0.234043,0.632318,0.083114,0.104360,0.394309,0.053999,0.120296,0.424056
C18P15,0.121951,0.290065,0.996045,,,,,,,-1.059070,...,,0.086454,0.091111,0.435546,0.070177,0.170504,0.492684,0.047163,0.174832,0.492572
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HILN597,0.060283,0.618055,0.996045,,,,,,,-0.497698,...,,0.071289,0.163812,0.547591,0.054245,0.289641,0.613010,0.036129,0.298610,0.614034
HILN598,0.142153,0.197523,0.996045,,,,,,,-1.292337,...,,0.005405,0.916035,0.975323,-0.023001,0.653634,0.841463,-0.015813,0.649148,0.835338
HILN599,-0.081510,0.467835,0.996045,,,,,,,0.725609,...,,-0.041357,0.419622,0.756101,-0.038346,0.454302,0.722610,-0.026569,0.444645,0.714092
HILN600,-0.041591,0.712343,0.996045,,,,,,,0.367952,...,,-0.014302,0.780250,0.911584,0.000882,0.986268,0.995718,0.001473,0.966199,0.990250
