In [1]:
#
# Import Libraries
#

import os
import sys
import pandas as pd
import numpy as np

# Folder with the project's helper modules (presumably where BasicStats lives
# -- confirm against the import that follows).
utilsPath = r'S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\utils'

# Register the folder on the module search path only when it is missing, so
# re-running this cell does not accumulate duplicate entries.
_utils_registered = utilsPath in sys.path
if not _utils_registered:
    sys.path.append(utilsPath)

from BasicStats import Xstats

In [4]:
#
# Set constants
#

# Root folder of each omics data set (Q: proteomics, M: metabolomics).
# NOTE(review): absolute paths on the S: drive -- the notebook only runs where
# that share is mounted.
Qworking_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Proteomics\PESA_V2"
Mworking_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metabolomics\PESA_V2"

# Per data set: the X matrix file and the feature-info file, both stored in
# the data set's "WorkingFiles" sub-folder.
xq_path, q2i_path = (
    os.path.join(Qworking_path, "WorkingFiles", file_name)
    for file_name in ("Xq_minus_X_norm.tsv", "q2info.tsv")
)
# Note: despite its "mq" prefix, mq_path is the metabolomics X matrix.
mq_path, m2i_path = (
    os.path.join(Mworking_path, "WorkingFiles", file_name)
    for file_name in ("Xm_norm.tsv", "f2i.tsv")
)

# metadata
mdata_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metadata\PESA_V2\WorkingFiles\main_metadata.tsv"

In [5]:
#
# Read data
#

# X matrices: tab-separated, first file column used as the row index.
# (mq_path points at the metabolomics matrix, hence it feeds xm.)
xq, xm = (
    pd.read_csv(tsv_path, sep='\t', index_col=0)
    for tsv_path in (xq_path, mq_path)
)

# Feature-info tables: tab-separated, default integer index.
q2i, m2i = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (q2i_path, m2i_path)
)

# Metadata table, re-indexed by its 'Seqn' column after loading.
mdata = pd.read_csv(mdata_path, sep='\t').set_index('Seqn')

In [8]:
#
# Parameters
#

# Variables handed to Xstats together with the metadata table (presumably
# column names of that table -- confirm against main_metadata.tsv).
# qualCols is passed as the qualitative set, quanCols as the quantitative set;
# order is kept exactly as originally listed.
qualCols = ['Caso/control', 'Smoke_dummy']

quanCols = [
    'Plaque_thickness', 'Calcium_Score', 'Plaque_burden',
    'Total_Cholesterol', 'HDL', 'LDL', 'Ox-LDL', 'Lipoprotein a',
    'CRP',
    'Framingham 10y', 'Framingham 30y',
    'deqage',
    'Systolic Blood Pressure', 'Diastolic Blood Pressure',
]

In [9]:
# Create Xstats object
# One object per X matrix (xq first, then xm); both share the same metadata
# table and the same qualitative / quantitative variable lists.

xqs, xms = (
    Xstats(mdata, x_matrix, qualCols, quanCols)
    for x_matrix in (xq, xm)
)

In [10]:
# Simple Logistic Regression
# Run on the proteomics object first, then on the metabolomics one. The fits
# print optimizer reports to the cell output.

for stats_obj in (xqs, xms):
    stats_obj.LogR()

Optimization terminated successfully.
         Current function value: 0.689213
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.693143
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.691605
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.688132
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.691435
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.692880
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.691499
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.692625
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.692977
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.687645
  

In [11]:
# Multiple Logistic Regression
# Same call on both objects, proteomics first.
# NOTE(review): topN=100 presumably caps how many features enter the model --
# confirm in BasicStats.

for stats_obj in (xqs, xms):
    stats_obj.MLogR(topN=100)

Optimization terminated successfully.
         Current function value: 0.521390
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.355931
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.472424
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.130569
         Iterations 12


In [12]:
# Regularised Multiple Logistic Regression
# Same call on both objects, proteomics first.
# NOTE(review): topN=100 presumably caps how many features enter the model --
# confirm in BasicStats.

for stats_obj in (xqs, xms):
    stats_obj.RMLogR(topN=100)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.5532857088939604
            Iterations: 117
            Function evaluations: 118
            Gradient evaluations: 117
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.41472408808822875
            Iterations: 123
            Function evaluations: 123
            Gradient evaluations: 123
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.5174580624487553
            Iterations: 130
            Function evaluations: 132
            Gradient evaluations: 130
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.23604590435429001
            Iterations: 172
            Function evaluations: 174
            Gradient evaluations: 172


In [13]:
# Linear Regression
# Same call on both objects, proteomics first; the cell produces no output.

for stats_obj in (xqs, xms):
    stats_obj.LinR()

In [14]:
# Multiple Linear Regression
# Same call on both objects, proteomics first.
# NOTE(review): topN=100 presumably caps how many features enter the model --
# confirm in BasicStats.

for stats_obj in (xqs, xms):
    stats_obj.MLinR(topN=100)

In [15]:
# Correlations (Pearson, Spearman, Kendall)
# Same call on both objects, proteomics first; the cell produces no output.

for stats_obj in (xqs, xms):
    stats_obj.correlations()

In [16]:
# T-test & U-test (Mann-Whitney)
# Same call on both objects, proteomics first; the cell produces no output.

for stats_obj in (xqs, xms):
    stats_obj.TUtest()

In [17]:
# FDR using BH
# Same call on both objects, proteomics first.
# NOTE(review): presumably needs the p-values produced by the preceding test
# cells, so keep this cell after them -- confirm in BasicStats.

for stats_obj in (xqs, xms):
    stats_obj.FDR()

In [18]:
# Write df
# Each object's exported table goes to a tab-separated file. The file names
# are relative, so the files land in the current working directory.

for stats_obj, out_name in ((xqs, 'Xq_stats.tsv'), (xms, 'Xm_stats.tsv')):
    stats_obj.export().to_csv(out_name, sep='\t')

In [19]:
# Display the exported statistics table of the metabolomics object. Kept as a
# bare last expression so the notebook renders it (pandas truncates the view).
# The rendered frame has a three-level column header, apparently
# variable / test / statistic, with one row per feature.
# NOTE(review): export() is called again here after the TSV-writing cell;
# cache the result if the call turns out to be expensive.
xms.export()

Unnamed: 0_level_0,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,Caso/control,...,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure
Unnamed: 0_level_1,LogR,LogR,LogR,MLogR,MLogR,MLogR,RMLogR,RMLogR,RMLogR,ttest,...,MLinR,Pearson,Pearson,Pearson,Spearman,Spearman,Spearman,Kendall,Kendall,Kendall
Unnamed: 0_level_2,params,pvalues,fdr,params,pvalues,fdr,params,pvalues,fdr,t,...,fdr,correlations,pvalues,fdr,correlations,pvalues,fdr,correlations,pvalues,fdr
C18P1,-0.140710,0.189945,0.644101,,,,,,,1.314075,...,,-0.065568,0.169773,0.588113,-0.053636,0.261573,0.656262,-0.036426,0.261027,0.662855
C18P2,-0.042480,0.665259,0.906652,,,,,,,0.431948,...,,-0.059242,0.214894,0.628955,-0.089752,0.059958,0.431712,-0.059096,0.068230,0.442425
C18P5,-0.117997,0.237084,0.697395,,,,,,,1.183670,...,,-0.052014,0.276296,0.678241,-0.046620,0.329231,0.725876,-0.030616,0.344810,0.728437
C18P7,-0.035952,0.710987,0.917431,,,,,,,0.369869,...,,0.003788,0.936856,0.977678,-0.029218,0.541025,0.840056,-0.020281,0.531448,0.841664
C18P12,-0.037558,0.707817,0.917431,,,,,,,0.374070,...,,-0.018866,0.693109,0.906029,-0.051892,0.277422,0.673457,-0.035058,0.279364,0.676114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HILP2033,0.114162,0.239311,0.700936,,,,,,,-1.179317,...,,0.041843,0.381257,0.740954,0.052484,0.271971,0.668714,0.037394,0.248564,0.651000
HILP2034,0.073090,0.460344,0.820887,,,,,,,-0.738245,...,,-0.052094,0.275555,0.678241,-0.034916,0.465052,0.814029,-0.022744,0.482811,0.817103
HILP2036,-0.002587,0.980029,0.991571,,,,,,,0.024975,...,,0.019405,0.684795,0.904000,0.027418,0.566247,0.852886,0.017671,0.585576,0.858789
HILP2038,-0.076318,0.462226,0.820887,,,,,,,0.734419,...,,-0.070700,0.138698,0.546446,-0.056005,0.241056,0.644929,-0.035752,0.269948,0.667918
