In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [2]:
#sample IDs of all control group (SHAM) samples
control = []
temp = [(1,8), (22, 35), (48, 63), (83, 95)]
for x in temp:
    for i in range(x[0], x[1]+1):
        control.append(i)

In [3]:
#sample IDs of all male samples
male = []
temp = [(1,21), (48, 82)]
for x in temp:
    for i in range(x[0], x[1]+1):
        male.append(i)

In [4]:
#Cleaning up the qPCR sheet for use later
df1 = pd.read_excel('data.xlsx', sheet_name = "qPCR data")
Exp_ddCT = ['Unnamed: 1', '2^(-ddCT)', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30']
df1.drop(columns=df1.columns.difference(Exp_ddCT), inplace=True)
df1.columns = ['Sample', 'CDH13', 'FOXP2', 'NPHP4', 'NPY2R', 'PLXNA1', 'WNT3']
df1.drop([0,1,2,3], inplace = True)
df1.index = range(len(df1.index))
df1

Unnamed: 0,Sample,CDH13,FOXP2,NPHP4,NPY2R,PLXNA1,WNT3
0,001-B,0.545684,0.00753727,18.6086,77.8067,7.0111,0.35756
1,001-1,59.7663,5.76383,5.8132,35.9782,1.40219,0.0119916
2,001-2,16.3225,0.645179,0.476411,582.301,0.131244,0.0133136
3,001-3,0.473948,20.9663,31.6475,0.000609019,1.89361,0.310555
4,001-4,0.007175,1.90524,0.368475,0.00625132,0.29255,29.3889
...,...,...,...,...,...,...,...
650,,,,,,,
651,,,,,,,
652,,,,,,,
653,,,,,,,


In [5]:
#used to better parse the xxx-y format for sample IDs in the qPCR sheet
def checkIndex(sampleID, i):
    if not isinstance(sampleID, str):
        return False
    if len(sampleID) < 3:
        return False
    test = sampleID[:3]
    if not test.isdigit():
        return False
    if int(test) != i:
        return False
    return True

In [6]:
#pvalue analysis based on the different groups
def pval(df):
    #overall
    print('Overall')
    df_ANOVAF, df_ANOVAP = stats.f_oneway(df['baseline'], df['w1'], df['w2'], df['w3'], df['w4'])
    print(df_ANOVAP)
    
    print('Control Group')
    #Control Group
    dfC = df[df['group']==0]
    dfC_ANOVAF, dfC_ANOVAP = stats.f_oneway(dfC['baseline'], dfC['w1'], dfC['w2'], dfC['w3'], dfC['w4'])
    print(dfC_ANOVAP)
    
    print('Treatment Group')
    #Treatment Group
    dfP = df[df['group']==1]
    dfP_ANOVAF, dfP_ANOVAP = stats.f_oneway(dfP['baseline'], dfP['w1'], dfP['w2'], dfP['w3'], dfP['w4'])
    print(dfP_ANOVAP)
    
    print('Male Group')
    #Control Group
    dfC = df[df['gender']==0]
    dfC_ANOVAF, dfC_ANOVAP = stats.f_oneway(dfC['baseline'], dfC['w1'], dfC['w2'], dfC['w3'], dfC['w4'])
    print(dfC_ANOVAP)
    
    print('Female Group')
    #Treatment Group
    dfP = df[df['gender']==1]
    dfP_ANOVAF, dfP_ANOVAP = stats.f_oneway(dfP['baseline'], dfP['w1'], dfP['w2'], dfP['w3'], dfP['w4'])
    print(dfP_ANOVAP)
    print('\n')

In [7]:
#2^(-ddCT) analysis
names = ['CDH13', 'FOXP2', 'NPHP4', 'NPY2R', 'PLXNA1', 'WNT3']
for name in names:
    #Dataframe for analysis. Rows are samples, columns are when the sample was taken/sample ID
    df = pd.DataFrame(columns = ['baseline', 'w1', 'w2', 'w3', 'w4', 'sample ID'])
    print(f'Analysis for %s 2^(-ddCT)' %name)
    
    #parse the spreadsheet to find the useful stuff
    j = 0
    for i in range(1, 112 + 1):
        nRow = [float("NAN"),float("NAN"),float("NAN"),float("NAN"),float("NAN"), i]
        while checkIndex(df1['Sample'][j], i):
            switch = df1['Sample'][j][-1]
            if  switch == 'B':
                nRow[0] = float(df1[name][j])
            elif switch.isdigit():
                nRow[int(switch)] = float(df1[name][j])
            j+= 1
        df.loc[len(df.index)] = nRow 
    
    #remove NaN values 
    df.dropna(inplace = True)
    
    #identify treatment group    
    df['group'] = [0 if a in control else 1 for a in df['sample ID']]
    
    #identify male/female subjects
    df['gender'] = [0 if a in male else 1 for a in df['sample ID']]
    
    #p-value stuff
    pval(df)

Analysis for CDH13 2^(-ddCT)
Overall
0.33387573622158623
Control Group
0.7165708494951972
Treatment Group
0.38326684780816694
Male Group
0.4200467320470618
Female Group
0.37351695135443613


Analysis for FOXP2 2^(-ddCT)
Overall
0.15562949146416355
Control Group
0.36206160112775626
Treatment Group
0.12082928411284878
Male Group
0.5055625366588945
Female Group
0.46081367917537097


Analysis for NPHP4 2^(-ddCT)
Overall
0.13077137701720568
Control Group
0.5344938056739288
Treatment Group
0.11623554735898319
Male Group
0.386600061006195
Female Group
0.21497819987567543


Analysis for NPY2R 2^(-ddCT)
Overall
0.24951373739707086
Control Group
0.9668685456770805
Treatment Group
0.0401511458180166
Male Group
0.8880832779488165
Female Group
0.13939413983205126


Analysis for PLXNA1 2^(-ddCT)
Overall
0.25681368050249564
Control Group
0.3371656302761077
Treatment Group
0.011669870764741045
Male Group
0.255070387436114
Female Group
0.8924538535049369


Analysis for WNT3 2^(-ddCT)
Overall
0.33490017

In [8]:
#dCT analysis
names = ['CDH13 deltaCT', 'FOXP2 delta CT', 'NPHP4 delta CT', 'NPY2R deltaCT', 'PLXNA1 deltaCT', 'WNT3 deltaCT']
for name in names:
    print(f'Analysis for %s' %name )
    #read in file
    df = pd.read_excel('data.xlsx', sheet_name = name)
    expected = ['Unnamed: 0', 'Baseline', '1w', '2w', '3w', '4w', 'LF']
    df.drop(columns=df.columns.difference(expected), inplace=True)

    #label columns
    df.columns = ['sample ID', 'baseline', 'w1', 'w2', 'w3', 'w4', 'lf']

    #identify control group/treatment group
    df['group'] = [0 if a in control else 1 for a in df['sample ID']]
    
    #identify male/female subjects
    df['gender'] = [0 if a in male else 1 for a in df['sample ID']]

    #remove samples with NaN values
    df = df.dropna()
    
    #pval analysis
    pval(df)
    
    


Analysis for CDH13 deltaCT
Overall
0.5623642837714251
Control Group
0.1061549779781953
Treatment Group
0.3267787194096811
Male Group
0.4677353227124902
Female Group
0.9777761869029543


Analysis for FOXP2 delta CT
Overall
0.40776245543257617
Control Group
0.6983414749867833
Treatment Group
0.5197523496546733
Male Group
0.6397931546536805
Female Group
0.7625777174100105


Analysis for NPHP4 delta CT
Overall
0.004215878470713811
Control Group
0.10907848770905847
Treatment Group
0.021663048808889588
Male Group
0.2505379266769446
Female Group
0.012564842870483244


Analysis for NPY2R deltaCT
Overall
0.0007156144604027649
Control Group
0.18059009242339014
Treatment Group
0.003717656953880997
Male Group
0.05609913686779227
Female Group
0.014865486515567147


Analysis for PLXNA1 deltaCT
Overall
0.006726606379777116
Control Group
0.15048416940574305
Treatment Group
0.014957054937503413
Male Group
0.037686577104405615
Female Group
0.14775362286292212


Analysis for WNT3 deltaCT
Overall
0.122539

In [9]:
#useless analysis, sample size is too small
names = ['CDH13 deltaCT', 'FOXP2 delta CT', 'NPHP4 delta CT', 'NPY2R deltaCT', 'PLXNA1 deltaCT', 'WNT3 deltaCT']
for name in names:
    print(f'Analysis for %s' %name )
    #read in file
    df = pd.read_excel('data.xlsx', sheet_name = name)
    expected = ['Unnamed: 0', 'Baseline', '1w', '2w', '3w', '4w', 'LF']
    df.drop(columns=df.columns.difference(expected), inplace=True)

    #label columns
    df.columns = ['sample ID', 'baseline', 'w1', 'w2', 'w3', 'w4', 'lf']

    #identify control group/treatment group
    df['group'] = [0 if a in control else 1 for a in df['sample ID']]
    
    #identify male/female subjects
    df['gender'] = [0 if a in male else 1 for a in df['sample ID']]

    #remove samples with NaN values
    df = df.dropna()
    print('Male Control Group')
    #Control Group
    dfC = df[(df['gender']==0) & df['group']==0]
    dfC_ANOVAF, dfC_ANOVAP = stats.f_oneway(dfC['baseline'], dfC['w1'], dfC['w2'], dfC['w3'], dfC['w4'])
    print(dfC_ANOVAP)
    
    print('Male Treatment Group')
    #Treatment Group
    dfP = df[(df['gender']==0) & df['group']==1]
    dfP_ANOVAF, dfP_ANOVAP = stats.f_oneway(dfP['baseline'], dfP['w1'], dfP['w2'], dfP['w3'], dfP['w4'])
    print(dfP_ANOVAP)
    
    print('Female Control Group')
    #Control Group
    dfC = df[(df['gender']==1) & df['group']==0]
    dfC_ANOVAF, dfC_ANOVAP = stats.f_oneway(dfC['baseline'], dfC['w1'], dfC['w2'], dfC['w3'], dfC['w4'])
    print(dfC_ANOVAP)
    
    print('Female Treatment Group')
    #Treatment Group
    dfP = df[(df['gender']==1) & df['group']==1]
    dfP_ANOVAF, dfP_ANOVAP = stats.f_oneway(dfP['baseline'], dfP['w1'], dfP['w2'], dfP['w3'], dfP['w4'])
    print(dfP_ANOVAP)
    print('\n')

Analysis for CDH13 deltaCT
Male Control Group
0.7585283147392633
Male Treatment Group
0.7412596446819533
Female Control Group
0.2627442930323444
Female Treatment Group
0.3639232025187888


Analysis for FOXP2 delta CT
Male Control Group
0.5020965871238743
Male Treatment Group
0.6668605347284768
Female Control Group
0.5412576308019481
Female Treatment Group
0.5598090979187806


Analysis for NPHP4 delta CT
Male Control Group
0.01819549296486298
Male Treatment Group
0.37331594907084864
Female Control Group
0.040084824213717084
Female Treatment Group
0.06824636181946074


Analysis for NPY2R deltaCT
Male Control Group
0.01735664276819519
Male Treatment Group
0.021452658624999656
Female Control Group
0.010763785649527395
Female Treatment Group
0.1890394264026387


Analysis for PLXNA1 deltaCT
Male Control Group
0.052240583312871895
Male Treatment Group
0.23516159193230776
Female Control Group
0.04046008463962239
Female Treatment Group
0.10579420293463206


Analysis for WNT3 deltaCT
Male Contro