In [1]:
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Participants per wave

In [2]:
df_pp = pd.read_csv('../data/movez_metadata/Participant_Info.csv', sep=';', header=0)

In [3]:
print("Number of schools:" , len(df_pp.School.unique()))

Number of schools: 28


In [4]:
print("Number of participants W1:", df_pp.W1.sum())
print("Number of participants W2:", df_pp.W2.sum())
print("Number of participants W3:", df_pp.W3.sum())
print("Number of participants W4:", df_pp.W4.sum())
print("Number of participants W5:", df_pp.W5.sum())
print("Number of participants W6:", df_pp.W6.sum())
print("Number of participants W7:", df_pp.W7.sum())

Number of participants W1: 843
Number of participants W2: 901
Number of participants W3: 868
Number of participants W4: 744
Number of participants W5: 1017
Number of participants W6: 755
Number of participants W7: 745


In [5]:
print("Number of participants Year 1:", df_pp.Y1_Sample.sum())
print("Number of participants Year 2:", df_pp.W4.sum())
print("Number of participants Year 3:", df_pp.Y3_Sample.sum())

Number of participants Year 1: 951
Number of participants Year 2: 744
Number of participants Year 3: 1032


# Descriptive analysis

In [6]:
df_vars = pd.read_csv('../output/model_data.csv', sep=',', header=0)
df_vars

Unnamed: 0,Child,isInfluencer,influencer_ratio_total,WC,affect,drives,social,polarity,health,cogproc,...,BMI,flynn_score,n2b_score,happy_score,prosocial_score,public_indiv_score,esteem_score,age,sex,fas
0,1046,0,0.185185,,,,,,,,...,19.0,3.4,3.6,5.250000,,2.333333,4.900000,14.0,1,8.0
1,1047,0,0.240741,,,,,,,,...,19.0,,,6.000000,,,,14.0,0,0.0
2,1049,0,0.157407,,,,,,,,...,17.0,,4.3,5.333333,,,3.450000,13.0,0,8.0
3,1050,0,0.194444,,,,,,,,...,22.0,,2.5,6.333333,,,,13.0,0,10.0
4,1055,0,0.209877,,,,,,,,...,28.0,3.8,4.4,6.000000,,5.416667,,13.0,0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,4889,0,0.254167,,,,,,,,...,16.0,,2.9,5.250000,,,4.526923,10.0,1,9.5
640,4890,0,0.154167,6.875000,3.775833,6.073750,13.016667,-0.009236,0.000000,12.109167,...,18.0,,,6.000000,,,3.565385,10.0,1,10.5
641,6135,0,0.255556,18.327869,9.820164,2.809508,7.474426,0.077832,0.196885,11.224590,...,17.0,,,6.333333,,,4.384615,10.0,1,9.0
642,6137,0,0.155556,2.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,,,,6.000000,,,4.076923,10.0,1,10.0


In [7]:
df_vars.columns

Index(['Child', 'isInfluencer', 'influencer_ratio_total', 'WC', 'affect',
       'drives', 'social', 'polarity', 'health', 'cogproc', 'subjectivity',
       'num_likes', 'g_num_text', 'g_num_media', 'i_num_text', 'i_num_media',
       'i_num_recepients', 'Steps', 'Minutes_MVPA', 'BMI', 'flynn_score',
       'n2b_score', 'happy_score', 'prosocial_score', 'public_indiv_score',
       'esteem_score', 'age', 'sex', 'fas'],
      dtype='object')

In [8]:
df_vars.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Child,644.0,2215.246894,860.137025,1046.0,1484.0,2262.5,2998.25,6138.0
isInfluencer,644.0,0.149068,0.356433,0.0,0.0,0.0,0.0,1.0
influencer_ratio_total,644.0,0.26856,0.137535,0.020172,0.157365,0.265292,0.358709,0.756614
WC,402.0,4.058575,2.7306,1.0,2.578028,3.651515,5.0,30.394737
affect,402.0,4.879672,7.382569,0.0,0.0,3.578618,6.694208,100.0
drives,402.0,4.456607,5.680184,0.0,0.0,3.16073,6.407006,45.0
social,402.0,8.813711,7.105446,0.0,3.615685,8.580742,12.827043,53.333333
polarity,402.0,0.026315,0.10946,-0.6,-0.006478,0.0,0.04609,0.7
health,402.0,0.306338,1.377824,0.0,0.0,0.0,0.0,14.285714
cogproc,402.0,7.055841,6.14354,0.0,2.090435,6.25,10.0,33.33


In [9]:
# Create subsets
df_influencer = df_vars[df_vars['isInfluencer']==1]
df_nonInfluencer = df_vars[df_vars['isInfluencer']==0]

In [10]:
df_influencer.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Child,96.0,2382.4375,779.213501,1179.0,1626.25,2318.5,2998.5,6138.0
isInfluencer,96.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
influencer_ratio_total,96.0,0.493488,0.078179,0.415155,0.434343,0.466327,0.525865,0.756614
WC,73.0,4.463851,3.390439,1.0,2.705882,3.8,5.0,20.5
affect,73.0,5.680777,8.339605,0.0,0.952381,3.104167,6.25,50.0
drives,73.0,4.859827,5.72092,0.0,0.0,3.291667,7.380714,25.0
social,73.0,9.266483,5.783349,0.0,5.13,8.822466,12.572558,25.0
polarity,73.0,0.019102,0.117286,-0.5,-0.006763,0.006364,0.063141,0.366667
health,73.0,0.28562,1.436898,0.0,0.0,0.0,0.0,11.363636
cogproc,73.0,6.70688,5.226778,0.0,2.5,6.051667,10.0,25.625


In [11]:
df_nonInfluencer.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Child,548.0,2185.958029,870.897856,1046.0,1421.75,1813.5,2995.25,6137.0
isInfluencer,548.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
influencer_ratio_total,548.0,0.229156,0.103633,0.020172,0.138889,0.237447,0.313308,0.414651
WC,329.0,3.96865,2.558838,1.0,2.5,3.615385,5.0,30.394737
affect,329.0,4.701919,7.154791,0.0,0.0,3.594545,6.703277,100.0
drives,329.0,4.367139,5.675975,0.0,0.0,3.14381,6.267912,45.0
social,329.0,8.713248,7.370623,0.0,2.777778,8.527941,12.853656,53.333333
polarity,329.0,0.027916,0.107769,-0.6,-0.00627,0.0,0.044363,0.7
health,329.0,0.310934,1.366591,0.0,0.0,0.0,0.0,14.285714
cogproc,329.0,7.133271,6.333498,0.0,1.851667,6.266505,10.010909,33.33


# Test Normality

In [12]:
# HO: normal distributed
print("Complete dataset:")
print("age:", stats.shapiro(df_vars['age'].dropna()))
print("sex:", stats.shapiro(df_vars['sex'].dropna()))
print("fas:", stats.shapiro(df_vars['fas'].dropna()))
print("flynn:", stats.shapiro(df_vars['flynn_score'].dropna()))
print("n2b:", stats.shapiro(df_vars['n2b_score'].dropna()))
print("happy:", stats.shapiro(df_vars['happy_score'].dropna()))
print("prosocial:", stats.shapiro(df_vars['prosocial_score'].dropna()))
print("public_indiv:", stats.shapiro(df_vars['public_indiv_score'].dropna()))
print("esteem:", stats.shapiro(df_vars['esteem_score'].dropna()))
print("WC:", stats.shapiro(df_vars['WC'].dropna()))
print("affect:", stats.shapiro(df_vars['affect'].dropna()))
print("drives:", stats.shapiro(df_vars['drives'].dropna()))
print("social:", stats.shapiro(df_vars['social'].dropna()))
print("polarity:", stats.shapiro(df_vars['polarity'].dropna()))
print("health:", stats.shapiro(df_vars['health'].dropna()))
print("cogproc:", stats.shapiro(df_vars['cogproc'].dropna()))
print("subjectivity:", stats.shapiro(df_vars['subjectivity'].dropna()))
print("num_likes:", stats.shapiro(df_vars['num_likes'].dropna()))
print("g_num_text:", stats.shapiro(df_vars['g_num_text'].dropna()))
print("g_num_media:", stats.shapiro(df_vars['g_num_media'].dropna()))
print("i_num_text:", stats.shapiro(df_vars['i_num_text'].dropna()))
print("i_num_media:", stats.shapiro(df_vars['i_num_media'].dropna()))
print("i_num_recipients:", stats.shapiro(df_vars['i_num_recepients'].dropna()))
print("Steps:", stats.shapiro(df_vars['Steps'].dropna()))
print("MVPA:", stats.shapiro(df_vars['Minutes_MVPA'].dropna()))
print("BMI:", stats.shapiro(df_vars['BMI'].dropna()))

Complete dataset:
age: ShapiroResult(statistic=0.8766106963157654, pvalue=3.5194230328110634e-22)
sex: ShapiroResult(statistic=0.6351578235626221, pvalue=8.554997633557578e-35)
fas: ShapiroResult(statistic=0.8480838537216187, pvalue=2.2820984737270333e-24)
flynn: ShapiroResult(statistic=0.9490635395050049, pvalue=6.972482147205028e-09)
n2b: ShapiroResult(statistic=0.9830875992774963, pvalue=3.4983288514922606e-06)
happy: ShapiroResult(statistic=0.91359943151474, pvalue=1.6841478955665133e-18)
prosocial: ShapiroResult(statistic=0.8966207504272461, pvalue=4.5386319079106577e-14)
public_indiv: ShapiroResult(statistic=0.9673480987548828, pvalue=3.459984441178676e-07)
esteem: ShapiroResult(statistic=0.9761071801185608, pvalue=1.1027486834791489e-07)
WC: ShapiroResult(statistic=0.6978573203086853, pvalue=3.055058913653136e-26)
affect: ShapiroResult(statistic=0.548971951007843, pvalue=7.744177471917805e-31)
drives: ShapiroResult(statistic=0.7568017244338989, pvalue=6.4744705666706074e-24)
soc

In [13]:
# HO: normal distributed
print("Subset non-influencer:")
print("age:", stats.shapiro(df_influencer['age'].dropna()))
print("sex:", stats.shapiro(df_influencer['sex'].dropna()))
print("fas:", stats.shapiro(df_influencer['fas'].dropna()))
print("flynn:", stats.shapiro(df_influencer['flynn_score'].dropna()))
print("n2b:", stats.shapiro(df_influencer['n2b_score'].dropna()))
print("happy:", stats.shapiro(df_influencer['happy_score'].dropna()))
print("prosocial:", stats.shapiro(df_influencer['prosocial_score'].dropna()))
print("public_indiv:", stats.shapiro(df_influencer['public_indiv_score'].dropna()))
print("esteem:", stats.shapiro(df_influencer['esteem_score'].dropna()))
print("WC:", stats.shapiro(df_influencer['WC'].dropna()))
print("affect:", stats.shapiro(df_influencer['affect'].dropna()))
print("drives:", stats.shapiro(df_influencer['drives'].dropna()))
print("social:", stats.shapiro(df_influencer['social'].dropna()))
print("polarity:", stats.shapiro(df_influencer['polarity'].dropna()))
print("health:", stats.shapiro(df_influencer['health'].dropna()))
print("cogproc:", stats.shapiro(df_influencer['cogproc'].dropna()))
print("subjectivity:", stats.shapiro(df_influencer['subjectivity'].dropna()))
print("num_likes:", stats.shapiro(df_influencer['num_likes'].dropna()))
print("g_num_text:", stats.shapiro(df_influencer['g_num_text'].dropna()))
print("g_num_media:", stats.shapiro(df_influencer['g_num_media'].dropna()))
print("i_num_text:", stats.shapiro(df_influencer['i_num_text'].dropna()))
print("i_num_media:", stats.shapiro(df_influencer['i_num_media'].dropna()))
print("i_num_recipients:", stats.shapiro(df_influencer['i_num_recepients'].dropna()))
print("Steps:", stats.shapiro(df_influencer['Steps'].dropna()))
print("MVPA:", stats.shapiro(df_influencer['Minutes_MVPA'].dropna()))
print("BMI:", stats.shapiro(df_influencer['BMI'].dropna()))

Subset non-influencer:
age: ShapiroResult(statistic=0.8593832850456238, pvalue=4.357614358241335e-08)
sex: ShapiroResult(statistic=0.5699613094329834, pvalue=2.5204458491682362e-15)
fas: ShapiroResult(statistic=0.8841823935508728, pvalue=4.278621474895772e-07)
flynn: ShapiroResult(statistic=0.9527000784873962, pvalue=0.05962485074996948)
n2b: ShapiroResult(statistic=0.9808568358421326, pvalue=0.19554658234119415)
happy: ShapiroResult(statistic=0.84872967004776, pvalue=1.760793466587529e-08)
prosocial: ShapiroResult(statistic=0.7460970282554626, pvalue=6.154025555815679e-08)
public_indiv: ShapiroResult(statistic=0.9098082780838013, pvalue=0.0005560331628657877)
esteem: ShapiroResult(statistic=0.9686172008514404, pvalue=0.0312094334512949)
WC: ShapiroResult(statistic=0.666197657585144, pvalue=1.3392354933361439e-11)
affect: ShapiroResult(statistic=0.6501142382621765, pvalue=6.627746528681255e-12)
drives: ShapiroResult(statistic=0.8113951683044434, pvalue=2.999997761321538e-08)
social: Sh

In [14]:
# HO: normal distributed
print("Subset non-influencer:")
print("age:", stats.shapiro(df_nonInfluencer['age'].dropna()))
print("sex:", stats.shapiro(df_nonInfluencer['sex'].dropna()))
print("fas:", stats.shapiro(df_nonInfluencer['fas'].dropna()))
print("flynn:", stats.shapiro(df_nonInfluencer['flynn_score'].dropna()))
print("n2b:", stats.shapiro(df_nonInfluencer['n2b_score'].dropna()))
print("happy:", stats.shapiro(df_nonInfluencer['happy_score'].dropna()))
print("prosocial:", stats.shapiro(df_nonInfluencer['happy_score'].dropna()))
print("public_indiv:", stats.shapiro(df_nonInfluencer['public_indiv_score'].dropna()))
print("esteem:", stats.shapiro(df_nonInfluencer['esteem_score'].dropna()))
print("WC:", stats.shapiro(df_nonInfluencer['WC'].dropna()))
print("affect:", stats.shapiro(df_nonInfluencer['affect'].dropna()))
print("drives:", stats.shapiro(df_nonInfluencer['drives'].dropna()))
print("social:", stats.shapiro(df_nonInfluencer['social'].dropna()))
print("polarity:", stats.shapiro(df_nonInfluencer['polarity'].dropna()))
print("health:", stats.shapiro(df_nonInfluencer['health'].dropna()))
print("cogproc:", stats.shapiro(df_nonInfluencer['cogproc'].dropna()))
print("subjectivity:", stats.shapiro(df_nonInfluencer['subjectivity'].dropna()))
print("num_likes:", stats.shapiro(df_nonInfluencer['num_likes'].dropna()))
print("g_num_text:", stats.shapiro(df_nonInfluencer['g_num_text'].dropna()))
print("g_num_media:", stats.shapiro(df_nonInfluencer['g_num_media'].dropna()))
print("i_num_text:", stats.shapiro(df_nonInfluencer['i_num_text'].dropna()))
print("i_num_media:", stats.shapiro(df_nonInfluencer['i_num_media'].dropna()))
print("i_num_recipients:", stats.shapiro(df_nonInfluencer['i_num_recepients'].dropna()))
print("Steps:", stats.shapiro(df_nonInfluencer['Steps'].dropna()))
print("MVPA:", stats.shapiro(df_nonInfluencer['Minutes_MVPA'].dropna()))
print("BMI:", stats.shapiro(df_nonInfluencer['BMI'].dropna()))

Subset non-influencer:
age: ShapiroResult(statistic=0.8748496770858765, pvalue=1.3921548260082623e-20)
sex: ShapiroResult(statistic=0.6365506649017334, pvalue=1.9450171808455066e-32)
fas: ShapiroResult(statistic=0.8418557643890381, pvalue=6.108322276347995e-23)
flynn: ShapiroResult(statistic=0.9484214782714844, pvalue=4.969580302827126e-08)
n2b: ShapiroResult(statistic=0.9819223284721375, pvalue=1.1259722668910399e-05)
happy: ShapiroResult(statistic=0.9196588397026062, pvalue=2.7280316016836366e-16)
prosocial: ShapiroResult(statistic=0.9196588397026062, pvalue=2.7280316016836366e-16)
public_indiv: ShapiroResult(statistic=0.972320020198822, pvalue=1.4070808902033605e-05)
esteem: ShapiroResult(statistic=0.9758210182189941, pvalue=8.659839068059227e-07)
WC: ShapiroResult(statistic=0.7106395363807678, pvalue=1.8757166995163386e-23)
affect: ShapiroResult(statistic=0.5166974067687988, pvalue=5.0224563590069394e-29)
drives: ShapiroResult(statistic=0.7412618398666382, pvalue=2.5388138966845975

# Test equal variances

In [15]:
# H0: equal variance 
print("age:", stats.levene(df_influencer['age'], df_nonInfluencer['age'], center='median'))
print("sex:", stats.levene(df_influencer['sex'], df_nonInfluencer['sex'], center='median'))
print("fas:", stats.levene(df_influencer['fas'], df_nonInfluencer['fas'], center='median'))
print("flynn:", stats.levene(df_influencer['flynn_score'].dropna(), df_nonInfluencer['flynn_score'].dropna(), center='median'))
print("n2b:", stats.levene(df_influencer['n2b_score'].dropna(), df_nonInfluencer['n2b_score'].dropna(), center='median'))
print("happy:", stats.levene(df_influencer['happy_score'].dropna(), df_nonInfluencer['happy_score'].dropna(), center='median'))
print("prosocial:", stats.levene(df_influencer['prosocial_score'].dropna(), df_nonInfluencer['prosocial_score'].dropna(), center='median'))
print("public_indiv:", stats.levene(df_influencer['public_indiv_score'].dropna(), df_nonInfluencer['public_indiv_score'].dropna(), center='median'))
print("esteem:", stats.levene(df_influencer['esteem_score'].dropna(), df_nonInfluencer['esteem_score'].dropna(), center='median'))
print("WC:", stats.levene(df_influencer['WC'].dropna(), df_nonInfluencer['WC'].dropna(), center='median'))
print("affect:", stats.levene(df_influencer['affect'].dropna(), df_nonInfluencer['affect'].dropna(), center='median'))
print("drives:", stats.levene(df_influencer['drives'].dropna(), df_nonInfluencer['drives'].dropna(), center='median'))
print("social:", stats.levene(df_influencer['social'].dropna(), df_nonInfluencer['social'].dropna(), center='median'))
print("polarity:", stats.levene(df_influencer['polarity'].dropna(), df_nonInfluencer['polarity'].dropna(), center='median'))
print("health:", stats.levene(df_influencer['health'].dropna(), df_nonInfluencer['health'].dropna(), center='median'))
print("cogproc:", stats.levene(df_influencer['cogproc'].dropna(), df_nonInfluencer['cogproc'].dropna(), center='median'))
print("subjectivity:", stats.levene(df_influencer['subjectivity'].dropna(), df_nonInfluencer['subjectivity'].dropna(), center='median'))
print("num_likes:", stats.levene(df_influencer['num_likes'].dropna(), df_nonInfluencer['num_likes'].dropna(), center='median'))
print("g_num_text:", stats.levene(df_influencer['g_num_text'].dropna(), df_nonInfluencer['g_num_text'].dropna(), center='median'))
print("g_num_media:", stats.levene(df_influencer['g_num_media'].dropna(), df_nonInfluencer['g_num_media'].dropna(), center='median'))
print("i_num_text:", stats.levene(df_influencer['i_num_text'].dropna(), df_nonInfluencer['i_num_text'].dropna(), center='median'))
print("i_num_media:", stats.levene(df_influencer['i_num_media'].dropna(), df_nonInfluencer['i_num_media'].dropna(), center='median'))
print("i_num_recepients:", stats.levene(df_influencer['i_num_recepients'].dropna(), df_nonInfluencer['i_num_recepients'].dropna(), center='median'))
print("Steps:", stats.levene(df_influencer['Steps'].dropna(), df_nonInfluencer['Steps'].dropna(), center='median'))
print("Minutes_MVPA:", stats.levene(df_influencer['Minutes_MVPA'].dropna(), df_nonInfluencer['Minutes_MVPA'].dropna(), center='median'))
print("BMI:", stats.levene(df_influencer['BMI'].dropna(), df_nonInfluencer['BMI'].dropna(), center='median'))



age: LeveneResult(statistic=10.474030791999462, pvalue=0.0012728899187590772)
sex: LeveneResult(statistic=114.76851610209297, pvalue=9.451106938409972e-25)
fas: LeveneResult(statistic=0.4943586130885811, pvalue=0.48224432709443343)
flynn: LeveneResult(statistic=0.07166328057400885, pvalue=0.7891094426605453)
n2b: LeveneResult(statistic=0.33977285115524947, pvalue=0.5601912980686161)
happy: LeveneResult(statistic=1.0379719326447447, pvalue=0.3086846941242009)
prosocial: LeveneResult(statistic=0.6770450016088153, pvalue=0.4112131757106521)
public_indiv: LeveneResult(statistic=0.04829666257643159, pvalue=0.8261801244916014)
esteem: LeveneResult(statistic=1.1712666210360405, pvalue=0.2796286848675246)
WC: LeveneResult(statistic=0.9209098253855826, pvalue=0.3378158889740429)
affect: LeveneResult(statistic=1.2517927152111932, pvalue=0.26388178798284123)
drives: LeveneResult(statistic=0.37601284824744835, pvalue=0.540093357377142)
social: LeveneResult(statistic=3.31589451310915, pvalue=0.0693

### non equal variance:
#### age, sex, happines, prosocial, cogproc, 

# Mann Whitney U test

In [16]:
# H0: equal variance 
print("age:", stats.mannwhitneyu(df_influencer['age'], df_nonInfluencer['age'], alternative = 'two-sided'))
print("sex:", stats.mannwhitneyu(df_influencer['sex'], df_nonInfluencer['sex'], alternative = 'two-sided'))
print("fas:", stats.mannwhitneyu(df_influencer['fas'], df_nonInfluencer['fas'], alternative = 'two-sided'))
print("flynn:", stats.mannwhitneyu(df_influencer['flynn_score'].dropna(), df_nonInfluencer['flynn_score'].dropna(), alternative = 'two-sided'))
print("n2b:", stats.mannwhitneyu(df_influencer['n2b_score'].dropna(), df_nonInfluencer['n2b_score'].dropna(), alternative = 'two-sided'))
print("happy:", stats.mannwhitneyu(df_influencer['happy_score'].dropna(), df_nonInfluencer['happy_score'].dropna(), alternative = 'two-sided'))
print("prosocial:", stats.mannwhitneyu(df_influencer['prosocial_score'].dropna(), df_nonInfluencer['prosocial_score'].dropna(), alternative = 'two-sided'))
print("public_indiv:", stats.mannwhitneyu(df_influencer['public_indiv_score'].dropna(), df_nonInfluencer['public_indiv_score'].dropna(), alternative = 'two-sided'))
print("esteem:", stats.mannwhitneyu(df_influencer['esteem_score'].dropna(), df_nonInfluencer['esteem_score'].dropna(), alternative = 'two-sided'))
print("Steps:", stats.mannwhitneyu(df_influencer['Steps'].dropna(), df_nonInfluencer['Steps'].dropna(), alternative = 'two-sided'))
print("Minutes_MVPA:", stats.mannwhitneyu(df_influencer['Minutes_MVPA'].dropna(), df_nonInfluencer['Minutes_MVPA'].dropna(), alternative = 'two-sided'))
print("BMI:", stats.mannwhitneyu(df_influencer['BMI'].dropna(), df_nonInfluencer['BMI'].dropna(), alternative = 'two-sided'))
print("num_likes:", stats.mannwhitneyu(df_influencer['num_likes'].dropna(), df_nonInfluencer['num_likes'].dropna(), alternative = 'two-sided'))
print("g_num_text:", stats.mannwhitneyu(df_influencer['g_num_text'].dropna(), df_nonInfluencer['g_num_text'].dropna(), alternative = 'two-sided'))
print("g_num_media:", stats.mannwhitneyu(df_influencer['g_num_media'].dropna(), df_nonInfluencer['g_num_media'].dropna(), alternative = 'two-sided'))
print("i_num_text:", stats.mannwhitneyu(df_influencer['i_num_text'].dropna(), df_nonInfluencer['i_num_text'].dropna(), alternative = 'two-sided'))
print("i_num_media:", stats.mannwhitneyu(df_influencer['i_num_media'].dropna(), df_nonInfluencer['i_num_media'].dropna(), alternative = 'two-sided'))
print("i_num_recepients:", stats.mannwhitneyu(df_influencer['i_num_recepients'].dropna(), df_nonInfluencer['i_num_recepients'].dropna(), alternative = 'two-sided'))
print("WC:", stats.mannwhitneyu(df_influencer['WC'].dropna(), df_nonInfluencer['WC'].dropna(), alternative = 'two-sided'))
print("affect:", stats.mannwhitneyu(df_influencer['affect'].dropna(), df_nonInfluencer['affect'].dropna(), alternative = 'two-sided'))
print("drives:", stats.mannwhitneyu(df_influencer['drives'].dropna(), df_nonInfluencer['drives'].dropna(), alternative = 'two-sided'))
print("social:", stats.mannwhitneyu(df_influencer['social'].dropna(), df_nonInfluencer['social'].dropna(), alternative = 'two-sided'))
print("polarity:", stats.mannwhitneyu(df_influencer['polarity'].dropna(), df_nonInfluencer['polarity'].dropna(), alternative = 'two-sided'))
print("health:", stats.mannwhitneyu(df_influencer['health'].dropna(), df_nonInfluencer['health'].dropna(), alternative = 'two-sided'))
print("cogproc:", stats.mannwhitneyu(df_influencer['cogproc'].dropna(), df_nonInfluencer['cogproc'].dropna(), alternative = 'two-sided'))
print("subjectivity:", stats.mannwhitneyu(df_influencer['subjectivity'].dropna(), df_nonInfluencer['subjectivity'].dropna(), alternative = 'two-sided'))

age: MannwhitneyuResult(statistic=21152.0, pvalue=0.0015736118735123767)
sex: MannwhitneyuResult(statistic=31784.0, pvalue=0.0001632893265173434)
fas: MannwhitneyuResult(statistic=28534.5, pvalue=0.18131818811802314)
flynn: MannwhitneyuResult(statistic=6736.5, pvalue=0.23578481106774052)
n2b: MannwhitneyuResult(statistic=20744.0, pvalue=0.3733398968727405)
happy: MannwhitneyuResult(statistic=29232.0, pvalue=0.03211967742634156)
prosocial: MannwhitneyuResult(statistic=8357.0, pvalue=0.014859427187422825)
public_indiv: MannwhitneyuResult(statistic=10563.0, pvalue=0.001581431006671881)
esteem: MannwhitneyuResult(statistic=21974.5, pvalue=0.09554429681342154)
Steps: MannwhitneyuResult(statistic=30350.0, pvalue=0.016137662465457843)
Minutes_MVPA: MannwhitneyuResult(statistic=27441.5, pvalue=0.49890530121735477)
BMI: MannwhitneyuResult(statistic=24271.5, pvalue=0.6897261162253938)
num_likes: MannwhitneyuResult(statistic=13241.0, pvalue=0.2570834547316587)
g_num_text: MannwhitneyuResult(stati

In [17]:
df_vars.dtypes

Child                       int64
isInfluencer                int64
influencer_ratio_total    float64
WC                        float64
affect                    float64
drives                    float64
social                    float64
polarity                  float64
health                    float64
cogproc                   float64
subjectivity              float64
num_likes                 float64
g_num_text                float64
g_num_media               float64
i_num_text                float64
i_num_media               float64
i_num_recepients          float64
Steps                     float64
Minutes_MVPA              float64
BMI                       float64
flynn_score               float64
n2b_score                 float64
happy_score               float64
prosocial_score           float64
public_indiv_score        float64
esteem_score              float64
age                       float64
sex                         int64
fas                       float64
dtype: object