In [4]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

whole_df = pd.read_csv('../data/clean_df.csv', index_col=[0])

In [2]:
whole_df['macro_bhv'].unique()

array(['Individual', 'Social', 'Social-Ludic', 'Agonistic', nan, 'Public',
       'Ludic', 'Sexual'], dtype=object)

In [33]:
from queries import where_subject_and_date
where_subject_and_date(whole_df, 'Karl', '2019-04-15')

Unnamed: 0,date,subject,period,reg,behavior,receptor,duration,bhv_category,macro_bhv,rel_duration
0,2019-04-15,Karl,pregame,1,cam,[],15.0,moving,Individual,0.024671
1,2019-04-15,Karl,pregame,1,forr,[],34.0,feeding,Individual,0.055921
2,2019-04-15,Karl,pregame,1,sen,[],52.0,resting,Individual,0.085526
3,2019-04-15,Karl,pregame,1,cam,[],5.0,moving,Individual,0.008224
4,2019-04-15,Karl,pregame,1,sen,[],34.0,resting,Individual,0.055921
5,2019-04-15,Karl,pregame,1,cam,[],22.0,moving,Individual,0.036184
6,2019-04-15,Karl,pregame,1,alim,[],34.0,feeding,Individual,0.055921
7,2019-04-15,Karl,pregame,1,ras,[],4.0,grooming,Individual,0.006579
8,2019-04-15,Karl,pregame,1,sen,[],44.0,resting,Individual,0.072368
10,2019-04-15,Karl,pregame,1,sen,[],28.0,resting,Individual,0.046053


In [36]:
macro_df = whole_df.groupby(['date', 'reg','subject','macro_bhv', 'period'])['duration'].sum().reset_index()
macro_df

Unnamed: 0,date,reg,subject,macro_bhv,period,duration
0,2019-04-15,1,Hadia,Individual,pregame,46.0
1,2019-04-15,1,Hadia,Social,pregame,359.0
2,2019-04-15,1,Hadia,Social-Ludic,pregame,195.0
3,2019-04-15,1,Jawie,Individual,pregame,419.0
4,2019-04-15,1,Jawie,Ludic,pregame,40.0
...,...,...,...,...,...,...
1249,2019-07-19,2,Popo,Social,postgame,161.0
1250,2019-07-19,2,Storma,Individual,postgame,500.0
1251,2019-07-19,2,Storma,Ludic,postgame,12.0
1252,2019-07-19,2,Storma,Social,postgame,78.0


In [55]:
whole_df[(whole_df['subject']=='Popo')&(whole_df['macro_bhv']=='Sexual')]

Unnamed: 0,date,subject,period,reg,behavior,receptor,duration,bhv_category,macro_bhv,rel_duration
5413,2019-05-06,Popo,pregame,1,sex,['Jawie'],19.0,sex,Sexual,0.031667
5415,2019-05-06,Popo,pregame,1,sex,['Jawie'],19.0,sex,Sexual,0.031667
5418,2019-05-06,Popo,pregame,1,sex,['Jawie'],60.0,sex,Sexual,0.1
5422,2019-05-06,Popo,pregame,1,sex,['Jawie'],30.0,sex,Sexual,0.05
5424,2019-05-06,Popo,pregame,1,sex,['Jawie'],163.0,sex,Sexual,0.271667
5426,2019-05-06,Popo,pregame,1,sex,['Jawie'],137.0,sex,Sexual,0.228333


In [59]:
bhvsas=macro_df[(macro_df['subject']=='Locky')&(macro_df['period']=='postgame')].groupby('macro_bhv')['duration'].sum()

px.pie(bhvsas.reset_index(), values='duration', names='macro_bhv')

In [48]:
bhvsas.reset_index()

Unnamed: 0,macro_bhv,duration
0,Agonistic,66.0
1,Individual,5310.0
2,Ludic,5054.0
3,Social,4686.0
4,Social-Ludic,1715.0


# Histogram for all behaviors

In [4]:
import math
beh=(whole_df['behavior'].unique())
def behavior_histogram(behavior):
    return go.Histogram(x=whole_df[(whole_df['behavior']==behavior)&(whole_df['duration']<150)]['duration'],
    name=behavior,
    marker={'color':'#C5D845'})
fig = make_subplots(rows=4, cols=8, subplot_titles=beh, vertical_spacing=0.2)
for b in range(len(beh)):
    row = math.floor(b/8+1)
    col = b%8+1
    fig.append_trace(behavior_histogram(beh[b]), row, col)
fig.update_layout({'height':600,
                   'showlegend':False, 
                   'plot_bgcolor': 'rgba(21, 47, 18, 255)',
                   'paper_bgcolor': 'rgba(21, 47, 18, 255)',
                   'font':{'color': 'rgba(230,230,230,255)'},
                   'yaxis':{'showgrid':False}
                   })
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False, showgrid=False)
fig.show()

In [37]:
import math
beh=(whole_df['macro_bhv'].unique())
def behavior_histogram(behavior):
    return go.Histogram(x=macro_df[(macro_df['macro_bhv']==behavior)&(macro_df['duration']<150)]['duration'],
    name=behavior,
    marker={'color':'#C5D845'})
fig = make_subplots(rows=4, cols=2, subplot_titles=beh, vertical_spacing=0.2)
for b in range(len(beh)):
    row = math.floor(b/2+1)
    col = b%2+1
    fig.append_trace(behavior_histogram(beh[b]), row, col)
fig.update_layout({'height':600,
                   'showlegend':False, 
                   'plot_bgcolor': 'rgba(21, 47, 18, 255)',
                   'paper_bgcolor': 'rgba(21, 47, 18, 255)',
                   'font':{'color': 'rgba(230,230,230,255)'},
                   'yaxis':{'showgrid':False}
                   })
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False, showgrid=False)
fig.show()

# Boxplot by Behavior and monkey

In [74]:
DATA = macro_df
def box_by_behavior(subjects, behavior):
    if not isinstance(subjects, list):
        subjects=[subjects]
    df = DATA[(DATA['subject'].isin(subjects))&(DATA['macro_bhv']==behavior)]
    fig = px.box(df, x='period', y='duration', color='period', width=500, title=behavior)
    return fig

fig = box_by_behavior(list(DATA['subject'].unique()), 'Agonistic')
fig.show()

def pie_by_period(subjects, period):
    pass

In [7]:
whole_df.to_csv('../data/clean_df.csv')

# Relative durations

In [14]:
whole_df


Unnamed: 0,date,subject,period,reg,behavior,receptor,duration,bhv_category,macro_bhv,rel_duration
0,2019-04-15,Karl,pregame,1,cam,[],15.0,moving,Individual,0.024671
1,2019-04-15,Karl,pregame,1,forr,[],34.0,feeding,Individual,0.055921
2,2019-04-15,Karl,pregame,1,sen,[],52.0,resting,Individual,0.085526
3,2019-04-15,Karl,pregame,1,cam,[],5.0,moving,Individual,0.008224
4,2019-04-15,Karl,pregame,1,sen,[],34.0,resting,Individual,0.055921
...,...,...,...,...,...,...,...,...,...,...
8109,2019-07-19,Hadia,postgame,2,alim,[],94.0,feeding,Individual,0.156667
8110,2019-07-19,Hadia,postgame,2,jacrob,[],21.0,playing,Ludic,0.035000
8111,2019-07-19,Hadia,postgame,2,bra,[],3.0,moving,Individual,0.005000
8112,2019-07-19,Hadia,postgame,2,alim,[],192.0,feeding,Individual,0.320000


In [9]:
whole_df['rel_duration'] = whole_df.groupby(['date', 'subject', 'reg', 'period'])['duration'].apply(lambda x:x/x.sum())


In [10]:
from queries import *


In [11]:
df = where_subject_and_date(whole_df, 'Karl', '2019-04-15')
df[df['reg']==1]['rel_duration'].sum()

0.9999999999999999

In [12]:
whole_df.to_csv('../data/clean_df.csv')

# Frequencies

Filtrar por mono

In [13]:
freq = pd.read_csv('../data/freqs_df.csv', index_col=[0])
freq_popo_game = freq[(freq['subject']=='Popo')]
freq_popo_game.head()

Unnamed: 0,date,reg,subject,period,macro_bhv,freq
9,2019-04-15,1,Popo,pregame,Individual,1
10,2019-04-15,1,Popo,pregame,Social,2
11,2019-04-15,1,Popo,pregame,Social-Ludic,2
23,2019-04-15,2,Popo,pregame,Individual,8
24,2019-04-15,2,Popo,pregame,Ludic,2


Sumar las frecuencias en cada periodo

In [14]:
aaa= freq_popo_game.groupby(['period','macro_bhv'])['freq'].sum().reset_index()
aaa

Unnamed: 0,period,macro_bhv,freq
0,game,Agonistic,5
1,game,Individual,232
2,game,Ludic,129
3,game,Social,116
4,game,Social-Ludic,34
5,postgame,Individual,225
6,postgame,Ludic,74
7,postgame,Public,3
8,postgame,Social,127
9,postgame,Social-Ludic,35


Crear primero una nueva columna con todo a 0 y después calcular las frecuencias relativas

In [39]:
import numpy as np
aaa['relative_freq'] = 0
def d(x, period):
    sum = x.groupby('period')['freq'].sum()
    x['relative_freq'] += (x['period']==period).astype(int)*x['freq'] / sum[period]

d(aaa,'pregame')
d(aaa,'game')
d(aaa,'postgame')
aaa

Unnamed: 0,period,macro_bhv,freq,relative_freq
0,game,Agonistic,5,0.00969
1,game,Individual,232,0.449612
2,game,Ludic,129,0.25
3,game,Social,116,0.224806
4,game,Social-Ludic,34,0.065891
5,postgame,Individual,225,0.484914
6,postgame,Ludic,74,0.159483
7,postgame,Public,3,0.006466
8,postgame,Social,127,0.273707
9,postgame,Social-Ludic,35,0.075431


Diagrama de barras con frecuencias relativas para cada mono

In [40]:
px.bar(aaa, x='period', y='relative_freq', color='macro_bhv', width=400)

# Histogram freqs

In [17]:
import numpy as np
beh=(freq['macro_bhv'].unique())
beh =np.delete(beh, np.where(beh=='Sexual'))

In [72]:
import math

def behavior_histogram(behavior):
    return go.Histogram(x=freq[(freq['macro_bhv']==behavior)]['freq'],
    name=behavior,
    marker={'color':'#C5D845'})
fig = make_subplots(rows=3, cols=2, subplot_titles=beh, vertical_spacing=0.2)
for b in range(len(beh)):
    row = math.floor(b/2+1)
    col = b%2+1
    fig.append_trace(behavior_histogram(beh[b]), row, col)
fig.update_layout({'height':800,
                   'showlegend':False, 
                   'plot_bgcolor': 'rgba(21, 47, 18, 255)',
                   'paper_bgcolor': 'rgba(21, 47, 18, 255)',
                   'font':{'color': 'rgba(230,230,230,255)'},
                   'yaxis':{'showgrid':False}
                   })
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False, showgrid=False)
fig.show()

Exception: The (row, col) pair sent is out of range. Use Figure.print_grid to view the subplot grid. 

In [22]:
fig = go.Figure()
fig.add_trace(go.Histogram(x=freq[(freq['macro_bhv']=='Ludic')&(freq['period']=='pregame')]['freq']))
fig.add_trace(go.Histogram(x=freq[(freq['macro_bhv']=='Ludic')&(freq['period']=='game')]['freq']))
fig.add_trace(go.Histogram(x=freq[(freq['macro_bhv']=='Ludic')&(freq['period']=='postgame')]['freq']))




# Overlay both histograms
fig.update_layout(barmode='overlay')
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

In [32]:
fig = go.Figure()
behavior = 'Ludic'
fig.add_trace(go.Histogram(
    x=freq[(freq['macro_bhv']==behavior)&(freq['period']=='pregame')]['freq'],
    name='Pregame', # name used in legend and hover labels
    xbins=dict( # bins used for histogram
        start=-4.0,
        end=30,
        size=0.5
    ),
    marker_color='#EB89B5',
    opacity=0.75
))
fig.add_trace(go.Histogram(
    x=freq[(freq['macro_bhv']==behavior)&(freq['period']=='game')]['freq'],
    name='game',
    xbins=dict(
        start=-3.0,
        end=31,
        size=0.5
    ),
    marker_color='#330C73',
    opacity=0.75
))

fig.update_layout(
    title_text=behavior, # title of plot
    xaxis_title_text='Frequencies', # xaxis label
    yaxis_title_text='Count', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    width=500
)

fig.show()

# Individual T tests
La idea es poder ver si hay diferencias individuales en los periodos. Se puede hacer un T-test para cada individuo. 

In [1]:
import pandas as pd
import plotly.express as px
import scipy.stats as stats
import queries

def individual_t_test(subject, periodA, periodB, bhv):
    pass

df = pd.read_csv('../data/clean_df.csv', index_col=[0])\
                        .groupby(['period', 'date','subject', 'reg','macro_bhv'])\
                        ['duration'] \
                        .sum() \
                        .unstack(level='macro_bhv', fill_value=0) \
                        .reset_index()
df.head()

macro_bhv,period,date,subject,reg,Agonistic,Individual,Ludic,Public,Sexual,Social,Social-Ludic
0,game,2019-05-20,Hadia,1,0.0,448.0,24.0,0.0,0.0,138.0,0.0
1,game,2019-05-20,Hadia,2,0.0,297.0,0.0,136.0,0.0,131.0,33.0
2,game,2019-05-20,Jawie,1,0.0,0.0,0.0,0.0,0.0,600.0,0.0
3,game,2019-05-20,Jawie,2,0.0,489.0,0.0,0.0,0.0,94.0,0.0
4,game,2019-05-20,Karl,1,0.0,540.0,0.0,0.0,0.0,0.0,0.0


In [4]:
def behavior_means_stds(df, behavior):
    return pd.DataFrame({
            'means': [get_stat(df, 'pregame', behavior, 'mean',),
                      get_stat(df, 'game', behavior, 'mean',),
                      get_stat(df, 'postgame', behavior, 'mean',)],
            'stds': [get_stat(df, 'pregame',behavior,  'std',),
                      get_stat(df, 'game',behavior,  'std',),
                      get_stat(df, 'postgame',behavior,  'std',)]
                      },
            index=['pregame', 'game', 'postgame'])


def get_stat(df, period, bhv, stat):
    return df.query(f"period=='{period}'") \
             .describe() \
             .loc[stat,bhv]

popo = df.query("subject=='Popo'")
statis = behavior_means_stds(popo,'Social-Ludic')
statis

Unnamed: 0,means,stds
pregame,103.266667,162.08617
game,57.166667,113.91318
postgame,110.333333,173.675546


In [5]:
jaw_pre = queries.filter_subject_period(df, 'Popo', 'pregame').reset_index()
jaw_game = queries.filter_subject_period(df, 'Popo', 'game').reset_index()

## Tests for normality
### Calculate difference between one period and the other one

With this we can see if the distribution of this differences is more or less normal, to decide which test statistic to conduct. 

In [13]:
ind_diff = jaw_pre['Ludic'] - jaw_game['Ludic']

### Plots

With a histogram we can visually tell if the distribution is more or less normal. 
And then a QQ plot (buscar info de para qué carajo sirve)

In [12]:
px.histogram(ind_diff)

In [8]:
import numpy as np
import plotly.graph_objects as go

# CONVERTIR TODO ESTO EN UN MÉTODO!!!

qq = stats.probplot(ind_diff, dist='norm')

x = np.array([qq[0][0][0], qq[0][0][-1]])
fig = go.Figure()
fig.add_scatter(x=qq[0][0], y=qq[0][1], mode='markers')
fig.add_scatter(x=x, y=qq[1][1] + qq[1][0]*x, mode='lines')
fig.layout.update(showlegend=False)
fig.show()


###

También hacemos tests para ver si siguen distribución normal. En ambos casos, la hipótesis nula es que siguen normal distribution. 
- En Shapiro test, el p-value es mayor que 0.05, -> _NO_ se rechaza la hipótesis nula -> los datos son normales
- En Anderson test, el p-value no es mayor que ninguno de los critical-values, -> NO se rechaza la hipótesis nula

(repasar Anderson test)^

In [9]:
print(stats.shapiro(ind_diff))
print(stats.anderson(ind_diff))


ShapiroResult(statistic=0.9788524508476257, pvalue=0.7942844033241272)
AndersonResult(statistic=0.3069887322460474, critical_values=array([0.521, 0.593, 0.712, 0.83 , 0.988]), significance_level=array([15. , 10. ,  5. ,  2.5,  1. ]))


### Equality of variances
Ttests have the assumption of equality of variances. If variances are not equal, should perform another test. In Bartlett's test, $H_0: \sigma^2_1 = \sigma^2_2 = ... = \sigma^2_n$. In this example, as pvalue > 0.05, we don't reject $H_0$. Therefore, we conduct a standard t-test instead of a Welch's t-test.

In [20]:
bartlett = stats.bartlett(jaw_pre['Ludic'], jaw_game['Ludic'])
bartlett

BartlettResult(statistic=1.827411247734494, pvalue=0.1764336222524635)

In [27]:
stats.ttest_ind(jaw_pre['Ludic'], jaw_game['Ludic'], equal_var=False).pvalue

0.037994047349158074

In [4]:
import yaml
with open('config.yaml', 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)
type(config['DATASET']['subjects'])

list

In [5]:
config['DATASET']['subjects'].append('All')

In [6]:
config['DATASET']['subjects']

['Popo', 'Hadia', 'Karl', 'Locky', 'Storma', 'Jawie', 'All']

### T-test vs paired t-test

In [60]:
import pandas as pd
import queries
import scipy.stats as stats
from data_manager import unstack_behaviors



def ttests(df):
    unstacked = unstack_behaviors(df)
    unstacked.sort_values(by='subject', inplace=True)
    results = {}
    for sub in unstacked.subject.unique():
        for bhv in df['macro_bhv'].unique():
            ttest = single_ttest(sub, unstacked, bhv, 'pregame', 'game')
            paired = paired_ttest(sub, unstacked, bhv, 'pregame', 'game')
            results[(bhv, 'ttest')] = results.get((bhv, 'ttest'), [])
            results[(bhv, 'ttest')].append(ttest)
            results[(bhv, 'paired')] = results.get((bhv, 'paired'), [])
            results[(bhv, 'paired')].append(paired)
    return pd.DataFrame(results, index=unstacked.subject.unique())
    
    
          
def single_ttest(subject, df, behavior, periodA, periodB):
    """Returns ttest values to see if means of behavior between periodA and periodB are different"""
    A_data = queries.filter_subject_period(df, subject, periodA) \
                    .reset_index()[behavior]
    B_data = queries.filter_subject_period(df, subject, periodB) \
                    .reset_index()[behavior]
 
    if len(A_data) < 30:
        pd.concat([A_data, pd.Series(0, index=[30])])
    if len(B_data) < 30:
        pd.concat([B_data, pd.Series(0, index=[30])])
    equal_var = stats.bartlett(A_data, B_data).pvalue > 0.05
    ttest = stats.ttest_ind(A_data,B_data, equal_var=equal_var)

    significative = ttest.pvalue < 0.05
    signif_string = 'is' if significative else 'is not' 
    return ttest.pvalue

def paired_ttest(subject, df, behavior, periodA, periodB):
    """Returns ttest values to see if means of behavior between periodA and periodB are different"""
    A_data = queries.filter_subject_period(df, subject, periodA) \
                    .reset_index()[behavior]
    B_data = queries.filter_subject_period(df, subject, periodB) \
                    .reset_index()[behavior]
    if len(A_data) < 30:
        A_data = pd.concat([A_data, pd.Series(0, index=[30])])
    if len(B_data) < 30:
        B_data = pd.concat([B_data, pd.Series(0, index=[30])])
    equal_var = stats.bartlett(A_data, B_data).pvalue > 0.05
    ttest = stats.ttest_rel(A_data,B_data)

    significative = ttest.pvalue < 0.05
    signif_string = 'is' if significative else 'is not' 
    return ttest.pvalue


In [61]:
df = pd.read_csv('../data/clean_df.csv', index_col=[0])\
    .groupby(['date', 'reg','subject','macro_bhv', 'period'])\
    ['duration'].sum().reset_index()
unstacked = unstack_behaviors(df)
unstacked.sort_values(by='subject', inplace=True)
results = {}

results = {}
for sub in unstacked.subject.unique():
    for bhv in df['macro_bhv'].unique():
        print(sub, bhv)
        ttest = single_ttest(sub, unstacked, bhv, 'pregame', 'game')
        paired = paired_ttest(sub, unstacked, bhv, 'pregame', 'game')
        results[(bhv, 'ttest')] = results.get((bhv, 'ttest'), [])
        results[(bhv, 'ttest')].append(ttest)
        results[(bhv, 'paired')] = results.get((bhv, 'paired'), [])
        results[(bhv, 'paired')].append(paired)

Hadia Individual
Hadia Social
Hadia Social-Ludic
Hadia Ludic
Hadia Public
Hadia Agonistic
Hadia Sexual
Jawie Individual
Jawie Social
Jawie Social-Ludic
Jawie Ludic
Jawie Public
Jawie Agonistic
Jawie Sexual
Karl Individual
Karl Social
Karl Social-Ludic
Karl Ludic
Karl Public
Karl Agonistic
Karl Sexual
Locky Individual
Locky Social
Locky Social-Ludic
Locky Ludic
Locky Public
Locky Agonistic
Locky Sexual
Popo Individual
Popo Social
Popo Social-Ludic
Popo Ludic
Popo Public
Popo Agonistic
Popo Sexual
Storma Individual
Storma Social
Storma Social-Ludic
Storma Ludic
Storma Public
Storma Agonistic
Storma Sexual


  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) * log(spsq) - np.sum((Ni - 1.0)*log(ssq), axis=0)
  numer = (Ntot*1.0 - k) 

In [65]:
results = pd.DataFrame(results, index=df.subject.unique())

In [71]:
res = pd.concat([results[bhv] for bhv in df.macro_bhv.unique()]).reset_index().fillna(0)

In [79]:
results

Unnamed: 0_level_0,Individual,Individual,Social,Social,Social-Ludic,Social-Ludic,Ludic,Ludic,Public,Public,Agonistic,Agonistic,Sexual,Sexual
Unnamed: 0_level_1,ttest,paired,ttest,paired,ttest,paired,ttest,paired,ttest,paired,ttest,paired,ttest,paired
Hadia,0.067866,0.092298,0.053825,0.070056,0.402286,0.436591,0.220347,0.254894,0.447325,0.450974,0.253663,0.25573,,
Jawie,0.063922,0.082944,0.08489,0.120531,0.325582,0.325582,0.686289,0.695465,0.498849,0.503477,0.735358,0.745288,,
Karl,0.158034,0.644872,0.224075,0.225793,0.325582,0.325582,,,0.523213,0.536203,0.010703,0.010703,,
Locky,0.252563,0.286389,0.105047,0.12801,,,0.052647,0.053626,0.472155,0.495344,0.467822,0.463107,,
Popo,0.750731,0.74247,0.28258,0.314092,0.207556,0.176208,0.037726,0.056087,0.207883,0.207883,0.646879,0.656732,0.325582,0.325582
Storma,0.927918,0.928381,0.188812,0.191924,0.025407,0.023989,0.310687,0.342717,0.223541,0.223541,0.276946,0.279458,,


In [4]:
px.scatter(df, x='date', y='Agonistic', color='subject')

In [36]:
import pandas as pd
df =pd.read_csv('../data/clean_df.csv', index_col=[0])\
                        .groupby(['date', 'reg','subject','macro_bhv', 'period'])\
                        ['duration'].sum().reset_index()

In [42]:
import queries

queries.filter_subject_period(df, 'Locky', 'pregame').reset_index().groupby(['period','reg', 'date']).sum('duration')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,duration
period,reg,date,Unnamed: 3_level_1,Unnamed: 4_level_1
pregame,1,2019-04-15,15,602.0
pregame,1,2019-04-16,75,600.0
pregame,1,2019-04-17,174,521.0
pregame,1,2019-04-18,90,605.0
pregame,1,2019-04-19,114,600.0
pregame,1,2019-04-22,141,600.0
pregame,1,2019-04-23,167,600.0
pregame,1,2019-04-24,782,600.0
pregame,1,2019-04-25,423,620.0
pregame,1,2019-04-26,483,605.0


In [21]:
import plotly.express as px

pre = df[(df['macro_bhv']=='Individual')&(df['period']=='pregame')]
game = df[(df['macro_bhv']=='Individual')&(df['period']=='game')]
len(pre), len(game)

(159, 166)

In [22]:
diff = pre['duration'].reset_index() - game[:len(pre)]['duration'].reset_index()


In [23]:
px.histogram(diff['duration'])

# Linear Mixed Models

In [13]:
import statsmodels.formula.api as smf
import pandas as pd
import plotly.express as px
import data_manager as dm

df = dm.unstack_behaviors(pd.read_csv('../data/clean_df.csv'))
df['period'] = pd.Categorical(df['period'], ['pregame', 'game', 'postgame'])
df.sort_values('period', inplace=True)
df.head()


macro_bhv,period,date,subject,reg,Agonistic,Individual,Ludic,Public,Sexual,Social,Social-Ludic
536,pregame,2019-05-07,Storma,2,15.0,498.0,0.0,0.0,0.0,38.0,0.0
420,pregame,2019-04-22,Jawie,2,0.0,387.0,0.0,0.0,0.0,179.0,0.0
419,pregame,2019-04-22,Jawie,1,0.0,131.0,0.0,0.0,0.0,469.0,0.0
418,pregame,2019-04-22,Hadia,2,0.0,34.0,0.0,0.0,0.0,566.0,0.0
417,pregame,2019-04-22,Hadia,1,0.0,0.0,0.0,0.0,0.0,600.0,0.0


In [17]:
df['Positive'] = df['Ludic'] + df['Social'] + df['Social-Ludic']
df['Negative'] = df['Agonistic'] + df['Public']
px.box(df[df['subject']=='Popo'], x='period', y='Ludic')


In [49]:
lmm = smf.mixedlm('Ludic ~ C(period)', df, groups=df['subject'], re_formula='~1+C(period)').fit()
lmm.summary()


Maximum Likelihood optimization failed to converge. Check mle_retvals


Retrying MixedLM optimization with lbfgs



0,1,2,3
Model:,MixedLM,Dependent Variable:,Ludic
No. Observations:,537,Method:,REML
No. Groups:,6,Scale:,6731.8192
Min. group size:,87,Log-Likelihood:,-3131.2550
Max. group size:,90,Converged:,Yes
Mean group size:,89.5,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,32.711,16.534,1.978,0.048,0.304,65.118
C(period)[T.game],20.677,15.046,1.374,0.169,-8.812,50.166
C(period)[T.postgame],8.239,14.608,0.564,0.573,-20.392,36.870
Group Var,1415.908,14.905,,,,
Group x C(period)[T.game] Cov,1133.934,12.919,,,,
C(period)[T.game] Var,908.191,18.245,,,,
Group x C(period)[T.postgame] Cov,111.473,12.146,,,,
C(period)[T.game] x C(period)[T.postgame] Cov,88.457,15.340,,,,
C(period)[T.postgame] Var,828.958,16.880,,,,


Model:	MixedLM         |	Dependent Variable:	Ludic
No. Observations:	537 |	Method:	REML
No. Groups:	6           |	Scale:	7041.3338
Min. group size:	87  |	Log-Likelihood:	-3139.6742
Max. group size:	90  |	Converged:	Yes
Mean group size:	89.5|		

 
|      .                | Coef.    | 	Std.Err. | z	 | P>z	 |  [0.025	| 0.975] |
|-----------------------|----------|----         | ----  | ----  |  ----    | ----   |
| Intercept	            |  32.711  | 21.170	     | 1.545 | 0.122 | 	-8.781	| 74.204 |
| C(period)[T.game]	    | 20.747   | 8.858	     | 2.342 | 0.019 | 3.387	| 38.108 |
| C(period)[T.postgame]	| 8.217    | 8.870	     | 0.926 | 0.354 | 	-9.169  | 25.602 |
| Group Var	            |2454.321  | 19.178      | 	.	 | .     |  .       | .      |


In [50]:
lmm.params

Intercept                                        32.711111
C(period)[T.game]                                20.676877
C(period)[T.postgame]                             8.239004
Group Var                                         0.210331
Group x C(period)[T.game] Cov                     0.168444
C(period)[T.game] Var                             0.134910
Group x C(period)[T.postgame] Cov                 0.016559
C(period)[T.game] x C(period)[T.postgame] Cov     0.013140
C(period)[T.postgame] Var                         0.123140
dtype: float64

In [51]:
lmm.random_effects

{'Hadia': Group                    33.821037
 C(period)[T.game]        27.039239
 C(period)[T.postgame]    47.422042
 dtype: float64,
 'Jawie': Group                   -28.573277
 C(period)[T.game]       -22.876142
 C(period)[T.postgame]    -9.076010
 dtype: float64,
 'Karl': Group                   -29.739776
 C(period)[T.game]       -23.810306
 C(period)[T.postgame]    -9.200016
 dtype: float64,
 'Locky': Group                   -18.911461
 C(period)[T.game]       -15.149293
 C(period)[T.postgame]     4.121671
 dtype: float64,
 'Popo': Group                    58.383729
 C(period)[T.game]        46.789343
 C(period)[T.postgame]   -24.774695
 dtype: float64,
 'Storma': Group                   -14.980252
 C(period)[T.game]       -11.992840
 C(period)[T.postgame]    -8.492992
 dtype: float64}

# Trendlines

In [31]:
px.scatter(df[df['subject']=='Popo'], x='date', y='Ludic')

In [33]:
df.sort_values(by='date', inplace=True)

In [46]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

ma = df.loc[df['subject']=='Popo', 'Social-Ludic'].rolling(10).mean()
fig = make_subplots()
fig.add_trace(go.Scatter(x=df.loc[df['subject']=='Popo', 'date'], y=ma))
fig.add_trace(go.Scatter(x=df.loc[df['subject']=='Popo', 'date'], y=df.loc[df['subject']=='Popo', 'Social-Ludic'], mode='markers'))
fig.show()
