In [None]:
"""
author: EdgardoCS @FSU Jena
date: 16.04.2025
"""

import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

pd.options.mode.chained_assignment = None

In [None]:
input_data = "output/data_sorted.xlsx"
columns = ['id', 'points', 'gender', 'segment', 'location', 'type']

data = pd.read_excel(input_data, usecols=columns)

# focus on female and male for now
data = (data[data['gender'].isin(['female', 'male'])])

# Conditions:
# front + self + female
# front + self + male
# front + other + female
# front + other + male
#
# back + self + female
# back + self + male
# back + other + female
# back + other + male

In [3]:
data['segment'] = data['segment'].astype('category')
data['location'] = data['location'].astype('category')
data['type'] = data['type'].astype('category')
data['gender'] = data['gender'].astype('category')
data['id'] = data['id'].astype('category')  # Random effect

In [5]:

model = smf.mixedlm(
    "points ~ segment * location * type * gender",
    data,
    groups=data["id"]
)
result = model.fit()
print(result.summary())

# Reference: female, others, armpit
# Intercept 2,004, p = 0,000 (***)

# location (front) 0,032, p 0,466 (ns)
# type (self) -0,004, p = 0,941 (ns)
# gender (male) 0,082, p = 0,207 (ns)
# β, p
# segment[T.chest] -0.266, p = 0.000 (***)
# segment[T.feet] -0.034, p = 0.518 (ns)
# segment[T.hair] -0.670, p = 0.000 (***)
# segment[T.hand] -0.298, p = 0.000 (***)
# segment[T.knee] -0.367, p = 0.000 (***)
# segment[T.mouth] -0.583, p = 0.000 (***)
# segment[T.neck] -0.637, p = 0.000 (***)
# segment[T.pelvis] -0.802, p = 0.000 (***)

# Compared to the highest rated segment, armpit, the others were rated significantly lower with the exception of the feet.
# chest is rated higher in the back (β = -0.207, p = 0.016), as well as knee (β = -0.335, p = 0.017)
# feet are rated higher in the front (β = 0.178, p = 0.007), as well as neck (β = 0.156, p = 0.027)
# No significant three- or four-way interactions involving type (p = 0,941) or gender (p = 0,207) were observed.


                                   Mixed Linear Model Regression Results
Model:                              MixedLM                 Dependent Variable:                 points     
No. Observations:                   26915                   Method:                             REML       
No. Groups:                         2409                    Scale:                              0.9005     
Min. group size:                    1                       Log-Likelihood:                     -39005.2520
Max. group size:                    36                      Converged:                          Yes        
Mean group size:                    11.2                                                                   
-----------------------------------------------------------------------------------------------------------
                                                                Coef.  Std.Err.    z    P>|z| [0.025 0.975]
---------------------------------------------------------------

In [6]:
females = data[data['gender'] == 'female'].copy()
males = data[data['gender'] == 'male'].copy()

In [None]:
# 1. Is there any difference between female and male when smelling others?
target = (data[
              (data['type'] == 'other') &
              (data['gender'].isin(['female', 'male']))]
          .copy())
three_model = ols("""points ~ C(segment) + C(gender) + C(location) +
               C(segment):C(gender) + C(segment):C(location) + C(gender):C(location) +
               C(segment):C(gender):C(location)""", data=target).fit()
res1 = anova_lm(three_model, typ=2)


# C(segment) -> p= 0,000
# C(gender) -> p= 0,884
# C(location) -> p= 0,366
# C(segment):C(gender) -> p= 0,355
# C(segment):C(location) -> p= 7,385
# C(gender):C(location) -> p= 0,224
# C(segment):C(gender):C(location) -> p= 0,495

# Answer, apparently *None* given the three model calculation

In [None]:
# 2. Is there any difference between female and male when smelling themselves?
target = (data[
              (data['type'] == 'self') &
              (data['gender'].isin(['female', 'male']))]
          .copy())
three_model = ols("""points ~ C(segment) + C(gender) + C(location) +
               C(segment):C(gender) + C(segment):C(location) + C(gender):C(location) +
               C(segment):C(gender):C(location)""", data=target).fit()
res2 = anova_lm(three_model, typ=2)

# C(segment) -> p= 0,000
# C(gender) -> p= 0,009
# C(location) -> p= 0,152
# C(segment):C(gender) -> p= 0,977
# C(segment):C(location) -> p= 0,022
# C(gender):C(location) -> p= 0,668
# C(segment):C(gender):C(location) -> p= 0,816

# Answer, there is a main effect of gender, and there is significant difference
# between segment and location when participants smell themselves

In [None]:
# target["segment_location"] = target["segment"].astype(str) + "_" + target["location"].astype(str)
# tukey = pairwise_tukeyhsd(target["points"], target["segment_location"])
# print(tukey)

In [22]:
# Take both genders, front and back when smelling themselves
target = data[
    (data['location'].isin(['front', 'back'])) &
    (data['type'] == 'self') &
    (data['gender'].isin(['female', 'male']))
    ].copy()
target['group1'] = (
        target['location'].astype(str) + '_'
        + target['segment'].astype(str)
)
target['group2'] = (
        target['gender'].astype(str) + '_'
        + target['segment'].astype(str)
)
target['group3'] = (
        target['gender'].astype(str) + '_'
        + target['location'].astype(str) + '_'
        + target['segment'].astype(str)
)
tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['group2'], alpha=0.05)


# group     meandiff	p-adj	lower	upper	reject
# (front vs back)
# armpit	-0.0481	    0.9999	-0.2054	0.1093	False
# chest	    -0.2054	    0.7353	-0.5271	0.1163	False
# feet	    0.0671	    0.9986	-0.1164	0.2506	False
# hair	    -0.0627	    1.0	    -0.3866	0.2612	False
# hand	    -0.0428	    1.0	    -0.281	0.1954	False
# knee	    -0.0394	    1.0	    -0.597	0.5182	False
# mouth	    -0.3381	    0.1214	-0.7076	0.0314	False
# neck	    0.0695	    1.0	    -0.224	0.3629	False
# pelvis	0.0039	    1.0	    -0.1752	0.1831	False

# group     meandiff	p-adj	lower	upper	reject
# (female vs male)
# armpit	-0.084	    0.9009	-0.236	0.0679	False
# chest	    -0.0897	    0.9999	-0.4026	0.2231	False
# feet	    -0.075	    0.9957	-0.2619	0.1119	False
# hair	    -0.0351	    1.0	    -0.3826	0.3125	False
# hand	    -0.0119	    1.0	    -0.2541	0.2302	False
# knee	    -0.0618	    1.0	    -0.6508	0.5272	False
# mouth	    -0.0487	    1.0	    -0.3572	0.2598	False
# neck	    -0.0892	    0.9999	-0.3832	0.2048	False
# pelvis	    -0.0146	    1.0	    -0.1944	0.1652	False

# When comparing the same locations, there is no statistically difference between gender and location regarding smelling themselves

  quad_r = quad(f, low, high, args=args, full_output=self.full_output,


In [23]:
# 3. is there any difference when males smell themselves vs when they smell others?
target = (data[
              (data['type'].isin(['self', 'other'])) &
              (data['gender'] == 'male')]
          .copy())
three_model = ols("""points ~ C(segment) + C(type) + C(location) +
               C(segment):C(type) + C(segment):C(location) + C(type):C(location) +
               C(segment):C(type):C(location)""", data=target).fit()
res3 = anova_lm(three_model, typ=2)

# C(segment) -> p= 0,000
# C(type) -> p= 0,031
# C(location) -> p= 0,043
# C(segment):C(type) -> p= 0,537
# C(segment):C(location) -> p= 0,001
# C(type):C(location) -> p= 0,884
# C(segment):C(type):C(location) -> p= 0,984

# Answer, there is a main effect of type, and there is significant difference
# between segment and location when participants smell themselves vs when they smell others

In [25]:
# Lets check the above
target = data[
    (data['location'].isin(['front', 'back'])) &
    (data['gender'] == 'male') &
    (data['type'].isin(['self', 'other']))
    ].copy()
target['group1'] = (
        target['type'].astype(str) + '_'
        + target['segment'].astype(str)
)
target['group2'] = (
        target['location'].astype(str) + '_'
        + target['segment'].astype(str)
)
tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['group2'], alpha=0.05)

# group     meandiff	p-adj	lower	upper	reject
# (self vs other)
# armpit	-0.0506	    0.9999	-0.217	0.1158	False
# chest	    -0.0745	    1.0	    -0.39	0.2411	False
# feet	    -0.0883	    0.9916	-0.2955	0.1188	False
# hair	    0.0869	    1.0	    -0.2664	0.4402	False
# hand	    0.0305	    1.0	    -0.2601	0.321	False
# knee	    0.1451	    1.0	    -0.4341	0.7243	False
# mouth	    -0.127	    0.997	-0.4533	0.1994	False
# neck	    -0.0494	    1.0	    -0.3232	0.2244	False
# pelvis	-0.1268	    0.7585	-0.3285	0.0749	False

# group     meandiff	p-adj	lower	upper	reject
# (front vs back)
# armpit	-0.1062	    0.8087	-0.2815	0.0691	False
# chest	    -0.3709	    0.0032	-0.6774	-0.0643	True
# feet	    0.0838	    0.9961	-0.1266	0.2942	False
# hair	    -0.1236	    0.999	-0.4705	0.2233	False
# hand	    -0.0699	    1.0	    -0.3747	0.2348	False
# knee	    -0.0048	    1.0	    -0.5552	0.5456	False
# mouth	    -0.1502	    0.9962	-0.5281	0.2277	False
# neck	    0.11	    0.9898	-0.1436	0.3636	False
# pelvis	-0.0228	    1.0	    -0.2266	0.1811	False



  quad_r = quad(f, low, high, args=args, full_output=self.full_output,


In [26]:
# 4. is there any difference when females smell themselves vs when they smell others?
target = (data[
              (data['type'].isin(['self', 'other'])) &
              (data['gender'] == 'female')]
          .copy())
three_model = ols("""points ~ C(segment) + C(type) + C(location) +
               C(segment):C(type) + C(segment):C(location) + C(type):C(location) +
               C(segment):C(type):C(location)""", data=target).fit()
res4 = anova_lm(three_model, typ=2)

# C(segment) -> p= 0,000
# C(type) -> p= 0,955
# C(location) -> p= 0,569
# C(segment):C(type) -> p= 0,043
# C(segment):C(location) -> p= 7,251
# C(type):C(location) -> p= 0,478
# C(segment):C(type):C(location) -> p= 0,881

In [27]:
# Lets check the above
target = data[
    (data['location'].isin(['front', 'back'])) &
    (data['gender'] == 'female') &
    (data['type'].isin(['self', 'other']))
    ].copy()

target['group'] = target['type'].astype(str) + '_' + target['segment'].astype(str)

tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['group'], alpha=0.05)

# group     meandiff	p-adj	lower	upper	reject
# (self vs other)
# armpit	0.0552	0.9952	-0.0808	0.1912	False
# chest	    0.0606	1.0	    -0.1973	0.3184	False
# feet	    -0.1047	0.7349	-0.2687	0.0593	False
# hair	    0.0679	1.0	    -0.1833	0.3191	False
# hand	    -0.0807	0.9985	-0.3	0.1385	False
# knee	    0.2483	0.9338	-0.2234	0.72	False
# mouth	    -0.3381	0.1214	-0.7076	0.0314	False
# pelvis	0.0039	1.0	    -0.1752	0.1831	False
# neck 	    0.0695	1.0	    -0.224	0.3629	False
# mouth	    -0.0743	0.9999	-0.3243	0.1756	False
# neck 	    0.0341	1.0	    -0.1943	0.2625	False
# pelvis	-0.0141	1.0	    -0.1776	0.1494	False

# None

  quad_r = quad(f, low, high, args=args, full_output=self.full_output,


      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
other_armpit  other_chest  -0.2388 0.0019 -0.4308 -0.0468   True
other_armpit   other_feet   0.0877 0.8254 -0.0591  0.2344  False
other_armpit   other_hair  -0.6117    0.0 -0.7929 -0.4304   True
other_armpit   other_hand   -0.139 0.4644 -0.3266  0.0486  False
other_armpit   other_knee  -0.2762 0.1677 -0.5901  0.0376  False
other_armpit  other_mouth  -0.5443    0.0 -0.7317 -0.3569   True
other_armpit   other_neck  -0.5035    0.0 -0.6605 -0.3466   True
other_armpit other_pelvis  -0.7729    0.0 -0.9229 -0.6228   True
other_armpit  self_armpit   0.0552 0.9952 -0.0808  0.1912  False
other_armpit   self_chest  -0.1782 0.2782  -0.396  0.0395  False
other_armpit    self_feet   -0.017    1.0 -0.1692  0.1352  False
other_armpit    self_hair  -0.5437    0.0  -0.763 -0.3245   True
other_armpit    self_hand

In [30]:
# 5. Comparing the body as whole, is there any difference between female and males when smelling themselves in front and back, and what about when smelling others?
target = (data[
              (data['type'].isin(['self', 'other'])) &
              (data['gender'].isin(['female', 'male']))]
          .copy())
three_model = ols("""points ~ C(gender) + C(type) + C(location) +
               C(gender):C(type) + C(gender):C(location) + C(type):C(location) +
               C(gender):C(type):C(location)""", data=target).fit()
res5 = anova_lm(three_model, typ=2)
# None


In [None]:
# 6. Where do females prefer to smell themselves (front)
target = data[
    (data['location'] == 'front') &
    (data['gender'] == 'female') &
    (data['type'] == 'self')
    ].copy()

tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['segment'].astype(str), alpha=0.05)
print(tukey.summary())

In [35]:
# 7. Where do females prefer to smell others (front)
target = data[
    (data['location'] == 'front') &
    (data['gender'] == 'female') &
    (data['type'] == 'other')
    ].copy()

tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['segment'].astype(str), alpha=0.05)
print(tukey.summary())

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
armpit  chest  -0.2475 0.0372 -0.4875 -0.0075   True
armpit   feet   0.1759  0.033  0.0074  0.3443   True
armpit   hair  -0.5949    0.0 -0.8036 -0.3862   True
armpit   hand  -0.1567 0.3168 -0.3643  0.0509  False
armpit   knee  -0.3663 0.1799 -0.8007  0.0681  False
armpit  mouth  -0.6053    0.0 -0.8038 -0.4067   True
armpit   neck  -0.4176    0.0 -0.6002 -0.2349   True
armpit pelvis  -0.6881    0.0  -0.863 -0.5132   True
 chest   feet   0.4234    0.0  0.1706  0.6761   True
 chest   hair  -0.3474 0.0041 -0.6285 -0.0662   True
 chest   hand   0.0908 0.9855 -0.1895  0.3711  False
 chest   knee  -0.1188 0.9974 -0.5923  0.3547  False
 chest  mouth  -0.3578 0.0017 -0.6315 -0.0841   True
 chest   neck  -0.1701 0.5354 -0.4324  0.0923  False
 chest pelvis  -0.4406    0.0 -0.6976 -0.1835   True
  feet   hair  -0.7707    0.0  -0.994 -0.5475 

In [None]:
# 8. Where do males prefer to smell themselves (front)
target = data[
    (data['location'] == 'front') &
    (data['gender'] == 'male') &
    (data['type'] == 'self')
    ].copy()

tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['segment'].astype(str), alpha=0.05)
print(tukey.summary())

In [36]:
# 8. Where do females prefer to smell others (front)
target = data[
    (data['location'] == 'front') &
    (data['gender'] == 'male') &
    (data['type'] == 'other')
    ].copy()

tukey = pairwise_tukeyhsd(endog=target['points'], groups=target['segment'].astype(str), alpha=0.05)
print(tukey.summary())

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
armpit  chest   -0.375 0.0008 -0.6492 -0.1009   True
armpit   feet   0.0658 0.9869 -0.1406  0.2722  False
armpit   hair  -0.7152    0.0 -0.9667 -0.4638   True
armpit   hand  -0.2624 0.0463 -0.5227 -0.0021   True
armpit   knee  -0.2628 0.6915 -0.7194  0.1939  False
armpit  mouth  -0.5545    0.0 -0.8031 -0.3059   True
armpit   neck  -0.4407    0.0 -0.6518 -0.2295   True
armpit pelvis  -0.6601    0.0 -0.8689 -0.4513   True
 chest   feet   0.4408 0.0001  0.1466  0.7351   True
 chest   hair  -0.3402 0.0346 -0.6677 -0.0127   True
 chest   hand   0.1126 0.9813 -0.2217  0.4469  False
 chest   knee   0.1123 0.9989 -0.3902  0.6148  False
 chest  mouth  -0.1795 0.7389 -0.5047  0.1458  False
 chest   neck  -0.0656  0.999 -0.3633   0.232  False
 chest pelvis  -0.2851 0.0695  -0.581  0.0109  False
  feet   hair   -0.781    0.0 -1.0543 -0.5078 