In [30]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.regression.mixed_linear_model import MixedLMResults

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from numpy.exceptions import AxisError
import pandas as pd
from scipy import stats

from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Integer, String, insert, select

# Connection URL for the SQLite database file used throughout this notebook.
DB_URL = "sqlite+pysqlite:///GPSdata1.db"
engine = create_engine(DB_URL, echo=False)

# Reflect the existing tables from the database (autoload) instead of
# re-declaring their columns by hand.
metadata_obj = MetaData()
players_table, sessions_data_table = (
    Table(table_name, metadata_obj, autoload_with=engine)
    for table_name in ("players", "sessions_data")
)

In [5]:
# Build an index -> team-name lookup from the distinct teams in `players`.
with engine.connect() as conn:

    team_stm = (
        select(players_table.c.team)
        .distinct()
        # Explicit ordering: without ORDER BY the database may return the rows
        # in any order, which would make the integer keys below unstable
        # between runs.
        .order_by(players_table.c.team)
    )

    team_stm_result = conn.execute(team_stm)
    teams = team_stm_result.scalars().all()

# enumerate() yields (index, team) pairs, which dict() consumes directly.
teams_dict = dict(enumerate(teams))

teams_dict

{0: '1st', 1: 'U17', 2: 'U18', 3: 'U23'}

In [6]:
# Build an index -> position lookup from the distinct positions in `players`.
with engine.connect() as conn:

    positions_stm = (
        select(players_table.c.position)
        .distinct()
        # Explicit ordering: without ORDER BY the database may return the rows
        # in any order, which would make the integer keys below unstable
        # between runs.
        .order_by(players_table.c.position)
    )

    positions_stm_result = conn.execute(positions_stm)
    positions = positions_stm_result.scalars().all()

# enumerate() yields (index, position) pairs, which dict() consumes directly.
positions_dict = dict(enumerate(positions))

positions_dict

{0: 'CB', 1: 'CM', 2: 'FB', 3: 'FWD'}

In [12]:
# Show the (name, Column) pairs reflected for `sessions_data`; these names are
# the valid values for `measure` when selecting a column by name.
sessions_data_table.c.items()


[('player_id',
  Column('player_id', INTEGER(), table=<sessions_data>, nullable=False)),
 ('match_id',
  Column('match_id', INTEGER(), table=<sessions_data>, nullable=False)),
 ('Duration', Column('Duration', FLOAT(), table=<sessions_data>)),
 ('Distance', Column('Distance', FLOAT(), table=<sessions_data>)),
 ('HSR', Column('HSR', FLOAT(), table=<sessions_data>)),
 ('SD', Column('SD', FLOAT(), table=<sessions_data>)),
 ('Accels + Decels',
  Column('Accels + Decels', INTEGER(), table=<sessions_data>)),
 ('PlayerLoad', Column('PlayerLoad', FLOAT(), table=<sessions_data>)),
 ('Top Speed', Column('Top Speed', FLOAT(), table=<sessions_data>)),
 ('Dis/min', Column('Dis/min', FLOAT(), table=<sessions_data>)),
 ('HSR/min', Column('HSR/min', FLOAT(), table=<sessions_data>)),
 ('SD/min', Column('SD/min', FLOAT(), table=<sessions_data>)),
 ('Accels + Decels /min',
  Column('Accels + Decels /min', FLOAT(), table=<sessions_data>))]

In [33]:
# Parameters for this analysis: which `sessions_data` column to model and
# which team's players to include.
measure = "Dis/min"
selected_team = "1st"

measures = sessions_data_table.columns
measure_column = measures[measure]  # raises KeyError if `measure` is not a column name

# Fetch one row per (player, session) with the player's id, team, position and
# the chosen measure. The team filter is applied in SQL (as a bound parameter)
# so only the rows that are actually needed are transferred, and rows are
# ordered by player id so the resulting frame is reproducible between runs.
with engine.connect() as conn:
    stmt = (
        select(players_table.c.id, players_table.c.team, players_table.c.position, measure_column)
        .join(sessions_data_table, players_table.c.id == sessions_data_table.c.player_id)
        .where(players_table.c.team == selected_team)
        .order_by(players_table.c.id)
    )

    data = conn.execute(stmt).mappings().all()

df = pd.DataFrame(data)

df


Unnamed: 0,Dis/min,id,position,team
0,99.4609,1,CB,1st
1,98.4719,1,CB,1st
2,97.0324,1,CB,1st
3,98.8781,1,CB,1st
4,98.8032,1,CB,1st
...,...,...,...,...
151,115.6280,16,FWD,1st
152,114.8077,16,FWD,1st
153,109.6183,16,FWD,1st
154,113.7134,16,FWD,1st


In [46]:
# Linear mixed model: the selected measure explained by playing position, with
# observations grouped by player id (reported as "id Var" in the summary).
# Q('...') quotes the column name because names such as "Dis/min" are not
# valid Python identifiers inside a formula.
formula = f"Q('{measure}') ~ position"
md = smf.mixedlm(formula, data=df, groups="id")
mdf = md.fit()

mdf.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,Q('Dis/min')
No. Observations:,156,Method:,REML
No. Groups:,17,Scale:,14.6490
Min. group size:,1,Log-Likelihood:,-443.3988
Max. group size:,21,Converged:,Yes
Mean group size:,9.2,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,96.608,3.499,27.612,0.000,89.751,103.466
position[T.CM],21.628,4.269,5.066,0.000,13.261,29.995
position[T.FB],13.526,5.502,2.459,0.014,2.743,24.309
position[T.FWD],8.699,4.522,1.924,0.054,-0.164,17.562
id Var,35.316,4.084,,,,
