In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

### I've thought of mainly two ways to build models for this: Logistic and Classfication
### 1. Logistic:
#### Split oac ability into: can do oac and can't do oac > simple logistic
#### Split oac ability into 4: multiple oac, single oac, almost oac, no oac > ordinal logistic regression
#### Ordinal logistic regression seems to be supported on R from what I've found
#### I could assign a 'score' for each 4 levels
#### multiple oac - score of 4
#### single oac - score of 3
#### almost oac - score of 2
#### no oac - score of 1
#### and then I could do ordinal regression instead instead of ordinal logistic regression
#### OR I could just use R and do ordinal logistic regression
#### Time to go watch statquest again

### 2. Classification
#### SVM and Decision tree. Haven't given too much thought about these yet but these algorithms came to mind first


In [22]:
raw = pd.read_excel("no_dummies_v2.xlsx")

In [23]:
raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1127 entries, 0 to 1126
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   height                    1127 non-null   float64
 1   weight                    1127 non-null   float64
 2   upper_arm_length          459 non-null    float64
 3   pull_style                1127 non-null   object 
 4   weight_x_repmax           1127 non-null   object 
 5   orm                       987 non-null    float64
 6   orm_as_%                  987 non-null    float64
 7   orm_bins                  987 non-null    object 
 8   max_pulls                 998 non-null    float64
 9   oac_ability               1127 non-null   object 
 10  best_contribution_to_oac  946 non-null    object 
 11  specific_training_time    1117 non-null   object 
 12  sessions_per_week         860 non-null    float64
 13  years_of_training         870 non-null    float64
dtypes: float

In [24]:
raw["oac_ability"].unique()

array(['one-negative', 'neither', 'multiple-negatives/bad-form',
       'never-tried', 'oap', 'both', 'oac', 'multiple-oap',
       'multiple-both', 'multiple-oac'], dtype=object)

In [25]:
def yesno(x):
    ability = x["oac_ability"]
    yes = ['oap','oac','both','multiple-both','multiple-oap','multiple-oac']
    if ability in yes:
        return 1
    else:
        return 0

In [26]:
raw["oac_yesno"] = raw.apply(yesno,axis=1)

### I did classify multiple oac/oap as a separate category but upon further thought I think since there are only 7 responses I think it's safe to combine it with single oap/oac
### Also there is a difference between being able to do multiple-negatives/bad form and single negative(personal experience) so will split these instead

In [27]:
def fourlevels(x):
    ability = x["oac_ability"]
    yes = ['oap','oac','both','multiple-both','multiple-oap','multiple-oac']
    if ability in yes:
        return "advanced"
    elif ability  == 'multiple-negatives/bad-form':
        return "intermediate"
    elif ability == 'one-negative':
        return "beginner"
    else:
        return "zero"

In [28]:
raw["oac_ability_rank"] = raw.apply(fourlevels,axis=1)

In [29]:
raw.head()

Unnamed: 0,height,weight,upper_arm_length,pull_style,weight_x_repmax,orm,orm_as_%,orm_bins,max_pulls,oac_ability,best_contribution_to_oac,specific_training_time,sessions_per_week,years_of_training,oac_yesno,oac_ability_rank
0,180.0,84.0,38.0,wpu,51x1,51.0,60.714286,60~79%,,one-negative,"pulley,high-volume,partials",2/2,2.0,2.0,0,beginner
1,192.0,78.0,,none,0x0,,,,12.0,neither,,0/0,,,0,zero
2,168.0,55.0,,wpu,75x1,75.0,136.363636,120~139%,18.0,multiple-negatives/bad-form,"climbers-approach,eccentrics,band",2/1,2.0,1.0,0,intermediate
3,190.0,80.0,14.0,wpu,25x1,25.0,31.25,20~39%,10.0,neither,weighted-pulls,2/1,2.0,1.0,0,zero
4,172.0,68.0,,wcu,40x2,47.2,69.411765,60~79%,18.0,neither,,1/0,1.0,,0,zero


### didn't like how I did dummies in the data cleaning stage so will do it again
### also forgot to drop some dummy columns

### To dummy:
#### weighted_pull_style
#### best_contribution
#### oac_ability_rank

### Also going to drop some responses where weighted pull orm % is over 170% ish

### Columns to drop
#### Arm length > too few responses
#### weight_x_repmax > not needed
#### orm_bins > not needed
#### orm > already have orm as %
#### oac_ability > not needed, already dummied
#### specific_training_time > not needed

In [30]:
raw = raw.drop(columns=["upper_arm_length","weight_x_repmax","orm","orm_bins","oac_ability","specific_training_time"])

In [31]:
raw.head()

Unnamed: 0,height,weight,pull_style,orm_as_%,max_pulls,best_contribution_to_oac,sessions_per_week,years_of_training,oac_yesno,oac_ability_rank
0,180.0,84.0,wpu,60.714286,,"pulley,high-volume,partials",2.0,2.0,0,beginner
1,192.0,78.0,none,,12.0,,,,0,zero
2,168.0,55.0,wpu,136.363636,18.0,"climbers-approach,eccentrics,band",2.0,1.0,0,intermediate
3,190.0,80.0,wpu,31.25,10.0,weighted-pulls,2.0,1.0,0,zero
4,172.0,68.0,wcu,69.411765,18.0,,1.0,,0,zero


#### gonna classify neutral grip and ring pull up as weighted chin up because neutral and ring have so few occurences

In [32]:
raw["pull_style"] = raw["pull_style"].apply(lambda x: x.replace("npu","wcu"))
raw["pull_style"] = raw["pull_style"].apply(lambda x: x.replace("rpu","wcu"))

In [33]:
pull_style_dummies = pd.get_dummies(raw.pull_style)
pull_style_dummies.head()

Unnamed: 0,both,none,wcu,wpu
0,0,0,0,1
1,0,1,0,0
2,0,0,0,1
3,0,0,0,1
4,0,0,1,0


In [34]:
oac_ability_rank_dummies = pd.get_dummies(raw.oac_ability_rank)
oac_ability_rank_dummies.head()

Unnamed: 0,advanced,beginner,intermediate,zero
0,0,1,0,0
1,0,0,0,1
2,0,0,1,0
3,0,0,0,1
4,0,0,0,1


In [35]:
from sklearn.preprocessing import MultiLabelBinarizer

In [36]:
def splitter(x):
    try:
        return x.split(",")
    except:
        return []

In [37]:
raw["best_contribution_to_oac"] = raw["best_contribution_to_oac"].apply(lambda x: splitter(x))
raw.head()

Unnamed: 0,height,weight,pull_style,orm_as_%,max_pulls,best_contribution_to_oac,sessions_per_week,years_of_training,oac_yesno,oac_ability_rank
0,180.0,84.0,wpu,60.714286,,"[pulley, high-volume, partials]",2.0,2.0,0,beginner
1,192.0,78.0,none,,12.0,[],,,0,zero
2,168.0,55.0,wpu,136.363636,18.0,"[climbers-approach, eccentrics, band]",2.0,1.0,0,intermediate
3,190.0,80.0,wpu,31.25,10.0,[weighted-pulls],2.0,1.0,0,zero
4,172.0,68.0,wcu,69.411765,18.0,[],1.0,,0,zero


In [38]:
mlb = MultiLabelBinarizer()

contribution = raw["best_contribution_to_oac"]

dummied_contributions = pd.DataFrame(mlb.fit_transform(contribution),columns=mlb.classes_, index=contribution.index)
dummied_contributions.head()

Unnamed: 0,band,climbers-approach,climbing,eccentrics,grip,high-volume,isometrics,mixed-grip/mantle,partials,pulley,rope-climbs,weighted-pulls
0,0,0,0,0,0,1,0,0,1,1,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0
2,1,1,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0


In [39]:
oac = raw[raw.columns[-2:]]
oac.head()

Unnamed: 0,oac_yesno,oac_ability_rank
0,0,beginner
1,0,zero
2,0,intermediate
3,0,zero
4,0,zero


In [40]:
oac = pd.concat([oac,oac_ability_rank_dummies],axis=1)
oac.head()

Unnamed: 0,oac_yesno,oac_ability_rank,advanced,beginner,intermediate,zero
0,0,beginner,0,1,0,0
1,0,zero,0,0,0,1
2,0,intermediate,0,0,1,0
3,0,zero,0,0,0,1
4,0,zero,0,0,0,1


In [41]:
variables = raw[raw.columns[:-2]]
variables.head()

Unnamed: 0,height,weight,pull_style,orm_as_%,max_pulls,best_contribution_to_oac,sessions_per_week,years_of_training
0,180.0,84.0,wpu,60.714286,,"[pulley, high-volume, partials]",2.0,2.0
1,192.0,78.0,none,,12.0,[],,
2,168.0,55.0,wpu,136.363636,18.0,"[climbers-approach, eccentrics, band]",2.0,1.0
3,190.0,80.0,wpu,31.25,10.0,[weighted-pulls],2.0,1.0
4,172.0,68.0,wcu,69.411765,18.0,[],1.0,


In [42]:
variables = pd.concat([variables,pull_style_dummies,dummied_contributions],axis=1)
variables.head()

Unnamed: 0,height,weight,pull_style,orm_as_%,max_pulls,best_contribution_to_oac,sessions_per_week,years_of_training,both,none,...,climbing,eccentrics,grip,high-volume,isometrics,mixed-grip/mantle,partials,pulley,rope-climbs,weighted-pulls
0,180.0,84.0,wpu,60.714286,,"[pulley, high-volume, partials]",2.0,2.0,0,0,...,0,0,0,1,0,0,1,1,0,0
1,192.0,78.0,none,,12.0,[],,,0,1,...,0,0,0,0,0,0,0,0,0,0
2,168.0,55.0,wpu,136.363636,18.0,"[climbers-approach, eccentrics, band]",2.0,1.0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,190.0,80.0,wpu,31.25,10.0,[weighted-pulls],2.0,1.0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,172.0,68.0,wcu,69.411765,18.0,[],1.0,,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
variables.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1127 entries, 0 to 1126
Data columns (total 24 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   height                    1127 non-null   float64
 1   weight                    1127 non-null   float64
 2   pull_style                1127 non-null   object 
 3   orm_as_%                  987 non-null    float64
 4   max_pulls                 998 non-null    float64
 5   best_contribution_to_oac  1127 non-null   object 
 6   sessions_per_week         860 non-null    float64
 7   years_of_training         870 non-null    float64
 8   both                      1127 non-null   uint8  
 9   none                      1127 non-null   uint8  
 10  wcu                       1127 non-null   uint8  
 11  wpu                       1127 non-null   uint8  
 12  band                      1127 non-null   int64  
 13  climbers-approach         1127 non-null   int64  
 14  climbing

In [44]:
#Dropping uselss columns and dropping 1 of each dummied variables
#Dropping 'both' for pull style
#Not going to drop 'weighted-pulls' because it implies that it is default where all else is zero
#But it is not
variables = variables.drop(columns=["pull_style","best_contribution_to_oac","both"])

In [45]:
final = pd.concat([variables,oac],axis=1)
final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1127 entries, 0 to 1126
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             1127 non-null   float64
 1   weight             1127 non-null   float64
 2   orm_as_%           987 non-null    float64
 3   max_pulls          998 non-null    float64
 4   sessions_per_week  860 non-null    float64
 5   years_of_training  870 non-null    float64
 6   none               1127 non-null   uint8  
 7   wcu                1127 non-null   uint8  
 8   wpu                1127 non-null   uint8  
 9   band               1127 non-null   int64  
 10  climbers-approach  1127 non-null   int64  
 11  climbing           1127 non-null   int64  
 12  eccentrics         1127 non-null   int64  
 13  grip               1127 non-null   int64  
 14  high-volume        1127 non-null   int64  
 15  isometrics         1127 non-null   int64  
 16  mixed-grip/mantle  1127 

In [157]:
super_strong = final[final["orm_as_%"]>140]
super_strong = super_strong.reset_index(drop=True)
super_strong.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             17 non-null     float64
 1   weight             17 non-null     float64
 2   orm_as_%           17 non-null     float64
 3   max_pulls          14 non-null     float64
 4   sessions_per_week  14 non-null     float64
 5   years_of_training  14 non-null     float64
 6   none               17 non-null     uint8  
 7   wcu                17 non-null     uint8  
 8   wpu                17 non-null     uint8  
 9   band               17 non-null     int64  
 10  climbers-approach  17 non-null     int64  
 11  climbing           17 non-null     int64  
 12  eccentrics         17 non-null     int64  
 13  grip               17 non-null     int64  
 14  high-volume        17 non-null     int64  
 15  isometrics         17 non-null     int64  
 16  mixed-grip/mantle  17 non-nu

In [158]:
final_orm_sanity = final[final["orm_as_%"]<=140]
final_orm_sanity = final_orm_sanity.reset_index(drop=True)
final_orm_sanity.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 970 entries, 0 to 969
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             970 non-null    float64
 1   weight             970 non-null    float64
 2   orm_as_%           970 non-null    float64
 3   max_pulls          868 non-null    float64
 4   sessions_per_week  757 non-null    float64
 5   years_of_training  761 non-null    float64
 6   none               970 non-null    uint8  
 7   wcu                970 non-null    uint8  
 8   wpu                970 non-null    uint8  
 9   band               970 non-null    int64  
 10  climbers-approach  970 non-null    int64  
 11  climbing           970 non-null    int64  
 12  eccentrics         970 non-null    int64  
 13  grip               970 non-null    int64  
 14  high-volume        970 non-null    int64  
 15  isometrics         970 non-null    int64  
 16  mixed-grip/mantle  970 non

# Finally doing logistic regression
# Can't do it with orm_as_% as null values so dropping those

In [171]:
logit = final[final["orm_as_%"]<=200]
logit.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 987 entries, 0 to 1126
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             987 non-null    float64
 1   weight             987 non-null    float64
 2   orm_as_%           987 non-null    float64
 3   max_pulls          882 non-null    float64
 4   sessions_per_week  771 non-null    float64
 5   years_of_training  775 non-null    float64
 6   none               987 non-null    uint8  
 7   wcu                987 non-null    uint8  
 8   wpu                987 non-null    uint8  
 9   band               987 non-null    int64  
 10  climbers-approach  987 non-null    int64  
 11  climbing           987 non-null    int64  
 12  eccentrics         987 non-null    int64  
 13  grip               987 non-null    int64  
 14  high-volume        987 non-null    int64  
 15  isometrics         987 non-null    int64  
 16  mixed-grip/mantle  987 no

In [179]:
X = logit[logit.columns[:-6]]
X = X.drop(columns=["max_pulls","sessions_per_week","years_of_training","none","wcu","wpu"])
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 987 entries, 0 to 1126
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             987 non-null    float64
 1   weight             987 non-null    float64
 2   orm_as_%           987 non-null    float64
 3   band               987 non-null    int64  
 4   climbers-approach  987 non-null    int64  
 5   climbing           987 non-null    int64  
 6   eccentrics         987 non-null    int64  
 7   grip               987 non-null    int64  
 8   high-volume        987 non-null    int64  
 9   isometrics         987 non-null    int64  
 10  mixed-grip/mantle  987 non-null    int64  
 11  partials           987 non-null    int64  
 12  pulley             987 non-null    int64  
 13  rope-climbs        987 non-null    int64  
 14  weighted-pulls     987 non-null    int64  
dtypes: float64(3), int64(12)
memory usage: 123.4 KB


In [180]:
y = logit["oac_yesno"]

In [181]:
log_reg = sm.Logit(y, X).fit()

Optimization terminated successfully.
         Current function value: 0.497710
         Iterations 6


In [182]:
print(log_reg.summary())

                           Logit Regression Results                           
Dep. Variable:              oac_yesno   No. Observations:                  987
Model:                          Logit   Df Residuals:                      972
Method:                           MLE   Df Model:                           14
Date:                Sat, 01 May 2021   Pseudo R-squ.:                  0.1982
Time:                        10:32:08   Log-Likelihood:                -491.24
converged:                       True   LL-Null:                       -612.67
Covariance Type:            nonrobust   LLR p-value:                 8.615e-44
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
height               -0.0118      0.005     -2.588      0.010      -0.021      -0.003
weight               -0.0284      0.011     -2.684      0.007      -0.049      -0.008
orm_as_%              0.

#### Doing it again with training time and max pulls

In [184]:
logit_training_time = final[final["orm_as_%"]<=200]
logit_training_time = logit_training_time[(logit_training_time["sessions_per_week"]>=0) & (logit_training_time["years_of_training"]>=0)]
logit_training_time['max_pulls'] = logit_training_time['max_pulls'].fillna((logit_training_time['max_pulls'].mean()))
logit_training_time.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 745 entries, 0 to 1126
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             745 non-null    float64
 1   weight             745 non-null    float64
 2   orm_as_%           745 non-null    float64
 3   max_pulls          745 non-null    float64
 4   sessions_per_week  745 non-null    float64
 5   years_of_training  745 non-null    float64
 6   none               745 non-null    uint8  
 7   wcu                745 non-null    uint8  
 8   wpu                745 non-null    uint8  
 9   band               745 non-null    int64  
 10  climbers-approach  745 non-null    int64  
 11  climbing           745 non-null    int64  
 12  eccentrics         745 non-null    int64  
 13  grip               745 non-null    int64  
 14  high-volume        745 non-null    int64  
 15  isometrics         745 non-null    int64  
 16  mixed-grip/mantle  745 no

In [185]:
X = logit_training_time[logit_training_time.columns[:-6]]
X = X.drop(columns=["none","wcu","wpu"])
y = logit_training_time["oac_yesno"]
log_reg = sm.Logit(y, X).fit()

         Current function value: 0.447085
         Iterations: 35




In [186]:
print(log_reg.summary())

                           Logit Regression Results                           
Dep. Variable:              oac_yesno   No. Observations:                  745
Model:                          Logit   Df Residuals:                      727
Method:                           MLE   Df Model:                           17
Date:                Sat, 01 May 2021   Pseudo R-squ.:                  0.3079
Time:                        10:33:20   Log-Likelihood:                -333.08
converged:                      False   LL-Null:                       -481.26
Covariance Type:            nonrobust   LLR p-value:                 6.361e-53
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
height               -0.0236      0.006     -3.952      0.000      -0.035      -0.012
weight               -0.0391      0.013     -3.047      0.002      -0.064      -0.014
orm_as_%              0.

### Doing it with sanity checked one rep max
#### Not expecting much to change

In [187]:
logit_sane = final[final["orm_as_%"]<=140]
logit_sane.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 970 entries, 0 to 1126
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             970 non-null    float64
 1   weight             970 non-null    float64
 2   orm_as_%           970 non-null    float64
 3   max_pulls          868 non-null    float64
 4   sessions_per_week  757 non-null    float64
 5   years_of_training  761 non-null    float64
 6   none               970 non-null    uint8  
 7   wcu                970 non-null    uint8  
 8   wpu                970 non-null    uint8  
 9   band               970 non-null    int64  
 10  climbers-approach  970 non-null    int64  
 11  climbing           970 non-null    int64  
 12  eccentrics         970 non-null    int64  
 13  grip               970 non-null    int64  
 14  high-volume        970 non-null    int64  
 15  isometrics         970 non-null    int64  
 16  mixed-grip/mantle  970 no

In [188]:
X = logit_sane[logit_sane.columns[:-6]]
X = X.drop(columns=["max_pulls","sessions_per_week","years_of_training","none","wcu","wpu"])
y = logit_sane["oac_yesno"]


In [189]:
log_reg = sm.Logit(y, X).fit()

Optimization terminated successfully.
         Current function value: 0.444651
         Iterations 6


In [190]:
print(log_reg.summary())

                           Logit Regression Results                           
Dep. Variable:              oac_yesno   No. Observations:                  970
Model:                          Logit   Df Residuals:                      955
Method:                           MLE   Df Model:                           14
Date:                Sat, 01 May 2021   Pseudo R-squ.:                  0.2876
Time:                        10:34:20   Log-Likelihood:                -431.31
converged:                       True   LL-Null:                       -605.47
Covariance Type:            nonrobust   LLR p-value:                 9.250e-66
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
height               -0.0237      0.005     -4.677      0.000      -0.034      -0.014
weight               -0.0244      0.011     -2.155      0.031      -0.047      -0.002
orm_as_%              0.

In [192]:
logit_training_time = logit_sane[(logit_sane["sessions_per_week"]>=0) & (logit_sane["years_of_training"]>=0)]
logit_training_time['max_pulls'] = logit_training_time['max_pulls'].fillna((logit_training_time['max_pulls'].mean()))
logit_training_time.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 731 entries, 0 to 1126
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             731 non-null    float64
 1   weight             731 non-null    float64
 2   orm_as_%           731 non-null    float64
 3   max_pulls          731 non-null    float64
 4   sessions_per_week  731 non-null    float64
 5   years_of_training  731 non-null    float64
 6   none               731 non-null    uint8  
 7   wcu                731 non-null    uint8  
 8   wpu                731 non-null    uint8  
 9   band               731 non-null    int64  
 10  climbers-approach  731 non-null    int64  
 11  climbing           731 non-null    int64  
 12  eccentrics         731 non-null    int64  
 13  grip               731 non-null    int64  
 14  high-volume        731 non-null    int64  
 15  isometrics         731 non-null    int64  
 16  mixed-grip/mantle  731 no

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  logit_training_time['max_pulls'] = logit_training_time['max_pulls'].fillna((logit_training_time['max_pulls'].mean()))


In [193]:
X = logit_training_time[logit_training_time.columns[:-6]]
X = X.drop(columns=["none","wcu","wpu"])
y = logit_training_time["oac_yesno"]
log_reg = sm.Logit(y, X).fit()

         Current function value: 0.423948
         Iterations: 35




In [194]:
print(log_reg.summary())

                           Logit Regression Results                           
Dep. Variable:              oac_yesno   No. Observations:                  731
Model:                          Logit   Df Residuals:                      713
Method:                           MLE   Df Model:                           17
Date:                Sat, 01 May 2021   Pseudo R-squ.:                  0.3470
Time:                        10:36:37   Log-Likelihood:                -309.91
converged:                      False   LL-Null:                       -474.60
Covariance Type:            nonrobust   LLR p-value:                 9.375e-60
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
height               -0.0312      0.006     -4.851      0.000      -0.044      -0.019
weight               -0.0333      0.013     -2.500      0.012      -0.059      -0.007
orm_as_%              0.

### Making datasets for ordinal regression in R

#### Insane as in orm is not sanitised

In [207]:
insane_ordinal = final[final["orm_as_%"]<=200]
X = insane_ordinal[insane_ordinal.columns[:-6]]
X = X.drop(columns=["max_pulls","sessions_per_week","years_of_training","none","wcu","wpu"])
y = insane_ordinal["oac_ability_rank"]
insane_ordinal_final = pd.concat([X,y],axis=1)

In [209]:
insane_ordinal_final.to_excel("insane_ordinal_final.xlsx",index=False)

In [212]:
#tt as in training time
insane_ordinal_tt = final[final["orm_as_%"]<=200]
insane_ordinal_tt = insane_ordinal_tt[(insane_ordinal_tt["sessions_per_week"]>=0) & (insane_ordinal_tt["years_of_training"]>=0)]
insane_ordinal_tt['max_pulls'] = insane_ordinal_tt['max_pulls'].fillna((insane_ordinal_tt['max_pulls'].mean()))
X = insane_ordinal_tt[insane_ordinal_tt.columns[:-6]]
X = X.drop(columns=["none","wcu","wpu"])
y = insane_ordinal_tt["oac_ability_rank"]
insane_ordinal_tt_final = pd.concat([X,y],axis=1)

In [214]:
insane_ordinal_tt_final.to_excel("insane_ordinal_final_tt.xlsx",index=False)

#### Now doing a sane version of this

In [215]:
sane_ordinal = final[final["orm_as_%"]<=140]
X = sane_ordinal[sane_ordinal.columns[:-6]]
X = X.drop(columns=["max_pulls","sessions_per_week","years_of_training","none","wcu","wpu"])
y = sane_ordinal["oac_ability_rank"]
sane_ordinal_final = pd.concat([X,y],axis=1)
sane_ordinal_final.to_excel("sane_ordinal_final.xlsx",index=False)

In [216]:
sane_ordinal_tt = final[final["orm_as_%"]<=140]
sane_ordinal_tt = sane_ordinal_tt[(sane_ordinal_tt["sessions_per_week"]>=0) & (sane_ordinal_tt["years_of_training"]>=0)]
sane_ordinal_tt['max_pulls'] = sane_ordinal_tt['max_pulls'].fillna((sane_ordinal_tt['max_pulls'].mean()))
X = sane_ordinal_tt[sane_ordinal_tt.columns[:-6]]
X = X.drop(columns=["none","wcu","wpu"])
y = sane_ordinal_tt["oac_ability_rank"]
sane_ordinal_tt_final = pd.concat([X,y],axis=1)
sane_ordinal_tt_final.to_excel("sane_ordinal_final_tt.xlsx",index=False)

### Gonna do some simple cross tabulation

In [46]:
final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1127 entries, 0 to 1126
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             1127 non-null   float64
 1   weight             1127 non-null   float64
 2   orm_as_%           987 non-null    float64
 3   max_pulls          998 non-null    float64
 4   sessions_per_week  860 non-null    float64
 5   years_of_training  870 non-null    float64
 6   none               1127 non-null   uint8  
 7   wcu                1127 non-null   uint8  
 8   wpu                1127 non-null   uint8  
 9   band               1127 non-null   int64  
 10  climbers-approach  1127 non-null   int64  
 11  climbing           1127 non-null   int64  
 12  eccentrics         1127 non-null   int64  
 13  grip               1127 non-null   int64  
 14  high-volume        1127 non-null   int64  
 15  isometrics         1127 non-null   int64  
 16  mixed-grip/mantle  1127 

In [48]:
ranked_only = final.drop(columns=["oac_yesno","advanced","beginner","intermediate","zero"])
ranked_only.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1127 entries, 0 to 1126
Data columns (total 22 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   height             1127 non-null   float64
 1   weight             1127 non-null   float64
 2   orm_as_%           987 non-null    float64
 3   max_pulls          998 non-null    float64
 4   sessions_per_week  860 non-null    float64
 5   years_of_training  870 non-null    float64
 6   none               1127 non-null   uint8  
 7   wcu                1127 non-null   uint8  
 8   wpu                1127 non-null   uint8  
 9   band               1127 non-null   int64  
 10  climbers-approach  1127 non-null   int64  
 11  climbing           1127 non-null   int64  
 12  eccentrics         1127 non-null   int64  
 13  grip               1127 non-null   int64  
 14  high-volume        1127 non-null   int64  
 15  isometrics         1127 non-null   int64  
 16  mixed-grip/mantle  1127 

In [None]:
ranked_pivot = ranked_only.pivot_table(index="oac_ability_rank",columns="orm_bins",aggfunc="size",fill_value=0)
