# Weighted Pull up & Front Lever Survey

## Data cleaning

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import re
from statistics import mean

In [16]:
raw = pd.read_csv("Front Lever and Pullups correlation (Responses) - Form Responses 1(1).csv",encoding = "ISO-8859-1")

In [17]:
raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 611 entries, 0 to 610
Data columns (total 7 columns):
 #   Column                                             Non-Null Count  Dtype 
---  ------                                             --------------  ----- 
 0   Timestamp                                          609 non-null    object
 1   Weighted Pullup 1RM (% of BW)                      609 non-null    object
 2   Max Front Lever progression (3 seconds good form)  609 non-null    object
 3   Other thoughts or comments? Helpful data           219 non-null    object
 4   Bodyweight (kg)                                    595 non-null    object
 5   Max pullups (endurance)                            587 non-null    object
 6   Height (cm)                                        498 non-null    object
dtypes: object(7)
memory usage: 33.5+ KB


In [58]:
copy = raw.copy()

In [59]:
copy.dropna(axis=0,how='all').reset_index()

Unnamed: 0,index,Timestamp,Weighted Pullup 1RM (% of BW),Max Front Lever progression (3 seconds good form),Other thoughts or comments? Helpful data,Bodyweight (kg),Max pullups (endurance),Height (cm)
0,0,10/7/2020 0:41,<10% (so usually couple of pullups with BW),Straddle lever,,59.8,20,1.65
1,1,10/6/2020 20:38,65%-80%,Full lever,,72,21,1.78
2,2,10/7/2020 1:41,50%-65%,Straddle Halflay lever,I have 50% weighted pull up and barely 3 secon...,68,17,69
3,3,10/6/2020 19:50,50%-65%,Straddle lever,,62,20,142
4,4,10/6/2020 20:10,30%-50%,Full lever,,45,20,160
...,...,...,...,...,...,...,...,...
604,606,12/20/2020 21:37,>90%,Full lever,It may be related to both weighted pull ups an...,55,27,170
605,607,12/20/2020 21:38,65%-80%,Full lever,,72,22,175
606,608,12/20/2020 22:44,10%-30%,Tuck lever,,88,10,187
607,609,12/20/2020 22:55,65%-80%,Straddle lever,none,65,35,170


In [60]:
weight = []
for i in copy.index:
    bw = copy["Bodyweight (kg)"][i]
    bw_extracted = re.findall('(\d+(.\d+)?)',str(bw))
    try:
        if re.match('(\d{2}-\d{2})',bw_extracted[0][0]) is not None:
            weight_range = [int(x) for x in bw_extracted[0][0].split("-")]
            weight.append(mean(weight_range))
        else:
            weight.append(float(bw_extracted[0][0]))
    except:
        weight.append('')

In [61]:
copy["Bodyweight (kg)"] = weight

In [62]:
height = []
for i in copy.index:
    he = copy["Height (cm)"][i]
    he_extracted = re.findall('(\d+(.\d+)?)',str(he))
    try:
        he_cleaned = he_extracted[0][0].replace(',','.').replace('cm','').replace('m','')
        if re.match('\d.\d{2}',he_cleaned) is not None:
            height.append(float(he_cleaned)*100)
        else:
            height.append(float(he_cleaned))
    except:
        height.append('')

In [63]:
copy["Height (cm)"] = height

In [65]:
copy["Height (cm)"][2] = 169.0

In [71]:
maxpulls = []
for i in copy.index:
    mpulls = copy["Max pullups (endurance)"][i]
    mp_extracted = [int(x) for x in re.findall('\d+',str(mpulls))]
    if len(mp_extracted) >= 1:
        maxpulls.append(mean(mp_extracted))
    else:
        maxpulls.append('')

In [73]:
copy["Max pullups (endurance)"] = maxpulls

In [76]:
copy.dropna(axis=0,how='all')

Unnamed: 0,Timestamp,Weighted Pullup 1RM (% of BW),Max Front Lever progression (3 seconds good form),Other thoughts or comments? Helpful data,Bodyweight (kg),Max pullups (endurance),Height (cm)
0,10/7/2020 0:41,<10% (so usually couple of pullups with BW),Straddle lever,,59.8,20,165
1,10/6/2020 20:38,65%-80%,Full lever,,72,21,178
2,10/7/2020 1:41,50%-65%,Straddle Halflay lever,I have 50% weighted pull up and barely 3 secon...,68,17,169
3,10/6/2020 19:50,50%-65%,Straddle lever,,62,20,142
4,10/6/2020 20:10,30%-50%,Full lever,,45,20,160
...,...,...,...,...,...,...,...
606,12/20/2020 21:37,>90%,Full lever,It may be related to both weighted pull ups an...,55,27,170
607,12/20/2020 21:38,65%-80%,Full lever,,72,22,175
608,12/20/2020 22:44,10%-30%,Tuck lever,,88,10,187
609,12/20/2020 22:55,65%-80%,Straddle lever,none,65,35,170


In [77]:
copy.dropna(axis=0,how='all').reset_index()

Unnamed: 0,index,Timestamp,Weighted Pullup 1RM (% of BW),Max Front Lever progression (3 seconds good form),Other thoughts or comments? Helpful data,Bodyweight (kg),Max pullups (endurance),Height (cm)
0,0,10/7/2020 0:41,<10% (so usually couple of pullups with BW),Straddle lever,,59.8,20,165
1,1,10/6/2020 20:38,65%-80%,Full lever,,72,21,178
2,2,10/7/2020 1:41,50%-65%,Straddle Halflay lever,I have 50% weighted pull up and barely 3 secon...,68,17,169
3,3,10/6/2020 19:50,50%-65%,Straddle lever,,62,20,142
4,4,10/6/2020 20:10,30%-50%,Full lever,,45,20,160
...,...,...,...,...,...,...,...,...
606,606,12/20/2020 21:37,>90%,Full lever,It may be related to both weighted pull ups an...,55,27,170
607,607,12/20/2020 21:38,65%-80%,Full lever,,72,22,175
608,608,12/20/2020 22:44,10%-30%,Tuck lever,,88,10,187
609,609,12/20/2020 22:55,65%-80%,Straddle lever,none,65,35,170


In [13]:
copy.to_csv("Weight PU and FL cleaned.csv",index=False)

NameError: name 'copy' is not defined

In [14]:
cleaned = pd.read_csv("Weighted PU and FL cleaned.csv",encoding = "ISO-8859-1")

In [15]:
cleaned.columns

Index(['Timestamp', 'Weighted Pullup 1RM (% of BW)',
       'Max Front Lever progression (3 seconds good form)',
       'Other thoughts or comments? Helpful data', 'Bodyweight (kg)',
       'Max pullups (endurance)', 'Height (cm)'],
      dtype='object')

In [16]:
cleaned['Weighted Pullup 1RM (% of BW)'] = cleaned['Weighted Pullup 1RM (% of BW)'].apply(lambda x : x.replace("<10% (so usually couple of pullups with BW)","<10%"))

In [17]:
cleaned.head()

Unnamed: 0,Timestamp,Weighted Pullup 1RM (% of BW),Max Front Lever progression (3 seconds good form),Other thoughts or comments? Helpful data,Bodyweight (kg),Max pullups (endurance),Height (cm)
0,10/7/2020 0:41,<10%,Straddle lever,,59.8,20.0,165.0
1,10/6/2020 20:38,65%-80%,Full lever,,72.0,21.0,178.0
2,10/7/2020 1:41,50%-65%,Straddle Halflay lever,I have 50% weighted pull up and barely 3 secon...,68.0,17.0,169.0
3,10/6/2020 19:50,50%-65%,Straddle lever,,62.0,20.0,142.0
4,10/6/2020 20:10,30%-50%,Full lever,,45.0,20.0,160.0


In [19]:
cleaned.to_csv("Weighted PU and FL cleaned.csv",index=False)