In [505]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import torch
%matplotlib inline

In [506]:
combined_df = pd.read_csv('Mens_Crossfit_data_cleaned.csv')

In [507]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5561 entries, 0 to 5560
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Userid          5561 non-null   int64  
 1   Name            5561 non-null   object 
 2   Age             5561 non-null   int64  
 3   Height_inches   5561 non-null   float64
 4   Weight_lbs      5561 non-null   float64
 5   Back Squat      5561 non-null   float64
 6   Clean_and_Jerk  5561 non-null   float64
 7   Snatch          5561 non-null   float64
 8   Deadlift        5561 non-null   float64
 9   Fight Gone Bad  5561 non-null   float64
 10  17.1_time       5561 non-null   float64
 11  17.1_reps       5561 non-null   int64  
 12  17.2_score      5561 non-null   float64
 13  17.3_time       5561 non-null   float64
 14  17.3_reps       5561 non-null   int64  
 15  17.4_score      5561 non-null   float64
 16  17.5_score      5561 non-null   int64  
dtypes: float64(11), int64(5), object(

In [508]:
combined_df.columns

Index(['Userid', 'Name', 'Age', 'Height_inches', 'Weight_lbs', 'Back Squat',
       'Clean_and_Jerk', 'Snatch', 'Deadlift', 'Fight Gone Bad', '17.1_time',
       '17.1_reps', '17.2_score', '17.3_time', '17.3_reps', '17.4_score',
       '17.5_score'],
      dtype='object')

In [509]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error

#Let's create our training and testing data

#Get rid of the names and userid as these will not help when modeling!
combined_df.drop(['Userid','Name'], axis = 1, inplace = True)
combined_df

Unnamed: 0,Age,Height_inches,Weight_lbs,Back Squat,Clean_and_Jerk,Snatch,Deadlift,Fight Gone Bad,17.1_time,17.1_reps,17.2_score,17.3_time,17.3_reps,17.4_score,17.5_score
0,34,77.0,231.0,335.0,265.0,210.0,415.0,393.0,1200.0,218,92.0,1440.0,56,178.0,920
1,28,72.0,240.0,420.0,295.0,225.0,455.0,286.0,1200.0,221,78.0,1440.0,88,173.0,968
2,37,74.0,198.0,297.0,231.0,169.0,352.0,398.0,878.0,225,128.0,1440.0,67,186.0,794
3,42,65.0,145.0,330.0,250.0,198.0,375.0,314.0,1200.0,178,89.0,1440.0,80,176.0,947
4,36,70.0,200.0,400.0,285.0,215.0,485.0,438.0,842.0,225,182.0,1440.0,104,219.0,563
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5556,29,73.0,250.0,325.0,245.0,175.0,445.0,274.0,1200.0,195,78.0,1440.0,57,165.0,0
5557,31,68.0,163.0,282.0,187.0,132.0,359.0,282.0,860.0,225,123.0,1440.0,80,201.0,536
5558,36,74.0,212.0,405.0,310.0,250.0,465.0,419.0,804.0,225,167.0,1440.0,130,217.0,565
5559,28,67.0,155.0,315.0,220.0,165.0,405.0,305.0,995.0,225,136.0,1440.0,80,197.0,688


In [510]:
#Let's predict our 17.5 score 

X = combined_df.drop('17.5_score', axis = 1)
y = combined_df['17.5_score']


### FEATURE SCALING HERE!

In [511]:
#scale the X features using sklearn

from sklearn.preprocessing import StandardScaler

# Assuming 'combined_df' is your DataFrame and '17.5_score' is the target column
features = combined_df.drop('17.5_score', axis=1)
target = combined_df['17.5_score']

scaler = StandardScaler()
X = scaler.fit_transform(X)


In [512]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2, random_state = 42)

model = RandomForestRegressor(n_estimators = 100, random_state = 42)

model.fit(X_train,y_train)

In [513]:
y_preds = model.predict(X_test)

# view the predictions
pd.DataFrame({'actual': y_test/60, 'preds': y_preds/60}).head(10)

Unnamed: 0,actual,preds
889,18.3,19.5675
1075,10.95,11.301833
5035,15.45,19.601333
2995,21.8,15.7705
4319,13.533333,14.779667
5022,21.5,16.576833
2042,15.916667,18.112
4877,24.616667,21.324167
3433,8.616667,9.5065
2095,11.4,12.4635


In [514]:
r2_score(y_test,y_preds)

0.4257088374602114

In [515]:
#Calculate MAE
mae = mean_absolute_error(y_test,y_preds)
mae

142.52580413297395

### PyTorch Approach

In [516]:
!pip install torchmetrics



In [517]:
import torch
from torch import nn
import matplotlib.pyplot as plt
X_torch = torch.from_numpy(X).type(torch.float32)
#X_torch = torch.from_numpy(X.values).type(torch.float32)
y_torch = torch.from_numpy(y.values).type(torch.float32)

In [518]:
X_train, X_test, y_train, y_test = train_test_split(X_torch, y_torch, test_size=0.33, random_state=42)

In [519]:
X_train.dtype,X_test.dtype,y_train.dtype,y_test.dtype

(torch.float32, torch.float32, torch.float32, torch.float32)

In [520]:
#Create model and instantiate

class WOD_model(nn.Module):
    def __init__(self,input_features, output_features, hidden_units):
        super().__init__()
        
        #Create the layers
        self.model = nn.Sequential(
        nn.Linear(in_features = input_features, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        #nn.Dropout(p=0.2),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = output_features),
        )
        
    def forward(self,x):
        return self.model(x)
        
#Instantiate model
model_V0 = WOD_model(input_features = combined_df.drop('17.5_score', axis = 1).shape[1], output_features = 1, hidden_units = 16)

In [521]:
model_V0.state_dict

<bound method Module.state_dict of WOD_model(
  (model): Sequential(
    (0): Linear(in_features=14, out_features=16, bias=True)
    (1): GELU(approximate=none)
    (2): Linear(in_features=16, out_features=16, bias=True)
    (3): GELU(approximate=none)
    (4): Linear(in_features=16, out_features=16, bias=True)
    (5): GELU(approximate=none)
    (6): Linear(in_features=16, out_features=16, bias=True)
    (7): GELU(approximate=none)
    (8): Linear(in_features=16, out_features=16, bias=True)
    (9): GELU(approximate=none)
    (10): Linear(in_features=16, out_features=16, bias=True)
    (11): GELU(approximate=none)
    (12): Linear(in_features=16, out_features=1, bias=True)
  )
)>

In [522]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(params = model_V0.parameters(), lr = 0.01)

In [523]:
#Create training and testing loop

torch.manual_seed(42)

epochs = 1000

for epoch in range(epochs):
    model_V0.train()
    
    y_pred = model_V0(X_train)
    
    loss = loss_fn(y_pred.squeeze(),y_train)
    #print(f'Ypred shape:{y_pred.shape}||y_train shape{y_train.shape}')
    
    optimizer.zero_grad()
    
    loss.backward()
    
    optimizer.step()
    
    ### Testing loop
    
    model_V0.eval()
    
    with torch.inference_mode():
        test_pred = model_V0(X_test)
        test_loss = loss_fn(test_pred,y_test)
    if epoch%100 == 0:
            print(f"Epoch: {epoch}| Training Loss: {loss} | Test Loss: {test_loss}")

Epoch: 0| Training Loss: 876.3413696289062 | Test Loss: 877.4415283203125


  return F.l1_loss(input, target, reduction=self.reduction)


Epoch: 100| Training Loss: 133.33985900878906 | Test Loss: 305.2279357910156
Epoch: 200| Training Loss: 130.52357482910156 | Test Loss: 305.8860168457031
Epoch: 300| Training Loss: 129.24842834472656 | Test Loss: 304.56817626953125
Epoch: 400| Training Loss: 128.02420043945312 | Test Loss: 302.2587890625
Epoch: 500| Training Loss: 125.88590240478516 | Test Loss: 307.2287902832031
Epoch: 600| Training Loss: 126.13172912597656 | Test Loss: 304.56927490234375
Epoch: 700| Training Loss: 122.87552642822266 | Test Loss: 311.7474365234375
Epoch: 800| Training Loss: 121.6292495727539 | Test Loss: 312.0110168457031
Epoch: 900| Training Loss: 118.18629455566406 | Test Loss: 311.4102783203125


In [524]:
from torchmetrics.regression import MeanAbsoluteError

mae_fn = MeanAbsoluteError()

mae_fn(y_pred.squeeze(),y_train)

tensor(116.9070, grad_fn=<SqueezeBackward0>)

In [525]:
y_pred.squeeze().detach

<function Tensor.detach>

In [526]:
pred = y_pred.squeeze().detach().numpy()
true_vals = y_train.detach().numpy()

In [527]:
r2_score(true_vals,pred)

0.54497132681236

In [528]:
pd.DataFrame({'actual': true_vals, 'preds': pred}).head(10)

Unnamed: 0,actual,preds
0,651.0,720.968201
1,738.0,642.672058
2,836.0,798.253174
3,776.0,783.603638
4,592.0,566.166382
5,978.0,821.732666
6,469.0,555.270996
7,790.0,702.443359
8,1285.0,1180.391479
9,588.0,553.960022


In [529]:
#Take weights of neural network and use it to predict the output of all the 17.5 scores!

model_V0.eval()

with torch.inference_mode():
    _17_5_score_predictions = model_V0(X_torch)
    
_17_5_score_predictions = _17_5_score_predictions.squeeze().detach().numpy()
combined_df['17.5_score_preds'] = _17_5_score_predictions

In [530]:
combined_df

Unnamed: 0,Age,Height_inches,Weight_lbs,Back Squat,Clean_and_Jerk,Snatch,Deadlift,Fight Gone Bad,17.1_time,17.1_reps,17.2_score,17.3_time,17.3_reps,17.4_score,17.5_score,17.5_score_preds
0,34,77.0,231.0,335.0,265.0,210.0,415.0,393.0,1200.0,218,92.0,1440.0,56,178.0,920,941.675171
1,28,72.0,240.0,420.0,295.0,225.0,455.0,286.0,1200.0,221,78.0,1440.0,88,173.0,968,944.547607
2,37,74.0,198.0,297.0,231.0,169.0,352.0,398.0,878.0,225,128.0,1440.0,67,186.0,794,723.678894
3,42,65.0,145.0,330.0,250.0,198.0,375.0,314.0,1200.0,178,89.0,1440.0,80,176.0,947,947.768616
4,36,70.0,200.0,400.0,285.0,215.0,485.0,438.0,842.0,225,182.0,1440.0,104,219.0,563,598.332642
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5556,29,73.0,250.0,325.0,245.0,175.0,445.0,274.0,1200.0,195,78.0,1440.0,57,165.0,0,0.951611
5557,31,68.0,163.0,282.0,187.0,132.0,359.0,282.0,860.0,225,123.0,1440.0,80,201.0,536,693.676086
5558,36,74.0,212.0,405.0,310.0,250.0,465.0,419.0,804.0,225,167.0,1440.0,130,217.0,565,574.817993
5559,28,67.0,155.0,315.0,220.0,165.0,405.0,305.0,995.0,225,136.0,1440.0,80,197.0,688,704.804993
