In [177]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import torch
%matplotlib inline

In [178]:
combined_df = pd.read_csv('Mens_Crossfit_data_cleaned.csv')

In [179]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5963 entries, 0 to 5962
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Userid          5963 non-null   int64  
 1   Name            5963 non-null   object 
 2   Age             5963 non-null   int64  
 3   Height_inches   5963 non-null   float64
 4   Weight_lbs      5963 non-null   float64
 5   Back Squat      5963 non-null   float64
 6   Clean_and_Jerk  5963 non-null   float64
 7   Snatch          5963 non-null   float64
 8   Deadlift        5963 non-null   float64
 9   Fight Gone Bad  5963 non-null   float64
 10  17.1_time       5963 non-null   float64
 11  17.1_reps       5963 non-null   int64  
 12  17.2_score      5963 non-null   float64
 13  17.3_time       5963 non-null   float64
 14  17.3_reps       5963 non-null   int64  
 15  17.4_score      5963 non-null   float64
 16  17.5_score      5963 non-null   int64  
dtypes: float64(11), int64(5), object(

In [180]:
combined_df.columns

Index(['Userid', 'Name', 'Age', 'Height_inches', 'Weight_lbs', 'Back Squat',
       'Clean_and_Jerk', 'Snatch', 'Deadlift', 'Fight Gone Bad', '17.1_time',
       '17.1_reps', '17.2_score', '17.3_time', '17.3_reps', '17.4_score',
       '17.5_score'],
      dtype='object')

In [181]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error

#Let's create our training and testing data

#Get rid of the names and userid as these will not help when modeling!
combined_df.drop(['Userid','Name'], axis = 1, inplace = True)


In [182]:
#Let's predict our 17.5 score

X = combined_df.drop('17.5_score', axis = 1)
y = combined_df['17.5_score']

In [183]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2, random_state = 42)

model = RandomForestRegressor(n_estimators = 100, random_state = 42)

model.fit(X_train,y_train)

In [184]:
y_preds = model.predict(X_test)

# view the predictions
pd.DataFrame({'actual': y_test, 'preds': y_preds}).head(10)

Unnamed: 0,actual,preds
705,694,562.42
3328,1058,839.32
1344,868,713.63
1219,587,607.05
1193,767,688.41
296,475,505.15
177,1040,892.71
1421,665,669.78
3026,677,665.11
5372,730,660.82


In [185]:
r2_score(y_test,y_preds)

0.2986098148197689

In [186]:
#Calculate MAE
mae = mean_absolute_error(y_test,y_preds)
mae

164.14396479463537

### PyTorch Approach

In [215]:
!pip install torchmetrics



In [216]:
import torch
from torch import nn
import matplotlib.pyplot as plt

X_torch = torch.from_numpy(X.values).type(torch.float32)
y_torch = torch.from_numpy(y.values).type(torch.float32)

In [217]:
X_train, X_test, y_train, y_test = train_test_split(X_torch, y_torch, test_size=0.33, random_state=42)

In [218]:
X_train.dtype,X_test.dtype,y_train.dtype,y_test.dtype

(torch.float32, torch.float32, torch.float32, torch.float32)

In [219]:
#Create model and instantiate

class WOD_model(nn.Module):
    def __init__(self,input_features, output_features, hidden_units):
        super().__init__()
        
        #Create the layers
        self.model = nn.Sequential(
        nn.Linear(in_features = input_features, out_features = hidden_units),
        nn.ReLU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.ReLU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.ReLU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.ReLU(),
        nn.Linear(in_features = hidden_units, out_features = output_features),
        )
        
    def forward(self,x):
        return self.model(x)
        
#Instantiate model
model_V0 = WOD_model(input_features = combined_df.drop('17.5_score', axis = 1).shape[1], output_features = 1, hidden_units = 16)

In [220]:
model_V0.state_dict

<bound method Module.state_dict of WOD_model(
  (model): Sequential(
    (0): Linear(in_features=14, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=16, bias=True)
    (5): ReLU()
    (6): Linear(in_features=16, out_features=16, bias=True)
    (7): ReLU()
    (8): Linear(in_features=16, out_features=1, bias=True)
  )
)>

In [221]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(params = model_V0.parameters(), lr = 0.02)

In [222]:
#Create training and testing loop

torch.manual_seed(42)

epochs = 1000

for epoch in range(epochs):
    model_V0.train()
    
    y_pred = model_V0(X_train)
    
    loss = loss_fn(y_pred,y_train)
    
    optimizer.zero_grad()
    
    loss.backward()
    
    optimizer.step()
    
    ### Testing loop
    
    model_V0.eval()
    
    with torch.inference_mode():
        test_pred = model_V0(X_test)
        test_loss = loss_fn(test_pred,y_test)
    if epoch%100 == 0:
            print(f"Epoch: {epoch}| Training Loss: {loss} | Test Loss: {test_loss}")

Epoch: 0| Training Loss: 885.8988037109375 | Test Loss: 322.9837341308594


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Epoch: 100| Training Loss: 262.1097412109375 | Test Loss: 8703.2119140625
Epoch: 200| Training Loss: 262.1084899902344 | Test Loss: 8706.2744140625
Epoch: 300| Training Loss: 262.1086120605469 | Test Loss: 8695.8740234375
Epoch: 400| Training Loss: 262.10955810546875 | Test Loss: 8704.00390625
Epoch: 500| Training Loss: 262.10968017578125 | Test Loss: 8703.7412109375
Epoch: 600| Training Loss: 262.10882568359375 | Test Loss: 8696.6650390625
Epoch: 700| Training Loss: 262.109375 | Test Loss: 8698.064453125
Epoch: 800| Training Loss: 262.1085205078125 | Test Loss: 8706.236328125
Epoch: 900| Training Loss: 262.1095886230469 | Test Loss: 8704.2275390625


In [223]:
from torchmetrics.regression import MeanAbsoluteError

mae_fn = MeanAbsoluteError()

mae_fn(y_pred.squeeze(),y_train)

tensor(262.1086, grad_fn=<SqueezeBackward0>)

In [224]:
y_pred.squeeze().detach

<function Tensor.detach>

In [225]:
pred = y_pred.squeeze().detach().numpy()
true_vals = y_train.detach().numpy()


In [226]:

r2_score(true_vals,pred)

-0.02840681116002508

In [227]:
pd.DataFrame({'actual': true_vals, 'preds': pred}).head(10)

Unnamed: 0,actual,preds
0,638.0,828.07019
1,899.0,828.07019
2,1005.0,828.07019
3,869.0,828.07019
4,833.0,828.07019
5,753.0,828.07019
6,438.0,828.07019
7,1062.0,828.07019
8,941.0,828.07019
9,739.0,828.07019
