# Dependencies

In [1]:
!pip install tensorflow-text

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-text
  Downloading tensorflow_text-2.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m88.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-text
Successfully installed tensorflow-text-2.12.1


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import pickle
import re
import tensorflow_hub as hub
import tensorflow_text as text
from sklearn import metrics

# Data Preparation

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


Using dataset: [Gym Exercise Dataset | Kaggle](https://www.kaggle.com/datasets/niharika41298/gym-exercise-data)

In [4]:
file_path = '/content/gdrive/MyDrive/FAIZ/Dataset/Workout recom/megaGymDataset.csv'

In [5]:
data = pd.read_csv(file_path)
data.head()

Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,RatingDesc
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0,
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,,
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,,
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,,
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,,


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2918 entries, 0 to 2917
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  2918 non-null   int64  
 1   Title       2918 non-null   object 
 2   Desc        1368 non-null   object 
 3   Type        2918 non-null   object 
 4   BodyPart    2918 non-null   object 
 5   Equipment   2918 non-null   object 
 6   Level       2918 non-null   object 
 7   Rating      1031 non-null   float64
 8   RatingDesc  862 non-null    object 
dtypes: float64(1), int64(1), object(7)
memory usage: 205.3+ KB


In [7]:
# Remove the leftover index column and the two sparsely filled rating fields
data = data.drop(columns=['Unnamed: 0', 'Rating', 'RatingDesc'])

In [8]:
# Dropping rows with blank description, system will use description to find similarities
data = data.dropna(subset=['Desc'])

In [9]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1368 entries, 0 to 2916
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Title      1368 non-null   object
 1   Desc       1368 non-null   object
 2   Type       1368 non-null   object
 3   BodyPart   1368 non-null   object
 4   Equipment  1368 non-null   object
 5   Level      1368 non-null   object
dtypes: object(6)
memory usage: 74.8+ KB


In [10]:
data.value_counts('Type')

Type
Strength                 1234
Plyometrics                53
Stretching                 44
Cardio                     17
Powerlifting               11
Olympic Weightlifting       5
Strongman                   4
dtype: int64

In [11]:
data.value_counts('BodyPart')

BodyPart
Abdominals     298
Quadriceps     245
Shoulders      174
Chest          149
Biceps         101
Triceps         88
Lats            69
Hamstrings      50
Middle Back     49
Lower Back      42
Glutes          29
Calves          26
Traps           17
Forearms        16
Abductors        8
Adductors        7
dtype: int64

In [12]:
data.value_counts('Equipment')

Equipment
Body Only        404
Dumbbell         246
Barbell          161
Cable            149
Machine          121
Other            101
Kettlebells       53
Bands             49
Exercise Ball     28
Medicine Ball     25
E-Z Curl Bar      14
None               9
Foam Roll          8
dtype: int64

In [13]:
data.value_counts('Level')

Level
Intermediate    1250
Beginner         108
Expert            10
dtype: int64

Based on the data, the system will recommend the top 3 alternatives to a given workout.

In [14]:
data.loc[data['Equipment'] == 'None']

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
637,Decline oblique crunch,The decline oblique crunch is a popular bodywe...,Strength,Abdominals,,Intermediate
638,Decline sit-up,The decline sit-up is a bodyweight core exerci...,Strength,Abdominals,,Intermediate
639,Hanging Windshield Wiper,The hanging windshield wiper is an advanced ab...,Strength,Abdominals,,Intermediate
1402,Glute ham raise-,The glute ham raise is an exercise targeting t...,Strength,Hamstrings,,Beginner
1403,Lying hamstring stretch with band,The lying hamstring stretch with band is a sim...,Stretching,Hamstrings,,Beginner
1406,Alternating lunge jump,The alternating lunge jump is an explosive bod...,Stretching,Hamstrings,,Beginner
2421,Dumbbell lateral hop to sprint,The dumbbell lateral hop to sprint is a multi-...,Plyometrics,Quadriceps,,Intermediate
2422,Smith machine lunge sprint,The Smith machine lunge sprint is a lower-body...,Strength,Quadriceps,,Intermediate
2423,Sissy squat,The sissy squat is a bodyweight squat variatio...,Strength,Quadriceps,,Intermediate


In [15]:
# Treat a missing equipment label as bodyweight work. Only the Equipment column
# is touched (a frame-wide replace would also rewrite any other cell equal to
# 'None'), and NaN is filled as well because newer pandas versions parse the
# literal text "None" in a CSV as a missing value.
data = data.assign(
    Equipment=data['Equipment'].replace('None', 'Body Only').fillna('Body Only')
)

In [16]:
data.value_counts('Equipment')

Equipment
Body Only        413
Dumbbell         246
Barbell          161
Cable            149
Machine          121
Other            101
Kettlebells       53
Bands             49
Exercise Ball     28
Medicine Ball     25
E-Z Curl Bar      14
Foam Roll          8
dtype: int64

In [17]:
# data.to_csv(r'/content/data.csv', index=False, header=True)

In [18]:
data.isna().sum()

Title        0
Desc         0
Type         0
BodyPart     0
Equipment    0
Level        0
dtype: int64

In [19]:
data.duplicated().sum()

9

In [20]:
data.loc[data.duplicated(subset=['Title'])]

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
97,Decline bar press sit-up,The decline bar press sit-up is a weighted cor...,Strength,Abdominals,Barbell,Intermediate
645,Exercise Ball Cable Crunch - Gethin Variation,The exercise ball crunch is a popular gym exer...,Strength,Abdominals,Cable,Intermediate
939,Band-suspended kettlebell bench press,The band-suspended kettlebell bench press is a...,Strength,Chest,Bands,Intermediate
958,Band-suspended kettlebell bench press,The band-suspended kettlebell bench press is a...,Strength,Chest,Bands,Intermediate
1709,Seated Cable Rows,The cable seated row is a popular exercise to ...,Strength,Middle Back,Cable,Intermediate
1730,Seated Cable Rows,The cable seated row is a popular exercise to ...,Strength,Middle Back,Cable,Intermediate
2004,Dumbbell step-up,The dumbbell step-up is a great exercise for b...,Strength,Quadriceps,Dumbbell,Intermediate
2655,Arnold press,Named after the iconic bodybuilder and movie s...,Strength,Shoulders,Dumbbell,Intermediate
2658,Seated rear delt fly,The seated rear delt fly is an upper-body exer...,Strength,Shoulders,Dumbbell,Intermediate


In [21]:
# Dropping duplicate workout
data = data.drop_duplicates(subset='Title')

In [22]:
data = data.reset_index(drop=True)
data

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate
1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate
2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate
3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate
4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate
...,...,...,...,...,...,...
1354,Decline EZ-bar skullcrusher,The decline EZ-bar skullcrusher is a popular e...,Strength,Triceps,E-Z Curl Bar,Intermediate
1355,EZ-Bar Skullcrusher,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate
1356,EZ-bar skullcrusher-,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate
1357,EZ-Bar Skullcrusher - Gethin Variation,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate


In [23]:
# Replace every run of characters other than letters and digits with a single
# space, so titles that differ only in punctuation (e.g. a trailing "-") end up
# identical and are caught by the duplicate check that follows.
data['Title'] = data['Title'].apply(lambda x: re.sub('[^A-Za-z0-9]+', ' ', x))

In [24]:
data['Title'] = data['Title'].apply(lambda x: x.lower())

In [25]:
data['Title'] = data['Title'].apply(lambda x: x.strip())

In [26]:
data.loc[data.duplicated(subset=['Title'])]

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
35,dumbbell v sit cross jab,The dumbbell V-sit cross jab is a hybrid movem...,Strength,Abdominals,Dumbbell,Intermediate
199,decline crunch,The decline crunch is a popular bodyweight exe...,Strength,Abdominals,Body Only,Intermediate
232,otis up,The Otis-up is a weighted exercise focusing on...,Strength,Abdominals,Body Only,Intermediate
328,barbell curl,The barbell curl is an arm exercise that is al...,Strength,Biceps,Barbell,Intermediate
437,standing calf raise,The standing calf raise is a popular movement ...,Strength,Calves,Body Only,Intermediate
477,push up to side plank,The push-up to side plank is an upper-body and...,Strength,Chest,Body Only,Intermediate
517,smith machine incline bench press,The Smith machine incline bench press is a mac...,Strength,Chest,Machine,Intermediate
557,dumbbell bench press,The dumbbell bench press is a mainstay of lift...,Strength,Chest,Dumbbell,Intermediate
623,barbell hip thrust,The barbell hip thrust is a popular exercise t...,Strength,Glutes,Body Only,Intermediate
675,glute ham raise,The glute ham raise is an exercise targeting t...,Strength,Hamstrings,Body Only,Beginner


In [27]:
# Dropping duplicate workout
data = data.drop_duplicates(subset='Title')

In [28]:
data = data.reset_index(drop=True)
data

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
0,partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate
1,banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate
2,fyr banded plank jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate
3,banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate
4,crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate
...,...,...,...,...,...,...
1324,bench dip,The bench dip is a highly effective exercise f...,Strength,Triceps,Body Only,Intermediate
1325,decline ez bar skullcrusher,The decline EZ-bar skullcrusher is a popular e...,Strength,Triceps,E-Z Curl Bar,Intermediate
1326,ez bar skullcrusher,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate
1327,ez bar skullcrusher gethin variation,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate


In [29]:
data.to_csv(r'/content/workout_dataset.csv', index=True, header=True)

# Modeling

In [30]:
# Work on an independent copy so that normalizing the descriptions in the next
# cell does not silently rewrite `data` (plain assignment would only alias it).
new_data = data.copy()

In [31]:
# Normalizing description feature: lowercase first, then collapse every run of
# non-alphanumeric characters into a single space
desc_lowercased = data['Desc'].map(str.lower)
new_data['Desc'] = desc_lowercased.map(lambda desc: re.sub('[^A-Za-z0-9]+', ' ', desc))

In [32]:
new_data.head()

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level
0,partner plank band row,the partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate
1,banded crunch isometric hold,the banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate
2,fyr banded plank jack,the banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate
3,banded crunch,the banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate
4,crunch,the crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate


In [33]:
# NOTE: this binds a second name to the same DataFrame object (no copy is made),
# so the 'encodings' column added to encoded_data also appears on new_data.
# eval() further down depends on that when it drops 'encodings' from a row
# taken from new_data.
encoded_data = new_data

In [34]:
# Using BERT to encode description text
# The encoder is only used as a frozen feature extractor (nothing visible in
# this notebook fine-tunes it), so it is loaded as non-trainable; this also
# guarantees it always runs in inference mode.
preprocessor = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4", trainable=False)

# One embedding model per (preprocessor, encoder) pair, built on first use.
_EMBEDDING_MODELS = {}

def get_bert_embeddings(text, preprocessor, encoder):
  """Return the BERT pooled-output embedding of a single string.

  Args:
    text: The string to embed. (Inside this function the name shadows the
      `tensorflow_text as text` import, which is not needed here.)
    preprocessor: Layer that turns a batch of strings into encoder inputs.
    encoder: Layer whose output is a mapping containing 'pooled_output'.

  Returns:
    The 'pooled_output' tensor for a batch that holds only `text`.
  """
  # Wiring the Keras model is the same work on every call, so it is done once
  # per layer pair instead of once per embedded string. The cached model keeps
  # both layers alive, so their id()s cannot be reused by other objects.
  cache_key = (id(preprocessor), id(encoder))
  embedding_model = _EMBEDDING_MODELS.get(cache_key)
  if embedding_model is None:
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
    encoder_inputs = preprocessor(text_input)
    outputs = encoder(encoder_inputs)
    embedding_model = tf.keras.Model(text_input, outputs['pooled_output'])
    _EMBEDDING_MODELS[cache_key] = embedding_model

  # The model expects a batch, so wrap the single string in a one-element batch
  sentences = tf.constant([text])
  return embedding_model(sentences)

# Embed every description one at a time; each cell of the new 'encodings' column
# holds the tensor returned by get_bert_embeddings for that row's text.
encoded_data['encodings'] = encoded_data['Desc'].apply(lambda x: get_bert_embeddings(x, preprocessor, encoder))

In [35]:
encoded_data

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,encodings
0,partner plank band row,the partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,"((tf.Tensor(-0.497315, shape=(), dtype=float32..."
1,banded crunch isometric hold,the banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,"((tf.Tensor(-0.6131973, shape=(), dtype=float3..."
2,fyr banded plank jack,the banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,"((tf.Tensor(-0.6472458, shape=(), dtype=float3..."
3,banded crunch,the banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,"((tf.Tensor(-0.6446581, shape=(), dtype=float3..."
4,crunch,the crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,"((tf.Tensor(-0.62150997, shape=(), dtype=float..."
...,...,...,...,...,...,...,...
1324,bench dip,the bench dip is a highly effective exercise f...,Strength,Triceps,Body Only,Intermediate,"((tf.Tensor(-0.76620835, shape=(), dtype=float..."
1325,decline ez bar skullcrusher,the decline ez bar skullcrusher is a popular e...,Strength,Triceps,E-Z Curl Bar,Intermediate,"((tf.Tensor(-0.26601514, shape=(), dtype=float..."
1326,ez bar skullcrusher,the ez bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate,"((tf.Tensor(-0.73581356, shape=(), dtype=float..."
1327,ez bar skullcrusher gethin variation,the ez bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate,"((tf.Tensor(-0.73581356, shape=(), dtype=float..."


In [40]:
encodings = encoded_data['encodings']
encodings

0       ((tf.Tensor(-0.497315, shape=(), dtype=float32...
1       ((tf.Tensor(-0.6131973, shape=(), dtype=float3...
2       ((tf.Tensor(-0.6472458, shape=(), dtype=float3...
3       ((tf.Tensor(-0.6446581, shape=(), dtype=float3...
4       ((tf.Tensor(-0.62150997, shape=(), dtype=float...
                              ...                        
1324    ((tf.Tensor(-0.76620835, shape=(), dtype=float...
1325    ((tf.Tensor(-0.26601514, shape=(), dtype=float...
1326    ((tf.Tensor(-0.73581356, shape=(), dtype=float...
1327    ((tf.Tensor(-0.73581356, shape=(), dtype=float...
1328    ((tf.Tensor(-0.73581356, shape=(), dtype=float...
Name: encodings, Length: 1329, dtype: object

In [42]:
# NOTE(review): default_handler=str is the fallback pandas calls for values it
# cannot serialize natively, so the tensors are presumably written as their
# string form rather than as numeric arrays — confirm that whatever reads
# encodings.json expects that.
encodings.to_json('encodings.json', default_handler=str)

In [99]:
with open('encodings.pickle', 'wb') as pkl:
  pickle.dump(encodings, pkl)

# Retrieving recommendations

In [None]:
# Take one workout sample
sample_input = new_data.sample()
sample_input

In [None]:
# Pull the description text out of the single sampled row
query_text = sample_input['Desc'].iloc[0]
print(query_text)

In [None]:
query_encoding = get_bert_embeddings(query_text, preprocessor, encoder)

# Calculating cosine similarity
def _similarity_to_query(row_encoding):
  # cosine_similarity returns a 1x1 matrix for two single-row inputs
  return metrics.pairwise.cosine_similarity(row_encoding, query_encoding)[0][0]

encoded_data['similarity_score'] = encoded_data['encodings'].apply(_similarity_to_query)
results = encoded_data.sort_values(by=['similarity_score'], ascending=False)

In [None]:
# Returning top 3 recommendation
# Exclude the sampled workout by its index label rather than assuming it is
# ranked first: workouts with an identical description tie with it, and a tie
# could otherwise leave the query itself among its own recommendations.
top3 = results.drop(index=sample_input.index).iloc[:3]
top3 = top3.drop('encodings', axis=1)
top3

In [None]:
def give_recommendation(text: str) -> int:
  """Placeholder for the text-to-recommendation entry point.

  The cell previously held only the signature, which is a SyntaxError when
  run. Until the logic is written, calling this fails loudly instead.

  Args:
    text: Free-text workout description to find alternatives for.

  Raises:
    NotImplementedError: Always; the function has no implementation yet.
  """
  # TODO: implement, and revisit the `-> int` return annotation once the
  # intended return value is decided.
  raise NotImplementedError("give_recommendation is not implemented yet")

# Evaluation

In [None]:
def eval(index):
  """Score the top-3 recommendations for the workout at row position `index`.

  The row's description is embedded, every stored encoding is scored by cosine
  similarity against it, and the three best-scoring *other* rows are compared
  with the query row on Type, BodyPart, Equipment and Level.

  Note: the name shadows Python's built-in eval() for the rest of the notebook,
  and every call overwrites encoded_data['similarity_score'].

  Args:
    index: Integer row position in new_data of the workout used as the query.

  Returns:
    The fraction of the 12 attribute comparisons (3 recommendations x 4
    attributes) in which the recommendation matches the query row.
  """
  compared_columns = ['Type', 'BodyPart', 'Equipment', 'Level']

  # Taking a row from workout data
  sample_input = new_data.iloc[[index]]
  query_label = sample_input.index[0]
  # Look the description up by column name instead of by column position
  query_text = sample_input['Desc'].iat[0]
  query_encoding = get_bert_embeddings(query_text, preprocessor, encoder)

  # Calculating similarity
  encoded_data['similarity_score'] = encoded_data['encodings'].apply(lambda x: metrics.pairwise.cosine_similarity(x, query_encoding)[0][0])

  # Remove the query row by label before ranking. Skipping "rank 0" is not
  # enough: a workout with an identical description ties with the query, and a
  # tie could leave the query itself in the top 3 and inflate the precision.
  candidates = encoded_data.drop(index=query_label)
  results = candidates.sort_values(by=['similarity_score'], ascending=False)

  # Returning top 3 recommendation
  top3 = results.iloc[:3].reset_index(drop=True)

  # Calculating precision: count attribute matches between each recommendation
  # and the query row, out of 3 recommendations x 4 attributes = 12
  matches = 0
  for column in compared_columns:
    matches += (top3[column] == sample_input[column].iat[0]).sum()
  precision = matches / 12
  return precision

In [None]:
# Score every row position in turn, then average the per-row precision
all_precision = [eval(row_position) for row_position in range(len(new_data.index))]
mean_precision = sum(all_precision) / len(all_precision)
print('System precision:', "{:.0%}".format(mean_precision))