In [1]:
# UNCOMMENT THESE TO INSTALL THE REQUIRED LIBRARIES

# %pip install pandas
# %pip install tensorflow
# %pip install scikit-learn
# %pip install pymysql
# %pip install sshtunnel

In [2]:
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
import pymysql
import sshtunnel

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

print("Numpy version:", np.__version__)
print("Pandas version:", pd.__version__)
print("Tensorflow version:", tf.__version__)
print("Sklearn version:", sklearn.__version__)
print("Pymysql version:", pymysql.__version__)
print("Sshtunnel version:", sshtunnel.__version__)

MODEL_PATH = "../model/ann_gym_recommender.keras"
DATA_PATH = "../data/megaGymDataset.csv"

Numpy version: 1.26.4
Pandas version: 2.2.2
Tensorflow version: 2.16.1
Sklearn version: 1.4.2
Pymysql version: 1.4.6
Sshtunnel version: 0.4.0


### Reading data from a SQL-SERVER

In [3]:
try:
    # Set up SSH tunnel
    server = sshtunnel.SSHTunnelForwarder(
        ('selene.hud.ac.uk', 22),
        ssh_username='workit',
        ssh_password='umbra(despair>Quartz218',
        remote_bind_address=('localhost', 3306)
    )
    server.start()
    
    # Defining the connection
    connection = pymysql.connect(host='localhost', port=server.local_bind_port, user='workit', password='umbra(despair>Quartz218', database='workit')

    # Load the dataset
    dataset = pd.read_sql_query("SELECT * FROM `megaGymDataset`", connection)

    server.stop()
except:
    print("Could not connect to Database, reading local file from `" + DATA_PATH + "` instead... ")
    dataset = pd.read_csv(DATA_PATH)

2024-07-14 01:48:42,655| ERROR   | Could not connect to gateway selene.hud.ac.uk:22 : Unable to connect to selene.hud.ac.uk: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond


Could not connect to Database, reading local file from `../data/megaGymDataset.csv` instead... 


In [4]:
# Load the dataset
dataset.head()

Unnamed: 0,ID,Title,Desc,Type,BodyPart,Equipment,Level,Rating
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,4.5
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,4.5
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,4.5
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,4.5


### Training from dataset

In [5]:
def labelEncodersScaler(data):
    label_encoders = {}
    for feature in ["Type", "BodyPart", "Equipment", "Level"]:
        label_encoders[feature] = LabelEncoder()
        data[feature] = label_encoders[feature].fit_transform(data[feature])

    X = data[["Type", "BodyPart", "Equipment", "Level"]]
    y = data["ID"]
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Standardize the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    return label_encoders, scaler

In [6]:
def train_model(data):
    label_encoders, scaler = labelEncodersScaler(data)
    
    X = data[["Type", "BodyPart", "Equipment", "Level"]]
    y = data["ID"]
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Build the ANN model using TensorFlow
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(len(data["ID"].unique()), activation='softmax')  # Use the unique number of IDs as the output dimension
    ])
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    return model

In [7]:
model = train_model(dataset.copy())
model.save(MODEL_PATH)

Epoch 1/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.0000e+00 - loss: 7.9917 - val_accuracy: 0.0000e+00 - val_loss: 8.0173
Epoch 2/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0050 - loss: 7.8948 - val_accuracy: 0.0000e+00 - val_loss: 8.6093
Epoch 3/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0027 - loss: 7.0349 - val_accuracy: 0.0000e+00 - val_loss: 10.2416
Epoch 4/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0110 - loss: 6.2264 - val_accuracy: 0.0000e+00 - val_loss: 12.5033
Epoch 5/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0210 - loss: 5.7694 - val_accuracy: 0.0000e+00 - val_loss: 14.0761
Epoch 6/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0183 - loss: 5.3892 - val_accuracy: 0.0000e+00 - val_loss: 15.6298
Epoch 7/