In [1]:
try:
    from google.colab import drive

    !gdown "1K1rc0tBiqdSs7ZUkSZTkUCnixvsaF0Dx" # gymvisual-cleaned-2.json
    !gdown "1SlgWerOrAqgBdaE4Hhzb8XHVBhPKzKwj" # test-user.json (DUMMY, SHOULD RETRIEVE FROM DATABASE)

    workout_json = './gymvisual-cleaned-2.json'
    hist_json = './test-user.json'
except ImportError:
    workout_json = '../data/gymvisual-cleaned-2.json'
    hist_json = '../data/test-user.json'

Downloading...
From: https://drive.google.com/uc?id=1K1rc0tBiqdSs7ZUkSZTkUCnixvsaF0Dx
To: /content/gymvisual-cleaned-2.json
100% 903k/903k [00:00<00:00, 34.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1SlgWerOrAqgBdaE4Hhzb8XHVBhPKzKwj
To: /content/test-user.json
100% 789/789 [00:00<00:00, 2.13MB/s]


In [2]:
import json

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import (Dense, Embedding, Flatten, Input, concatenate)
from tensorflow.keras.models import Model

In [3]:
with open(workout_json, 'r') as f:
    workout_f = json.load(f)

df_workout = pd.json_normalize(workout_f)
df_hist = pd.read_json(hist_json)

df_workout.drop(
    ['desc', 'jpg', 'gif', 'duration.desc', 'duration.min', 'duration.rep', 'duration.set', 'duration.sec'],
    axis=1, inplace=True
)

In [4]:
pd.merge(df_hist, df_workout, on='title')

Unnamed: 0,name,gender_x,level_x,title,rating,type,body_part,gender_y,level_y
0,new,Male,Beginner,3/4 Sit-up,4,Strength,Waist,Male,Beginner
1,new,Male,Beginner,Alternate Heel Touchers,5,Strength,Waist,Male,Beginner
2,new,Male,Beginner,Bench Dip (knees bent),7,Strength,Upper Arms,Male,Beginner
3,new,Male,Beginner,One Arm Dip,9,Strength,Upper Arms,Male,Beginner
4,new,Male,Beginner,Overhead Triceps Stretch,8,Stretching,Upper Arms,Male,Beginner


In [5]:
df_workout.loc[
    df_workout.title.isin(
        ['3/4 Sit-up', 'Alternate Heel Touchers', 'Bench Dip (knees bent)', 'One Arm Dip', 'Overhead Triceps Stretch']
    )
]

Unnamed: 0,title,type,body_part,gender,level
0,3/4 Sit-up,Strength,Waist,Male,Beginner
3,Alternate Heel Touchers,Strength,Waist,Male,Beginner
5,Bench Dip (knees bent),Strength,Upper Arms,Male,Beginner
111,One Arm Dip,Strength,Upper Arms,Male,Beginner
112,Overhead Triceps Stretch,Stretching,Upper Arms,Male,Beginner


In [6]:
workout_col = list(df_workout.select_dtypes(exclude=[np.number]))
hist_col = list(df_hist.select_dtypes(exclude=[np.number]))

columns_to_encode = set(workout_col + hist_col)
le = {
    col: LabelEncoder().fit(df_workout[col]
                            if col in df_workout.columns
                            else df_hist[col]) for col in columns_to_encode
}

for col in columns_to_encode:

    if col in df_workout.columns:
        df_workout[col] = le[col].transform(df_workout[col])

    if col in df_hist.columns:
        df_hist[col] = le[col].transform(df_hist[col])

In [7]:
pd.merge(df_hist, df_workout, on='title')

Unnamed: 0,name,gender_x,level_x,title,rating,type,body_part,gender_y,level_y
0,0,1,0,6,4,1,50,1,0
1,0,1,0,40,5,1,50,1,0
2,0,1,0,108,7,1,48,1,0
3,0,1,0,1025,9,1,48,1,0
4,0,1,0,1045,8,2,48,1,0


In [8]:
df_workout.loc[
    df_workout.title.isin(
        [6, 40, 108, 1025, 1045]
    )
]

Unnamed: 0,title,type,body_part,gender,level
0,6,1,50,1,0
3,40,1,50,1,0
5,108,1,48,1,0
111,1025,1,48,1,0
112,1045,2,48,1,0


In [9]:
def train(workout_data, history_data=None):
    if history_data is not None and len(history_data) >= 5:
        features = ['name', 'gender_x', 'level_x', 'title', 'rating', 'type', 'body_part', 'gender_y', 'level_y']

        merged_data = pd.merge(df_hist, df_workout, on='title')
        X_train, X_test, Y_train, Y_test = train_test_split(merged_data[features], merged_data['rating'], test_size=0.2)
        # merged_data = merged_data.drop_duplicates(subset=['title'], keep='last')


        Y_train, Y_test = Y_train > 5, Y_test > 5

        model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=30, activation='relu'),
            tf.keras.layers.Dense(units=10, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(units=1, activation='sigmoid'),
        ])

        model.compile(
            loss=tf.keras.losses.BinaryCrossentropy(),
            optimizer=tf.keras.optimizers.Adam(),
            metrics=['accuracy']
        )

        history = model.fit(
            X_train, Y_train,
            epochs=100,
            validation_data=(X_test, Y_test),
            verbose=0
        )

        loss = model.evaluate(X_test)
        print(f"Test loss: {loss}")

        return model
    else:
        pass

In [10]:
model = train(df_workout, df_hist)

Test loss: [0.0, 0.0]


In [11]:
features = ['name', 'gender_x', 'level_x', 'title', 'rating', 'type', 'body_part', 'gender_y', 'level_y']

merged_data = pd.merge(df_hist, df_workout, on='title')
model.predict(merged_data[features])



array([[5.8159727e-04],
       [3.8882548e-01],
       [9.9852103e-01],
       [1.0000000e+00],
       [1.0000000e+00]], dtype=float32)