In [15]:
import numpy as np
import re
import json
from tqdm import tqdm

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Concatenate

### BOW (Bag of Words) simple text encoder

In [9]:
# create a bag of words from the json
def create_bow_from_json(json_data):
    bow = []
    for _, game_data in json_data.items():
        for key in ['name', 'summary', 'genres', 'keywords']:
            if key in game_data:
                text = game_data[key]
                if isinstance(text, list):
                    text = ' '.join(text)
                # remove special characters and split into words
                words = re.findall(r'\b\w+\b', text.lower())
                for word in words:
                    if word not in bow:
                        bow.append(word)
    return bow

# create dataset for the autoencoder
def create_dataset_from_bow(dat, bow):
    X_dict = {}
    for game_id, game_data in dat.items():
        X = np.zeros(len(bow), dtype=np.float32)
        for key in ['name', 'summary', 'genres', 'keywords']:
            if key in game_data:
                text = game_data[key]
                if isinstance(text, list):
                    text = ' '.join(text)
                # remove special characters and split into words
                words = re.findall(r'\b\w+\b', text.lower())
                for word in words:
                    if word in bow:
                        X[bow.index(word)] += 1
        X_dict[game_id] = X
    return X_dict


In [10]:
# create a bag of words from the json file
with open('igdb_data_500.json', 'r') as f:
    json_data = json.load(f)
bow = create_bow_from_json(json_data)

# create the dataset
X = create_dataset_from_bow(json_data, bow)

In [16]:

# build the autoencoder model
input_dim = len(bow)  # number of unique words in the bag of words
encoding_dim = 256  # compress to 256 dims

input_bow = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_bow)
decoded = Dense(input_dim, activation='sigmoid')(encoded)

autoencoder = Model(input_bow, decoded)
encoder = Model(input_bow, encoded)

autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
X_train = np.array(list(X.values()))
autoencoder.fit(X_train, X_train, epochs=50, batch_size=256, shuffle=True)

Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 111ms/step - loss: 0.6904
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.6676 
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.6213
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.5457 
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.4454
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - loss: 0.3362
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.2398
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - loss: 0.1704
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.1230
Epoch 10/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0930
Epoch 11/50
[1m

<keras.src.callbacks.history.History at 0x12bd98d10>

In [17]:
# export the encoder model
encoder.save('game_encoder.h5')

# export the data to a JSON file
vec_json = {}
for game_id, game_data in json_data.items():
    game_bow = X[game_id] # get the bag of words vector for the game
    vec_json[game_id] = {}
    vec_json[game_id]['name'] = game_data['name']
    vec_json[game_id]['vector'] = encoder.predict(np.array([game_bow]), verbose=0)[0].tolist()
    vec_json[game_id]['img'] = game_data.get('cover_image_url', None)  # add cover image URL if available

with open('game_vecs.json', 'w') as f:
    json.dump(vec_json, f)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33

### Multi-modal Encoding

In [18]:
# import image via url to process
import requests
from PIL import Image
from io import BytesIO

def url2Img(url, MAX_WIDTH=128, MAX_HEIGHT=128):
    ''' Converts an image URL to a PIL Image after resizing it to MAX_WIDTH and MAX_HEIGHT '''
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None
    
    img = Image.open(BytesIO(response.content))
    #resize to MAX_WIDTH x MAX_HEIGHT
    img = img.resize((MAX_WIDTH, MAX_HEIGHT), Image.Resampling.LANCZOS)
    return img

# convert the cover images to numpy arrays
def convert_images_to_arrays(dat):
    img_arrays = {}
    with tqdm(total=len(dat), desc="Processing images") as pbar:
        for game_id, game_data in dat.items():
            pbar.set_postfix({"game": game_id})
            if 'cover_image_url' in game_data and game_data['cover_image_url']:
                img = url2Img(game_data['cover_image_url'])
                if img is not None:
                    img_array = np.array(img)
                    img_arrays[game_id] = img_array
            pbar.update(1)
    return img_arrays

# create the image dataset
X_imgs = convert_images_to_arrays(json_data)
X_ratings = {game_id: game_data.get('rating', 0) for game_id, game_data in json_data.items()}

Processing images: 100%|██████████| 498/498 [01:16<00:00,  6.52it/s, game=11137] 


In [58]:
print(np.array([X_imgs['1020'],X_imgs['1905'], X_imgs['17269']]).shape)


(3, 128, 128, 3)


In [75]:
# --- Parameters ---
bow_dim = len(bow)        # input size of BoW
image_shape = (128, 128, 3)
encoding_dim = 256     # final embedding size

# --- Inputs ---
text_input = Input(shape=(bow_dim,), name='text_input')
image_input = Input(shape=image_shape, name='image_input')
float_input = Input(shape=(1,), name='float_input')

# --- Text Encoder (BoW -> Dense) ---
text_encoded = Dense(512, activation='relu')(text_input)

# --- Image Encoder (Simple CNN) ---
x = Conv2D(32, (3,3), activation='relu')(image_input)
x = MaxPooling2D((2,2))(x)
x = Conv2D(64, (3,3), activation='relu')(x)
x = MaxPooling2D((2,2))(x)
x = Flatten()(x)
image_encoded = Dense(256, activation='relu')(x)

# --- Float Encoder (Dense) ---
float_encoded = Dense(32, activation='relu')(float_input)

# --- Merge and Final Encoding ---
merged = Concatenate()([text_encoded, image_encoded, float_encoded])
final_encoding = Dense(encoding_dim, activation='relu', name='embedding')(merged)

# --- Model ---
multi_input_encoder = Model(inputs=[text_input, image_input, float_input],
                            outputs=final_encoding)


In [76]:
valid_ids = []
for game_id in json_data.keys():
    if game_id not in X_imgs or X_imgs[game_id].shape != image_shape:
        X_imgs[game_id] = np.zeros(image_shape, dtype=np.uint8)
    if game_id in X_imgs and game_id in X_ratings and game_id in X:
        valid_ids.append(game_id)

In [77]:
# train the model
X_train_images = []
X_train_text = []
X_train_float = []
for game_id in valid_ids:
    X_train_images.append(np.array(X_imgs[game_id]))
    X_train_text.append(np.array(X[game_id]))
    X_train_float.append(np.array(X_ratings[game_id]))

X_train_images = np.array(X_train_images)
X_train_text = np.array(X_train_text)
X_train_float = np.array(X_train_float)

print(f"Training with {len(X_train_text)} text samples, {len(X_train_images)} image samples, and {len(X_train_float)} float samples.")
print(f"Text shape: {X_train_text.shape}, Image shape: {X_train_images.shape}, Float shape: {X_train_float.shape}")


Training with 498 text samples, 498 image samples, and 498 float samples.
Text shape: (498, 5755), Image shape: (498, 128, 128, 3), Float shape: (498,)


In [81]:
# export the multi-input encoder model
multi_input_encoder.save('multi_input_encoder.h5')

# export the data to a JSON file
multi_vec_json = {}
for game_id, game_data in json_data.items():
    multi_vec_json[game_id] = {
        "name": game_data["name"],
        "vector": multi_input_encoder.predict([X[game_id], X_imgs[game_id], X_ratings[game_id]]).tolist(),
        "img": game_data.get('cover_image_url', None)  # add cover image URL if available
    }

with open('game_multivecs.json', 'w') as f:
    json.dump(multi_vec_json, f)



ValueError: Unrecognized data type: x=[array([2., 2., 2., ..., 0., 0., 0.], dtype=float32), array([[[  0,  39, 100],
        [  0,  39, 100],
        [  0,  39, 100],
        ...,
        [  0,  40, 104],
        [  1,  38, 101],
        [  2,  39,  99]],

       [[  0,  39, 101],
        [  0,  39, 101],
        [  0,  39, 101],
        ...,
        [  0,  41, 105],
        [  1,  39, 101],
        [  3,  39,  99]],

       [[  0,  39, 101],
        [  0,  39, 101],
        [  0,  39, 101],
        ...,
        [  0,  42, 106],
        [  1,  40, 103],
        [  2,  40, 102]],

       ...,

       [[  1,  33,  86],
        [  1,  33,  86],
        [  1,  33,  86],
        ...,
        [  1,  33,  86],
        [  1,  33,  86],
        [  1,  33,  86]],

       [[  1,  32,  82],
        [  1,  32,  82],
        [  1,  32,  82],
        ...,
        [  1,  32,  82],
        [  1,  32,  82],
        [  1,  32,  82]],

       [[  1,  31,  81],
        [  1,  31,  81],
        [  1,  31,  81],
        ...,
        [  1,  31,  81],
        [  1,  31,  81],
        [  1,  31,  81]]], dtype=uint8), 75.03218934910484] (of type <class 'list'>)